zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker text inserted into the prompt at the cursor offset by the
/// per-format `write_cursor_excerpt_section` functions.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes when building a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    /// NOTE(review): optional, presumably because not every producer computes it — confirm.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens of
    /// additional context (by analogy with the fields above) — TODO confirm.
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens of
    /// additional context (by analogy with the fields above) — TODO confirm.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context range computed with a 4096-token budget — TODO confirm.
    pub context_4096: Option<Range<usize>>,
    /// Presumably a context range computed with an 8192-token budget — TODO confirm.
    pub context_8192: Option<Range<usize>>,
}
  45
/// Everything needed to render an edit-prediction prompt for one cursor position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's file header.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The ranges in `excerpt_ranges` index into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to excerpt-relative rows before comparing the cursor context
    /// against `related_files`; when `None`, related files are used unfiltered.
    /// Presumably the file row at which `cursor_excerpt` begins — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history events, rendered into the prompt's edit-history section.
    pub events: Vec<Arc<Event>>,
    /// Files whose excerpts may be included in the prompt as extra context.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// NOTE(review): not read in the visible part of this file; presumably marks
    /// whether the cursor file belongs to an open-source repository — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): not read in the visible part of this file; presumably a
    /// data-collection consent flag — confirm.
    #[serde(default)]
    pub can_collect_data: bool,
}
  65
/// Identifies which prompt layout is used when formatting a prompt and
/// post-processing model output. The default is `V0131GitMergeMarkersPrefix`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// The `v0226Hashline` variant starts with a lowercase letter, hence the lint allowance.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
}
  91
  92impl std::fmt::Display for ZetaFormat {
  93    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  94        write!(f, "{}", <&'static str>::from(self))
  95    }
  96}
  97
  98impl ZetaFormat {
  99    pub fn parse(format_name: &str) -> Result<Self> {
 100        let mut results = ZetaFormat::iter().filter(|version| {
 101            <&'static str>::from(version)
 102                .to_lowercase()
 103                .contains(&format_name.to_lowercase())
 104        });
 105        let Some(result) = results.next() else {
 106            anyhow::bail!(
 107                "`{format_name}` did not match any of:\n{}",
 108                Self::options_as_string()
 109            );
 110        };
 111        if results.next().is_some() {
 112            anyhow::bail!(
 113                "`{format_name}` matched more than one of:\n{}",
 114                Self::options_as_string()
 115            );
 116        }
 117        Ok(result)
 118    }
 119
 120    pub fn options_as_string() -> String {
 121        ZetaFormat::iter()
 122            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 123            .collect::<Vec<_>>()
 124            .concat()
 125    }
 126}
 127
/// An entry of the edit history that can be rendered into the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as a diff.
    BufferChange {
        /// Path written on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff body, appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the entry with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; ignored by `write_event`.
        in_open_source_repo: bool,
    },
}
 139
 140impl Event {
 141    pub fn in_open_source_repo(&self) -> bool {
 142        match self {
 143            Event::BufferChange {
 144                in_open_source_repo,
 145                ..
 146            } => *in_open_source_repo,
 147        }
 148    }
 149}
 150
 151pub fn write_event(prompt: &mut String, event: &Event) {
 152    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 153        for component in path.components() {
 154            prompt.push('/');
 155            write!(prompt, "{}", component.as_os_str().display()).ok();
 156        }
 157    }
 158    match event {
 159        Event::BufferChange {
 160            path,
 161            old_path,
 162            diff,
 163            predicted,
 164            in_open_source_repo: _,
 165        } => {
 166            if *predicted {
 167                prompt.push_str("// User accepted prediction:\n");
 168            }
 169            prompt.push_str("--- a");
 170            write_path_as_unix_str(prompt, old_path.as_ref());
 171            prompt.push_str("\n+++ b");
 172            write_path_as_unix_str(prompt, path.as_ref());
 173            prompt.push('\n');
 174            prompt.push_str(diff);
 175        }
 176    }
 177}
 178
/// A file whose excerpts may be included in the prompt as additional context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker when the file is rendered.
    pub path: Arc<Path>,
    /// Row bound compared with each excerpt's `row_range.end`: an excerpt that
    /// ends before it is followed by a `...` line when rendered.
    /// Presumably the last row of the file — confirm.
    pub max_row: u32,
    /// Excerpts of this file that are candidates for inclusion.
    pub excerpts: Vec<RelatedExcerpt>,
    /// NOTE(review): not read in the visible part of this file; presumably marks
    /// whether the file belongs to an open-source repository — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 187
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by this excerpt. Compared against the cursor's row range in
    /// `filter_redundant_excerpts` and against `RelatedFile::max_row` when rendering.
    pub row_range: Range<u32>,
    /// Text of the excerpt, written into the prompt verbatim.
    pub text: Arc<str>,
    /// Sort key used when selecting excerpts under a token budget; excerpts with
    /// lower values are considered first. Defaults to 0 when absent on deserialize.
    #[serde(default)]
    pub order: usize,
}
 195
 196pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 197    special_tokens_for_format(format)
 198        .iter()
 199        .any(|token| input.cursor_excerpt.contains(token))
 200}
 201
/// Formats `input` as a prompt in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 205
 206pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 207    match format {
 208        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 209        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 210        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 211        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 212        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 213        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 214        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 215        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 216    }
 217}
 218
 219pub fn excerpt_ranges_for_format(
 220    format: ZetaFormat,
 221    ranges: &ExcerptRanges,
 222) -> (Range<usize>, Range<usize>) {
 223    match format {
 224        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 225            ranges.editable_150.clone(),
 226            ranges.editable_150_context_350.clone(),
 227        ),
 228        ZetaFormat::V0114180EditableRegion => (
 229            ranges.editable_180.clone(),
 230            ranges.editable_180_context_350.clone(),
 231        ),
 232        ZetaFormat::V0120GitMergeMarkers
 233        | ZetaFormat::V0131GitMergeMarkersPrefix
 234        | ZetaFormat::V0211Prefill
 235        | ZetaFormat::V0211SeedCoder
 236        | ZetaFormat::v0226Hashline => (
 237            ranges.editable_350.clone(),
 238            ranges.editable_350_context_150.clone(),
 239        ),
 240    }
 241}
 242
 243pub fn write_cursor_excerpt_section_for_format(
 244    format: ZetaFormat,
 245    prompt: &mut String,
 246    path: &Path,
 247    context: &str,
 248    editable_range: &Range<usize>,
 249    cursor_offset: usize,
 250) {
 251    match format {
 252        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 253            prompt,
 254            path,
 255            context,
 256            editable_range,
 257            cursor_offset,
 258        ),
 259        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 260            v0113_ordered::write_cursor_excerpt_section(
 261                prompt,
 262                path,
 263                context,
 264                editable_range,
 265                cursor_offset,
 266            )
 267        }
 268        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 269            prompt,
 270            path,
 271            context,
 272            editable_range,
 273            cursor_offset,
 274        ),
 275        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 276            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 277                prompt,
 278                path,
 279                context,
 280                editable_range,
 281                cursor_offset,
 282            )
 283        }
 284        ZetaFormat::V0211SeedCoder => seed_coder::write_cursor_excerpt_section(
 285            prompt,
 286            path,
 287            context,
 288            editable_range,
 289            cursor_offset,
 290        ),
 291        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 292            prompt,
 293            path,
 294            context,
 295            editable_range,
 296            cursor_offset,
 297        ),
 298    }
 299}
 300
 301fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 302    let start_row = text[0..range.start].matches('\n').count() as u32;
 303    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 304    if !text[..range.end].ends_with('\n') {
 305        end_row += 1;
 306    }
 307    return start_row..end_row;
 308}
 309
 310pub fn format_prompt_with_budget_for_format(
 311    input: &ZetaPromptInput,
 312    format: ZetaFormat,
 313    max_tokens: usize,
 314) -> String {
 315    let (context, editable_range, context_range, cursor_offset) =
 316        resolve_cursor_region(input, format);
 317    let path = &*input.cursor_path;
 318
 319    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 320        let relative_row_range = offset_range_to_row_range(context, context_range);
 321        let row_range = relative_row_range.start + cursor_excerpt_start_row
 322            ..relative_row_range.end + cursor_excerpt_start_row;
 323        &filter_redundant_excerpts(
 324            input.related_files.clone(),
 325            input.cursor_path.as_ref(),
 326            row_range,
 327        )
 328    } else {
 329        &input.related_files
 330    };
 331
 332    match format {
 333        ZetaFormat::V0211SeedCoder => seed_coder::format_prompt_with_budget(
 334            path,
 335            context,
 336            &editable_range,
 337            cursor_offset,
 338            &input.events,
 339            &related_files,
 340            max_tokens,
 341        ),
 342        _ => {
 343            let mut cursor_section = String::new();
 344            write_cursor_excerpt_section_for_format(
 345                format,
 346                &mut cursor_section,
 347                path,
 348                context,
 349                &editable_range,
 350                cursor_offset,
 351            );
 352
 353            let cursor_tokens = estimate_tokens(cursor_section.len());
 354            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 355
 356            let edit_history_section = format_edit_history_within_budget(
 357                &input.events,
 358                "<|file_sep|>",
 359                "edit history",
 360                budget_after_cursor,
 361            );
 362            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 363            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 364
 365            let related_files_section = format_related_files_within_budget(
 366                &related_files,
 367                "<|file_sep|>",
 368                "",
 369                budget_after_edit_history,
 370            );
 371
 372            let mut prompt = String::new();
 373            prompt.push_str(&related_files_section);
 374            prompt.push_str(&edit_history_section);
 375            prompt.push_str(&cursor_section);
 376            prompt
 377        }
 378    }
 379}
 380
 381pub fn filter_redundant_excerpts(
 382    mut related_files: Vec<RelatedFile>,
 383    cursor_path: &Path,
 384    cursor_row_range: Range<u32>,
 385) -> Vec<RelatedFile> {
 386    for file in &mut related_files {
 387        if file.path.as_ref() == cursor_path {
 388            file.excerpts.retain(|excerpt| {
 389                excerpt.row_range.start < cursor_row_range.start
 390                    || excerpt.row_range.end > cursor_row_range.end
 391            });
 392        }
 393    }
 394    related_files.retain(|file| !file.excerpts.is_empty());
 395    related_files
 396}
 397
 398pub fn get_prefill_for_format(
 399    format: ZetaFormat,
 400    context: &str,
 401    editable_range: &Range<usize>,
 402) -> String {
 403    match format {
 404        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 405        ZetaFormat::V0112MiddleAtEnd
 406        | ZetaFormat::V0113Ordered
 407        | ZetaFormat::V0114180EditableRegion
 408        | ZetaFormat::V0120GitMergeMarkers
 409        | ZetaFormat::V0131GitMergeMarkersPrefix
 410        | ZetaFormat::V0211SeedCoder
 411        | ZetaFormat::v0226Hashline => String::new(),
 412    }
 413}
 414
 415pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 416    match format {
 417        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 418        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 419        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 420        ZetaFormat::V0211SeedCoder => Some(seed_coder::END_MARKER),
 421        ZetaFormat::V0112MiddleAtEnd
 422        | ZetaFormat::V0113Ordered
 423        | ZetaFormat::V0114180EditableRegion
 424        | ZetaFormat::v0226Hashline => None,
 425    }
 426}
 427
 428pub fn current_region_markers_for_format(format: ZetaFormat) -> (&'static str, &'static str) {
 429    match format {
 430        ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"),
 431        ZetaFormat::V0113Ordered
 432        | ZetaFormat::V0114180EditableRegion
 433        | ZetaFormat::v0226Hashline => ("<|fim_middle|>current\n", "<|fim_suffix|>"),
 434        ZetaFormat::V0120GitMergeMarkers
 435        | ZetaFormat::V0131GitMergeMarkersPrefix
 436        | ZetaFormat::V0211Prefill => (
 437            v0120_git_merge_markers::START_MARKER,
 438            v0120_git_merge_markers::SEPARATOR,
 439        ),
 440        ZetaFormat::V0211SeedCoder => (seed_coder::START_MARKER, seed_coder::SEPARATOR),
 441    }
 442}
 443
 444pub fn clean_extracted_region_for_format(format: ZetaFormat, region: &str) -> String {
 445    match format {
 446        ZetaFormat::v0226Hashline => hashline::strip_hashline_prefixes(region),
 447        _ => region.to_string(),
 448    }
 449}
 450
 451pub fn encode_patch_as_output_for_format(
 452    format: ZetaFormat,
 453    old_editable_region: &str,
 454    patch: &str,
 455    cursor_offset: Option<usize>,
 456) -> Result<Option<String>> {
 457    match format {
 458        ZetaFormat::v0226Hashline => {
 459            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 460        }
 461        _ => Ok(None),
 462    }
 463}
 464
 465pub fn output_with_context_for_format(
 466    format: ZetaFormat,
 467    old_editable_region: &str,
 468    output: &str,
 469) -> Result<Option<String>> {
 470    match format {
 471        ZetaFormat::v0226Hashline => {
 472            if hashline::output_has_edit_commands(output) {
 473                Ok(Some(hashline::apply_edit_commands(
 474                    old_editable_region,
 475                    output,
 476                )))
 477            } else {
 478                Ok(None)
 479            }
 480        }
 481        _ => Ok(None),
 482    }
 483}
 484
 485/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 486pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 487    match output_end_marker_for_format(format) {
 488        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 489        None => output,
 490    }
 491}
 492
/// Returns the `(editable, context)` range pair for `format`.
///
/// This forwards directly to `excerpt_ranges_for_format`; the two functions
/// return the same value for the same arguments.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
 499
 500pub fn resolve_cursor_region(
 501    input: &ZetaPromptInput,
 502    format: ZetaFormat,
 503) -> (&str, Range<usize>, Range<usize>, usize) {
 504    let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
 505    let context_start = context_range.start;
 506    let context_text = &input.cursor_excerpt[context_range.clone()];
 507    let adjusted_editable =
 508        (editable_range.start - context_start)..(editable_range.end - context_start);
 509    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 510    let adjusted_context =
 511        (context_range.start - context_start)..(context_range.end - context_start);
 512
 513    (
 514        context_text,
 515        adjusted_editable,
 516        adjusted_context,
 517        adjusted_cursor,
 518    )
 519}
 520
 521pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 522    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 523    get_prefill_for_format(format, context, &editable_range)
 524}
 525
 526fn format_edit_history_within_budget(
 527    events: &[Arc<Event>],
 528    file_marker: &str,
 529    edit_history_name: &str,
 530    max_tokens: usize,
 531) -> String {
 532    let header = format!("{}{}\n", file_marker, edit_history_name);
 533    let header_tokens = estimate_tokens(header.len());
 534    if header_tokens >= max_tokens {
 535        return String::new();
 536    }
 537
 538    let mut event_strings: Vec<String> = Vec::new();
 539    let mut total_tokens = header_tokens;
 540
 541    for event in events.iter().rev() {
 542        let mut event_str = String::new();
 543        write_event(&mut event_str, event);
 544        let event_tokens = estimate_tokens(event_str.len());
 545
 546        if total_tokens + event_tokens > max_tokens {
 547            break;
 548        }
 549        total_tokens += event_tokens;
 550        event_strings.push(event_str);
 551    }
 552
 553    if event_strings.is_empty() {
 554        return String::new();
 555    }
 556
 557    let mut result = header;
 558    for event_str in event_strings.iter().rev() {
 559        result.push_str(event_str);
 560    }
 561    result
 562}
 563
 564fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 565    let needs_newline = !excerpt.text.ends_with('\n');
 566    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 567    let len = excerpt.text.len()
 568        + if needs_newline { "\n".len() } else { 0 }
 569        + if needs_ellipsis { "...\n".len() } else { 0 };
 570    estimate_tokens(len)
 571}
 572
/// Renders the excerpts of `related_files` that fit within `max_tokens`.
///
/// Excerpts are prioritised by their `order` field (ties broken by file index,
/// then excerpt index) and accepted until the first one that would exceed the
/// budget. The accepted excerpts are then rendered grouped by file, in the
/// original file and excerpt order. Each rendered file starts with a
/// `{file_prefix}{path}` header line; `file_suffix` is written between
/// consecutive files (it is not written after the last rendered file).
/// An excerpt that ends before its file's `max_row` is followed by a `...` line.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    // Flatten every excerpt of every file into a single candidate list.
    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (and suffix) is only paid for once per file.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first candidate that does not fit; lower-priority
        // candidates are not considered even if they are smaller.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the accepted prefix, then restore the original file/excerpt order.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        // Entering a new file: close the previous one and write this file's header.
        if last_file_ix != Some(candidate.file_ix) {
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Signal that the file continues past this excerpt.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
 661
 662pub fn write_related_files(
 663    prompt: &mut String,
 664    related_files: &[RelatedFile],
 665) -> Vec<Range<usize>> {
 666    let mut ranges = Vec::new();
 667    for file in related_files {
 668        let start = prompt.len();
 669        let path_str = file.path.to_string_lossy();
 670        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 671        for excerpt in &file.excerpts {
 672            prompt.push_str(&excerpt.text);
 673            if !prompt.ends_with('\n') {
 674                prompt.push('\n');
 675            }
 676            if excerpt.row_range.end < file.max_row {
 677                prompt.push_str("...\n");
 678            }
 679        }
 680        let end = prompt.len();
 681        ranges.push(start..end);
 682    }
 683    ranges
 684}
 685
 686mod v0112_middle_at_end {
 687    use super::*;
 688
 689    pub fn special_tokens() -> &'static [&'static str] {
 690        &[
 691            "<|fim_prefix|>",
 692            "<|fim_suffix|>",
 693            "<|fim_middle|>",
 694            "<|file_sep|>",
 695            CURSOR_MARKER,
 696        ]
 697    }
 698
 699    pub fn write_cursor_excerpt_section(
 700        prompt: &mut String,
 701        path: &Path,
 702        context: &str,
 703        editable_range: &Range<usize>,
 704        cursor_offset: usize,
 705    ) {
 706        let path_str = path.to_string_lossy();
 707        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 708
 709        prompt.push_str("<|fim_prefix|>\n");
 710        prompt.push_str(&context[..editable_range.start]);
 711
 712        prompt.push_str("<|fim_suffix|>\n");
 713        prompt.push_str(&context[editable_range.end..]);
 714        if !prompt.ends_with('\n') {
 715            prompt.push('\n');
 716        }
 717
 718        prompt.push_str("<|fim_middle|>current\n");
 719        prompt.push_str(&context[editable_range.start..cursor_offset]);
 720        prompt.push_str(CURSOR_MARKER);
 721        prompt.push_str(&context[cursor_offset..editable_range.end]);
 722        if !prompt.ends_with('\n') {
 723            prompt.push('\n');
 724        }
 725
 726        prompt.push_str("<|fim_middle|>updated\n");
 727    }
 728}
 729
 730mod v0113_ordered {
 731    use super::*;
 732
 733    pub fn special_tokens() -> &'static [&'static str] {
 734        &[
 735            "<|fim_prefix|>",
 736            "<|fim_suffix|>",
 737            "<|fim_middle|>",
 738            "<|file_sep|>",
 739            CURSOR_MARKER,
 740        ]
 741    }
 742
 743    pub fn write_cursor_excerpt_section(
 744        prompt: &mut String,
 745        path: &Path,
 746        context: &str,
 747        editable_range: &Range<usize>,
 748        cursor_offset: usize,
 749    ) {
 750        let path_str = path.to_string_lossy();
 751        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 752
 753        prompt.push_str("<|fim_prefix|>\n");
 754        prompt.push_str(&context[..editable_range.start]);
 755        if !prompt.ends_with('\n') {
 756            prompt.push('\n');
 757        }
 758
 759        prompt.push_str("<|fim_middle|>current\n");
 760        prompt.push_str(&context[editable_range.start..cursor_offset]);
 761        prompt.push_str(CURSOR_MARKER);
 762        prompt.push_str(&context[cursor_offset..editable_range.end]);
 763        if !prompt.ends_with('\n') {
 764            prompt.push('\n');
 765        }
 766
 767        prompt.push_str("<|fim_suffix|>\n");
 768        prompt.push_str(&context[editable_range.end..]);
 769        if !prompt.ends_with('\n') {
 770            prompt.push('\n');
 771        }
 772
 773        prompt.push_str("<|fim_middle|>updated\n");
 774    }
 775}
 776
mod v0114180_editable_region {
    //! Format whose special tokens are the same as those of `v0113_ordered`.

    use super::*;

    /// Returns the special tokens of this format, which are exactly those of
    /// `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
 784
 785pub mod v0120_git_merge_markers {
 786    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 787    //!
 788    //! Example prompt:
 789    //!
 790    //! <|file_sep|>path/to/target_file.py
 791    //! <|fim_prefix|>
 792    //! code before editable region
 793    //! <|fim_suffix|>
 794    //! code after editable region
 795    //! <|fim_middle|>
 796    //! <<<<<<< CURRENT
 797    //! code that
 798    //! needs to<|user_cursor|>
 799    //! be rewritten
 800    //! =======
 801    //!
 802    //! Expected output (should be generated by the model):
 803    //!
 804    //! updated
 805    //! code with
 806    //! changes applied
 807    //! >>>>>>> UPDATED
 808
 809    use super::*;
 810
 811    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 812    pub const SEPARATOR: &str = "=======\n";
 813    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 814
 815    pub fn special_tokens() -> &'static [&'static str] {
 816        &[
 817            "<|fim_prefix|>",
 818            "<|fim_suffix|>",
 819            "<|fim_middle|>",
 820            "<|file_sep|>",
 821            START_MARKER,
 822            SEPARATOR,
 823            END_MARKER,
 824            CURSOR_MARKER,
 825        ]
 826    }
 827
 828    pub fn write_cursor_excerpt_section(
 829        prompt: &mut String,
 830        path: &Path,
 831        context: &str,
 832        editable_range: &Range<usize>,
 833        cursor_offset: usize,
 834    ) {
 835        let path_str = path.to_string_lossy();
 836        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 837
 838        prompt.push_str("<|fim_prefix|>");
 839        prompt.push_str(&context[..editable_range.start]);
 840
 841        prompt.push_str("<|fim_suffix|>");
 842        prompt.push_str(&context[editable_range.end..]);
 843        if !prompt.ends_with('\n') {
 844            prompt.push('\n');
 845        }
 846
 847        prompt.push_str("<|fim_middle|>");
 848        prompt.push_str(START_MARKER);
 849        prompt.push_str(&context[editable_range.start..cursor_offset]);
 850        prompt.push_str(CURSOR_MARKER);
 851        prompt.push_str(&context[cursor_offset..editable_range.end]);
 852        if !prompt.ends_with('\n') {
 853            prompt.push('\n');
 854        }
 855        prompt.push_str(SEPARATOR);
 856    }
 857}
 858
 859pub mod v0131_git_merge_markers_prefix {
 860    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 861    //!
 862    //! Example prompt:
 863    //!
 864    //! <|file_sep|>path/to/target_file.py
 865    //! <|fim_prefix|>
 866    //! code before editable region
 867    //! <<<<<<< CURRENT
 868    //! code that
 869    //! needs to<|user_cursor|>
 870    //! be rewritten
 871    //! =======
 872    //! <|fim_suffix|>
 873    //! code after editable region
 874    //! <|fim_middle|>
 875    //!
 876    //! Expected output (should be generated by the model):
 877    //!
 878    //! updated
 879    //! code with
 880    //! changes applied
 881    //! >>>>>>> UPDATED
 882
 883    use super::*;
 884
 885    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 886    pub const SEPARATOR: &str = "=======\n";
 887    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 888
 889    pub fn special_tokens() -> &'static [&'static str] {
 890        &[
 891            "<|fim_prefix|>",
 892            "<|fim_suffix|>",
 893            "<|fim_middle|>",
 894            "<|file_sep|>",
 895            START_MARKER,
 896            SEPARATOR,
 897            END_MARKER,
 898            CURSOR_MARKER,
 899        ]
 900    }
 901
 902    pub fn write_cursor_excerpt_section(
 903        prompt: &mut String,
 904        path: &Path,
 905        context: &str,
 906        editable_range: &Range<usize>,
 907        cursor_offset: usize,
 908    ) {
 909        let path_str = path.to_string_lossy();
 910        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 911
 912        prompt.push_str("<|fim_prefix|>");
 913        prompt.push_str(&context[..editable_range.start]);
 914        prompt.push_str(START_MARKER);
 915        prompt.push_str(&context[editable_range.start..cursor_offset]);
 916        prompt.push_str(CURSOR_MARKER);
 917        prompt.push_str(&context[cursor_offset..editable_range.end]);
 918        if !prompt.ends_with('\n') {
 919            prompt.push('\n');
 920        }
 921        prompt.push_str(SEPARATOR);
 922
 923        prompt.push_str("<|fim_suffix|>");
 924        prompt.push_str(&context[editable_range.end..]);
 925        if !prompt.ends_with('\n') {
 926            prompt.push('\n');
 927        }
 928
 929        prompt.push_str("<|fim_middle|>");
 930    }
 931}
 932
 933pub mod v0211_prefill {
 934    use super::*;
 935
 936    pub fn special_tokens() -> &'static [&'static str] {
 937        v0131_git_merge_markers_prefix::special_tokens()
 938    }
 939
 940    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 941        let editable_region = &context[editable_range.start..editable_range.end];
 942
 943        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 944        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 945
 946        // Find a token boundary to avoid splitting tokens in the prefill.
 947        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 948        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 949        // the \n and consume any consecutive \n characters after it.
 950        let prefill = &editable_region[..prefill_len];
 951        match prefill.rfind('\n') {
 952            Some(pos) => {
 953                let mut end = pos + 1;
 954                while end < editable_region.len()
 955                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 956                {
 957                    end += 1;
 958                }
 959                editable_region[..end].to_string()
 960            }
 961            // No newline found. Fall back to splitting before the last space
 962            // (word-level boundary)
 963            None => match prefill.rfind(' ') {
 964                Some(pos) => prefill[..pos].to_string(),
 965                None => prefill.to_string(),
 966            },
 967        }
 968    }
 969}
 970
 971pub mod hashline {
 972
 973    use std::fmt::Display;
 974
    /// Written as the very last piece of the prompt by
    /// `write_cursor_excerpt_section`, after the suffix.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written by `write_cursor_excerpt_section` right before the
    /// hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a command line that replaces a single line or a range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a command line that inserts new content after a referenced
    /// line (or before the first line when no line is referenced).
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
 982
 983    pub fn special_tokens() -> &'static [&'static str] {
 984        return &[
 985            SET_COMMAND_MARKER,
 986            "<|set_range|>",
 987            INSERT_COMMAND_MARKER,
 988            CURSOR_MARKER,
 989            "<|file_sep|>",
 990            "<|fim_prefix|>",
 991            "<|fim_suffix|>",
 992            "<|fim_middle|>",
 993        ];
 994    }
 995
 996    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
 997    #[derive(Debug, Clone, PartialEq, Eq)]
 998    struct LineRef {
 999        index: usize,
1000        hash: u8,
1001    }
1002
1003    impl Display for LineRef {
1004        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1005            write!(f, "{}:{:02x}", self.index, self.hash)
1006        }
1007    }
1008
1009    pub fn hash_line(line: &[u8]) -> u8 {
1010        let mut h: u8 = 0;
1011        for &byte in line {
1012            h = h.wrapping_add(byte);
1013        }
1014        return h;
1015    }
1016
1017    /// Write the hashline-encoded editable region into `out`. Each line of
1018    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1019    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1020    /// to the start of `editable_text`).
1021    pub fn write_hashline_editable_region(
1022        out: &mut String,
1023        editable_text: &str,
1024        cursor_offset_in_editable: usize,
1025    ) {
1026        let mut offset = 0;
1027        for (i, line) in editable_text.lines().enumerate() {
1028            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1029                && cursor_offset_in_editable < offset + line.len()
1030            {
1031                (
1032                    &line[..cursor_offset_in_editable - offset],
1033                    CURSOR_MARKER,
1034                    &line[cursor_offset_in_editable - offset..],
1035                )
1036            } else {
1037                (line, "", "")
1038            };
1039            write!(
1040                out,
1041                "\n{}|{head}{cursor}{tail}",
1042                LineRef {
1043                    index: i,
1044                    hash: hash_line(line.as_bytes())
1045                }
1046            )
1047            .unwrap();
1048            offset += line.len() + 1;
1049        }
1050    }
1051
1052    pub fn write_cursor_excerpt_section(
1053        prompt: &mut String,
1054        path: &Path,
1055        context: &str,
1056        editable_range: &Range<usize>,
1057        cursor_offset: usize,
1058    ) {
1059        let path_str = path.to_string_lossy();
1060        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1061
1062        prompt.push_str("<|fim_prefix|>\n");
1063        prompt.push_str(&context[..editable_range.start]);
1064        prompt.push_str(START_MARKER);
1065
1066        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1067        let editable_region = &context[editable_range.clone()];
1068        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1069
1070        if !prompt.ends_with('\n') {
1071            prompt.push('\n');
1072        }
1073
1074        prompt.push_str("<|fim_suffix|>\n");
1075        prompt.push_str(&context[editable_range.end..]);
1076        if !prompt.ends_with('\n') {
1077            prompt.push('\n');
1078        }
1079
1080        prompt.push_str(END_MARKER);
1081    }
1082
    /// A single edit command parsed from the model output.
    ///
    /// In both variants `content` borrows from the model output: it is the text
    /// between the command line and the next command line (or the end of the
    /// output).
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` when the command line carried no
            // parseable line reference.
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1100
1101    /// Parse a line reference like `3:c3` into a `LineRef`.
1102    fn parse_line_ref(s: &str) -> Option<LineRef> {
1103        let (idx_str, hash_str) = s.split_once(':')?;
1104        let index = idx_str.parse::<usize>().ok()?;
1105        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1106        Some(LineRef { index, hash })
1107    }
1108
1109    /// Parse the model output into a list of `EditCommand`s.
1110    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1111        let mut commands = Vec::new();
1112        let mut offset = 0usize;
1113
1114        while offset < model_output.len() {
1115            let next_nl = model_output[offset..]
1116                .find('\n')
1117                .map(|i| offset + i)
1118                .unwrap_or(model_output.len());
1119            let line = &model_output[offset..next_nl];
1120            let line_end = if next_nl < model_output.len() {
1121                next_nl + 1
1122            } else {
1123                next_nl
1124            };
1125
1126            let trimmed = line.trim();
1127            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1128                (true, spec)
1129            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1130                (false, spec)
1131            } else {
1132                offset = line_end;
1133                continue;
1134            };
1135
1136            let mut content_end = line_end;
1137            let mut scan = line_end;
1138
1139            while scan < model_output.len() {
1140                let body_nl = model_output[scan..]
1141                    .find('\n')
1142                    .map(|i| scan + i)
1143                    .unwrap_or(model_output.len());
1144                let body_line = &model_output[scan..body_nl];
1145                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1146                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1147                {
1148                    break;
1149                }
1150                scan = if body_nl < model_output.len() {
1151                    body_nl + 1
1152                } else {
1153                    body_nl
1154                };
1155                content_end = scan;
1156            }
1157
1158            let content = &model_output[line_end..content_end];
1159
1160            if is_set {
1161                if let Some((start_str, end_str)) = specifier.split_once('-') {
1162                    if let (Some(start), Some(end)) =
1163                        (parse_line_ref(start_str), parse_line_ref(end_str))
1164                    {
1165                        commands.push(EditCommand::Set {
1166                            start,
1167                            end,
1168                            content,
1169                        });
1170                    }
1171                } else if let Some(target) = parse_line_ref(specifier) {
1172                    commands.push(EditCommand::Set {
1173                        start: target.clone(),
1174                        end: target,
1175                        content,
1176                    });
1177                }
1178            } else {
1179                let after = parse_line_ref(specifier);
1180                commands.push(EditCommand::Insert { after, content });
1181            }
1182
1183            offset = scan;
1184        }
1185
1186        commands
1187    }
1188
1189    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1190    /// (as opposed to being a plain full-replacement output).
1191    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1192    /// editable region, returning the plain text content.
1193    pub fn strip_hashline_prefixes(region: &str) -> String {
1194        let mut decoded: String = region
1195            .lines()
1196            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1197            .collect::<Vec<_>>()
1198            .join("\n");
1199        if region.ends_with('\n') {
1200            decoded.push('\n');
1201        }
1202        decoded
1203    }
1204
1205    pub fn output_has_edit_commands(model_output: &str) -> bool {
1206        model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1207    }
1208
1209    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1210    /// original editable region text.
1211    ///
1212    /// `editable_region` is the original text of the editable region (without hash
1213    /// prefixes). `model_output` is the raw model response containing edit commands.
1214    ///
1215    /// Returns the full replacement text for the editable region.
1216    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1217        let original_lines: Vec<&str> = editable_region.lines().collect();
1218        let old_hashes: Vec<u8> = original_lines
1219            .iter()
1220            .map(|line| hash_line(line.as_bytes()))
1221            .collect();
1222
1223        let commands = parse_edit_commands(model_output);
1224
1225        // For set operations: indexed by start line → Some((end line index, content))
1226        // For insert operations: indexed by line index → vec of content to insert after
1227        // Insert-before-first is tracked separately.
1228        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1229        let mut insert_before_first: Vec<&str> = Vec::new();
1230        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1231
1232        for command in &commands {
1233            match command {
1234                EditCommand::Set {
1235                    start,
1236                    end,
1237                    content,
1238                } => {
1239                    if start.index < old_hashes.len()
1240                        && end.index < old_hashes.len()
1241                        && start.index <= end.index
1242                        && old_hashes[start.index] == start.hash
1243                        && old_hashes[end.index] == end.hash
1244                    {
1245                        set_ops[start.index] = Some((end.index, *content));
1246                    }
1247                }
1248                EditCommand::Insert { after, content } => match after {
1249                    None => insert_before_first.push(*content),
1250                    Some(line_ref) => {
1251                        if line_ref.index < old_hashes.len()
1252                            && old_hashes[line_ref.index] == line_ref.hash
1253                        {
1254                            insert_after[line_ref.index].push(*content);
1255                        }
1256                    }
1257                },
1258            }
1259        }
1260
1261        let mut result = String::new();
1262
1263        // Emit any insertions before the first line
1264        for content in &insert_before_first {
1265            result.push_str(content);
1266            if !content.ends_with('\n') {
1267                result.push('\n');
1268            }
1269        }
1270
1271        let mut i = 0;
1272        while i < original_lines.len() {
1273            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1274                // Replace lines i..=end_index with the replacement content
1275                result.push_str(replacement);
1276                if !replacement.is_empty() && !replacement.ends_with('\n') {
1277                    result.push('\n');
1278                }
1279                // Emit any insertions after the end of this set range
1280                if *end_index < insert_after.len() {
1281                    for content in &insert_after[*end_index] {
1282                        result.push_str(content);
1283                        if !content.ends_with('\n') {
1284                            result.push('\n');
1285                        }
1286                    }
1287                }
1288                i = end_index + 1;
1289            } else {
1290                // Keep the original line
1291                result.push_str(original_lines[i]);
1292                result.push('\n');
1293                // Emit any insertions after this line
1294                for content in &insert_after[i] {
1295                    result.push_str(content);
1296                    if !content.ends_with('\n') {
1297                        result.push('\n');
1298                    }
1299                }
1300                i += 1;
1301            }
1302        }
1303
1304        // Preserve trailing newline behavior: if the original ended with a
1305        // newline the result already has one; if it didn't, trim the extra one
1306        // we added.
1307        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1308            result.pop();
1309        }
1310
1311        result
1312    }
1313
1314    /// Convert a unified diff patch into hashline edit commands.
1315    ///
1316    /// Parses the unified diff `patch` directly to determine which lines of
1317    /// `old_text` are deleted/replaced and what new lines are added, then emits
1318    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1319    /// `{index}:{hash}` identifiers.
1320    ///
1321    /// `cursor_offset` is an optional byte offset into the first hunk's new
1322    /// text (context + additions) where the cursor marker should be placed.
1323    pub fn patch_to_edit_commands(
1324        old_text: &str,
1325        patch: &str,
1326        cursor_offset: Option<usize>,
1327    ) -> Result<String> {
1328        let old_lines: Vec<&str> = old_text.lines().collect();
1329        let old_hashes: Vec<u8> = old_lines
1330            .iter()
1331            .map(|line| hash_line(line.as_bytes()))
1332            .collect();
1333
1334        let mut result = String::new();
1335        let mut first_hunk = true;
1336
1337        struct Hunk<'a> {
1338            line_range: Range<usize>,
1339            new_text_lines: Vec<&'a str>,
1340            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1341        }
1342
1343        // Parse the patch line by line. We only care about hunk headers,
1344        // context, deletions, and additions.
1345        let mut old_line_index: usize = 0;
1346        let mut current_hunk: Option<Hunk> = None;
1347        // Byte offset tracking within the hunk's new text for cursor placement.
1348        let mut new_text_byte_offset: usize = 0;
1349        // The line index of the last old line seen before/in the current hunk
1350        // (used for insert-after reference).
1351        let mut last_old_line_before_hunk: Option<usize> = None;
1352
1353        fn flush_hunk(
1354            hunk: Hunk,
1355            last_old_line: Option<usize>,
1356            result: &mut String,
1357            old_hashes: &[u8],
1358        ) {
1359            if hunk.line_range.is_empty() {
1360                // Pure insertion — reference the old line to insert after when in bounds.
1361                if let Some(after) = last_old_line
1362                    && let Some(&hash) = old_hashes.get(after)
1363                {
1364                    write!(
1365                        result,
1366                        "{INSERT_COMMAND_MARKER}{}\n",
1367                        LineRef { index: after, hash }
1368                    )
1369                    .unwrap();
1370                } else {
1371                    result.push_str(INSERT_COMMAND_MARKER);
1372                    result.push('\n');
1373                }
1374            } else {
1375                let start = hunk.line_range.start;
1376                let end_exclusive = hunk.line_range.end;
1377                let deleted_line_count = end_exclusive.saturating_sub(start);
1378
1379                if deleted_line_count == 1 {
1380                    if let Some(&hash) = old_hashes.get(start) {
1381                        write!(
1382                            result,
1383                            "{SET_COMMAND_MARKER}{}\n",
1384                            LineRef { index: start, hash }
1385                        )
1386                        .unwrap();
1387                    } else {
1388                        result.push_str(SET_COMMAND_MARKER);
1389                        result.push('\n');
1390                    }
1391                } else {
1392                    let end_inclusive = end_exclusive - 1;
1393                    match (
1394                        old_hashes.get(start).copied(),
1395                        old_hashes.get(end_inclusive).copied(),
1396                    ) {
1397                        (Some(start_hash), Some(end_hash)) => {
1398                            write!(
1399                                result,
1400                                "{SET_COMMAND_MARKER}{}-{}\n",
1401                                LineRef {
1402                                    index: start,
1403                                    hash: start_hash
1404                                },
1405                                LineRef {
1406                                    index: end_inclusive,
1407                                    hash: end_hash
1408                                }
1409                            )
1410                            .unwrap();
1411                        }
1412                        _ => {
1413                            result.push_str(SET_COMMAND_MARKER);
1414                            result.push('\n');
1415                        }
1416                    }
1417                }
1418            }
1419            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1420                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1421                    && line_offset == cursor_line_offset
1422                {
1423                    result.push_str(&line[..char_offset]);
1424                    result.push_str(CURSOR_MARKER);
1425                    result.push_str(&line[char_offset..]);
1426                    continue;
1427                }
1428
1429                result.push_str(line);
1430            }
1431        }
1432
1433        for raw_line in patch.split_inclusive('\n') {
1434            if raw_line.starts_with("@@") {
1435                // Flush any pending change hunk from a previous patch hunk.
1436                if let Some(hunk) = current_hunk.take() {
1437                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1438                }
1439
1440                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1441                // We intentionally do not trust old_start as a direct local index into `old_text`,
1442                // because some patches are produced against a larger file region and carry
1443                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1444                if first_hunk {
1445                    new_text_byte_offset = 0;
1446                    first_hunk = false;
1447                }
1448                continue;
1449            }
1450
1451            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1452                continue;
1453            }
1454            if raw_line.starts_with("\\ No newline") {
1455                continue;
1456            }
1457
1458            if raw_line.starts_with('-') {
1459                // Extend or start a change hunk with this deleted old line.
1460                match &mut current_hunk {
1461                    Some(Hunk {
1462                        line_range: range, ..
1463                    }) => range.end = old_line_index + 1,
1464                    None => {
1465                        current_hunk = Some(Hunk {
1466                            line_range: old_line_index..old_line_index + 1,
1467                            new_text_lines: Vec::new(),
1468                            cursor_line_offset_in_new_text: None,
1469                        });
1470                    }
1471                }
1472                old_line_index += 1;
1473            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1474                // Place cursor marker if cursor_offset falls within this line.
1475                let mut cursor_line_offset = None;
1476                if let Some(cursor_off) = cursor_offset
1477                    && (first_hunk
1478                        || cursor_off >= new_text_byte_offset
1479                            && cursor_off <= new_text_byte_offset + added_content.len())
1480                {
1481                    let line_offset = added_content.floor_char_boundary(
1482                        cursor_off
1483                            .saturating_sub(new_text_byte_offset)
1484                            .min(added_content.len()),
1485                    );
1486                    cursor_line_offset = Some(line_offset);
1487                }
1488
1489                new_text_byte_offset += added_content.len();
1490
1491                let hunk = current_hunk.get_or_insert(Hunk {
1492                    line_range: old_line_index..old_line_index,
1493                    new_text_lines: vec![],
1494                    cursor_line_offset_in_new_text: None,
1495                });
1496                hunk.new_text_lines.push(added_content);
1497                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1498                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1499            } else {
1500                // Context line (starts with ' ' or is empty).
1501                if let Some(hunk) = current_hunk.take() {
1502                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1503                }
1504                last_old_line_before_hunk = Some(old_line_index);
1505                old_line_index += 1;
1506                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1507                new_text_byte_offset += content.len();
1508            }
1509        }
1510
1511        // Flush final group.
1512        if let Some(hunk) = current_hunk.take() {
1513            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1514        }
1515
1516        // Trim a single trailing newline.
1517        if result.ends_with('\n') {
1518            result.pop();
1519        }
1520
1521        Ok(result)
1522    }
1523
1524    #[cfg(test)]
1525    mod tests {
1526        use super::*;
1527        use indoc::indoc;
1528
        /// Table-driven check of `hashline::write_cursor_excerpt_section`: each
        /// case renders a prompt for `test.rs` and compares it with the exact
        /// expected text, including the per-line `{index}:{hash}|` prefixes and
        /// the position (or absence) of the cursor marker.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                // Label reported when the case fails.
                name: &'static str,
                // Full excerpt text passed as `context`.
                context: &'static str,
                // Byte range of the editable region within `context`.
                editable_range: Range<usize>,
                // Cursor position as a byte offset into `context`.
                cursor_offset: usize,
                // Exact prompt text the function must produce.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:5c|hello<|user_cursor|> world
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    2:29|ccc
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:d9|lin<|user_cursor|>e1
                    1:da|line2
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:00|
                    1:26|a<|user_cursor|>bc
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|ab<|user_cursor|>c
                    <|fim_suffix|>
                    def
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:1b|hé<|user_cursor|>llo
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:80|日本<|user_cursor|>語
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:6b|a🌍<|user_cursor|>b
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    1:2f|def
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    pre
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    <|fim_suffix|>
                    suf
                    <|fim_middle|>updated"},
                },
            ];

            // Render every case into a fresh prompt and compare it verbatim.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
1709
1710        #[test]
1711        fn test_apply_edit_commands() {
1712            struct Case {
1713                name: &'static str,
1714                original: &'static str,
1715                model_output: &'static str,
1716                expected: &'static str,
1717            }
1718
1719            let cases = vec![
1720                Case {
1721                    name: "set_single_line",
1722                    original: indoc! {"
1723                    let mut total = 0;
1724                    for product in products {
1725                        total += ;
1726                    }
1727                    total
1728                "},
1729                    model_output: indoc! {"
1730                    <|set|>2:87
1731                        total += product.price;
1732                "},
1733                    expected: indoc! {"
1734                    let mut total = 0;
1735                    for product in products {
1736                        total += product.price;
1737                    }
1738                    total
1739                "},
1740                },
1741                Case {
1742                    name: "set_range",
1743                    original: indoc! {"
1744                    fn foo() {
1745                        let x = 1;
1746                        let y = 2;
1747                        let z = 3;
1748                    }
1749                "},
1750                    model_output: indoc! {"
1751                    <|set|>1:46-3:4a
1752                        let sum = 6;
1753                "},
1754                    expected: indoc! {"
1755                    fn foo() {
1756                        let sum = 6;
1757                    }
1758                "},
1759                },
1760                Case {
1761                    name: "insert_after_line",
1762                    original: indoc! {"
1763                    fn main() {
1764                        let x = 1;
1765                    }
1766                "},
1767                    model_output: indoc! {"
1768                    <|insert|>1:46
1769                        let y = 2;
1770                "},
1771                    expected: indoc! {"
1772                    fn main() {
1773                        let x = 1;
1774                        let y = 2;
1775                    }
1776                "},
1777                },
1778                Case {
1779                    name: "insert_before_first",
1780                    original: indoc! {"
1781                    let x = 1;
1782                    let y = 2;
1783                "},
1784                    model_output: indoc! {"
1785                    <|insert|>
1786                    use std::io;
1787                "},
1788                    expected: indoc! {"
1789                    use std::io;
1790                    let x = 1;
1791                    let y = 2;
1792                "},
1793                },
1794                Case {
1795                    name: "set_with_cursor_marker",
1796                    original: indoc! {"
1797                    fn main() {
1798                        println!();
1799                    }
1800                "},
1801                    model_output: indoc! {"
1802                    <|set|>1:34
1803                        eprintln!(\"<|user_cursor|>\");
1804                "},
1805                    expected: indoc! {"
1806                    fn main() {
1807                        eprintln!(\"<|user_cursor|>\");
1808                    }
1809                "},
1810                },
1811                Case {
1812                    name: "multiple_set_commands",
1813                    original: indoc! {"
1814                    aaa
1815                    bbb
1816                    ccc
1817                    ddd
1818                "},
1819                    model_output: indoc! {"
1820                    <|set|>0:23
1821                    AAA
1822                    <|set|>2:29
1823                    CCC
1824                "},
1825                    expected: indoc! {"
1826                    AAA
1827                    bbb
1828                    CCC
1829                    ddd
1830                "},
1831                },
1832                Case {
1833                    name: "set_range_multiline_replacement",
1834                    original: indoc! {"
1835                    fn handle_submit() {
1836                    }
1837
1838                    fn handle_keystroke() {
1839                "},
1840                    model_output: indoc! {"
1841                    <|set|>0:3f-1:7d
1842                    fn handle_submit(modal_state: &mut ModalState) {
1843                        <|user_cursor|>
1844                    }
1845                "},
1846                    expected: indoc! {"
1847                    fn handle_submit(modal_state: &mut ModalState) {
1848                        <|user_cursor|>
1849                    }
1850
1851                    fn handle_keystroke() {
1852                "},
1853                },
1854                Case {
1855                    name: "no_edit_commands_returns_original",
1856                    original: indoc! {"
1857                    hello
1858                    world
1859                "},
1860                    model_output: "some random text with no commands",
1861                    expected: indoc! {"
1862                    hello
1863                    world
1864                "},
1865                },
1866                Case {
1867                    name: "wrong_hash_set_ignored",
1868                    original: indoc! {"
1869                    aaa
1870                    bbb
1871                "},
1872                    model_output: indoc! {"
1873                    <|set|>0:ff
1874                    ZZZ
1875                "},
1876                    expected: indoc! {"
1877                    aaa
1878                    bbb
1879                "},
1880                },
1881                Case {
1882                    name: "insert_and_set_combined",
1883                    original: indoc! {"
1884                    alpha
1885                    beta
1886                    gamma
1887                "},
1888                    model_output: indoc! {"
1889                    <|set|>0:06
1890                    ALPHA
1891                    <|insert|>1:9c
1892                    beta_extra
1893                "},
1894                    expected: indoc! {"
1895                    ALPHA
1896                    beta
1897                    beta_extra
1898                    gamma
1899                "},
1900                },
1901                Case {
1902                    name: "no_trailing_newline_preserved",
1903                    original: "hello\nworld",
1904                    model_output: indoc! {"
1905                    <|set|>0:14
1906                    HELLO
1907                "},
1908                    expected: "HELLO\nworld",
1909                },
1910                Case {
1911                    name: "set_range_hash_mismatch_in_end_bound",
1912                    original: indoc! {"
1913                    one
1914                    two
1915                    three
1916                "},
1917                    model_output: indoc! {"
1918                    <|set|>0:42-2:ff
1919                    ONE_TWO_THREE
1920                "},
1921                    expected: indoc! {"
1922                    one
1923                    two
1924                    three
1925                "},
1926                },
1927                Case {
1928                    name: "set_range_start_greater_than_end_ignored",
1929                    original: indoc! {"
1930                    a
1931                    b
1932                    c
1933                "},
1934                    model_output: indoc! {"
1935                    <|set|>2:63-1:62
1936                    X
1937                "},
1938                    expected: indoc! {"
1939                    a
1940                    b
1941                    c
1942                "},
1943                },
1944                Case {
1945                    name: "insert_out_of_bounds_ignored",
1946                    original: indoc! {"
1947                    x
1948                    y
1949                "},
1950                    model_output: indoc! {"
1951                    <|insert|>99:aa
1952                    z
1953                "},
1954                    expected: indoc! {"
1955                    x
1956                    y
1957                "},
1958                },
1959                Case {
1960                    name: "set_out_of_bounds_ignored",
1961                    original: indoc! {"
1962                    x
1963                    y
1964                "},
1965                    model_output: indoc! {"
1966                    <|set|>99:aa
1967                    z
1968                "},
1969                    expected: indoc! {"
1970                    x
1971                    y
1972                "},
1973                },
1974                Case {
1975                    name: "malformed_set_command_ignored",
1976                    original: indoc! {"
1977                    alpha
1978                    beta
1979                "},
1980                    model_output: indoc! {"
1981                    <|set|>not-a-line-ref
1982                    UPDATED
1983                "},
1984                    expected: indoc! {"
1985                    alpha
1986                    beta
1987                "},
1988                },
1989                Case {
1990                    name: "malformed_insert_hash_treated_as_before_first",
1991                    original: indoc! {"
1992                    alpha
1993                    beta
1994                "},
1995                    model_output: indoc! {"
1996                    <|insert|>1:nothex
1997                    preamble
1998                "},
1999                    expected: indoc! {"
2000                    preamble
2001                    alpha
2002                    beta
2003                "},
2004                },
2005                Case {
2006                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2007                    original: indoc! {"
2008                    cat
2009                    dog
2010                "},
2011                    model_output: indoc! {"
2012                    <|set|>0:38
2013                    CAT
2014                    <|insert|>0:38
2015                    TAIL
2016                "},
2017                    expected: indoc! {"
2018                    CAT
2019                    TAIL
2020                    dog
2021                "},
2022                },
2023                Case {
2024                    name: "overlapping_set_ranges_last_wins",
2025                    original: indoc! {"
2026                    a
2027                    b
2028                    c
2029                    d
2030                "},
2031                    model_output: indoc! {"
2032                    <|set|>0:61-2:63
2033                    FIRST
2034                    <|set|>1:62-3:64
2035                    SECOND
2036                "},
2037                    expected: indoc! {"
2038                    FIRST
2039                    d
2040                "},
2041                },
2042                Case {
2043                    name: "insert_before_first_and_after_line",
2044                    original: indoc! {"
2045                    a
2046                    b
2047                "},
2048                    model_output: indoc! {"
2049                    <|insert|>
2050                    HEAD
2051                    <|insert|>0:61
2052                    MID
2053                "},
2054                    expected: indoc! {"
2055                    HEAD
2056                    a
2057                    MID
2058                    b
2059                "},
2060                },
2061            ];
2062
2063            for case in &cases {
2064                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2065                assert_eq!(result, case.expected, "failed case: {}", case.name);
2066            }
2067        }
2068
2069        #[test]
2070        fn test_output_has_edit_commands() {
2071            assert!(hashline::output_has_edit_commands(&format!(
2072                "{}0:ab\nnew",
2073                SET_COMMAND_MARKER
2074            )));
2075            assert!(hashline::output_has_edit_commands(&format!(
2076                "{}0:ab\nnew",
2077                INSERT_COMMAND_MARKER
2078            )));
2079            assert!(hashline::output_has_edit_commands(&format!(
2080                "some text\n{}1:cd\nstuff",
2081                SET_COMMAND_MARKER
2082            )));
2083            assert!(!hashline::output_has_edit_commands("just plain text"));
2084            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2085        }
2086
2087        // ---- hashline::patch_to_edit_commands round-trip tests ----
2088
2089        #[test]
2090        fn test_patch_to_edit_commands() {
2091            struct Case {
2092                name: &'static str,
2093                old: &'static str,
2094                patch: &'static str,
2095                expected_new: &'static str,
2096            }
2097
2098            let cases = [
2099                Case {
2100                    name: "single_line_replacement",
2101                    old: indoc! {"
2102                    let mut total = 0;
2103                    for product in products {
2104                        total += ;
2105                    }
2106                    total
2107                "},
2108                    patch: indoc! {"
2109                    @@ -1,5 +1,5 @@
2110                     let mut total = 0;
2111                     for product in products {
2112                    -    total += ;
2113                    +    total += product.price;
2114                     }
2115                     total
2116                "},
2117                    expected_new: indoc! {"
2118                    let mut total = 0;
2119                    for product in products {
2120                        total += product.price;
2121                    }
2122                    total
2123                "},
2124                },
2125                Case {
2126                    name: "multiline_replacement",
2127                    old: indoc! {"
2128                    fn foo() {
2129                        let x = 1;
2130                        let y = 2;
2131                        let z = 3;
2132                    }
2133                "},
2134                    patch: indoc! {"
2135                    @@ -1,5 +1,3 @@
2136                     fn foo() {
2137                    -    let x = 1;
2138                    -    let y = 2;
2139                    -    let z = 3;
2140                    +    let sum = 1 + 2 + 3;
2141                     }
2142                "},
2143                    expected_new: indoc! {"
2144                    fn foo() {
2145                        let sum = 1 + 2 + 3;
2146                    }
2147                "},
2148                },
2149                Case {
2150                    name: "insertion",
2151                    old: indoc! {"
2152                    fn main() {
2153                        let x = 1;
2154                    }
2155                "},
2156                    patch: indoc! {"
2157                    @@ -1,3 +1,4 @@
2158                     fn main() {
2159                         let x = 1;
2160                    +    let y = 2;
2161                     }
2162                "},
2163                    expected_new: indoc! {"
2164                    fn main() {
2165                        let x = 1;
2166                        let y = 2;
2167                    }
2168                "},
2169                },
2170                Case {
2171                    name: "insertion_before_first",
2172                    old: indoc! {"
2173                    let x = 1;
2174                    let y = 2;
2175                "},
2176                    patch: indoc! {"
2177                    @@ -1,2 +1,3 @@
2178                    +use std::io;
2179                     let x = 1;
2180                     let y = 2;
2181                "},
2182                    expected_new: indoc! {"
2183                    use std::io;
2184                    let x = 1;
2185                    let y = 2;
2186                "},
2187                },
2188                Case {
2189                    name: "deletion",
2190                    old: indoc! {"
2191                    aaa
2192                    bbb
2193                    ccc
2194                    ddd
2195                "},
2196                    patch: indoc! {"
2197                    @@ -1,4 +1,2 @@
2198                     aaa
2199                    -bbb
2200                    -ccc
2201                     ddd
2202                "},
2203                    expected_new: indoc! {"
2204                    aaa
2205                    ddd
2206                "},
2207                },
2208                Case {
2209                    name: "multiple_changes",
2210                    old: indoc! {"
2211                    alpha
2212                    beta
2213                    gamma
2214                    delta
2215                    epsilon
2216                "},
2217                    patch: indoc! {"
2218                    @@ -1,5 +1,5 @@
2219                    -alpha
2220                    +ALPHA
2221                     beta
2222                     gamma
2223                    -delta
2224                    +DELTA
2225                     epsilon
2226                "},
2227                    expected_new: indoc! {"
2228                    ALPHA
2229                    beta
2230                    gamma
2231                    DELTA
2232                    epsilon
2233                "},
2234                },
2235                Case {
2236                    name: "replace_with_insertion",
2237                    old: indoc! {r#"
2238                    fn handle() {
2239                        modal_state.close();
2240                        modal_state.dismiss();
2241                "#},
2242                    patch: indoc! {r#"
2243                    @@ -1,3 +1,4 @@
2244                     fn handle() {
2245                         modal_state.close();
2246                    +    eprintln!("");
2247                         modal_state.dismiss();
2248                "#},
2249                    expected_new: indoc! {r#"
2250                    fn handle() {
2251                        modal_state.close();
2252                        eprintln!("");
2253                        modal_state.dismiss();
2254                "#},
2255                },
2256                Case {
2257                    name: "complete_replacement",
2258                    old: indoc! {"
2259                    aaa
2260                    bbb
2261                    ccc
2262                "},
2263                    patch: indoc! {"
2264                    @@ -1,3 +1,3 @@
2265                    -aaa
2266                    -bbb
2267                    -ccc
2268                    +xxx
2269                    +yyy
2270                    +zzz
2271                "},
2272                    expected_new: indoc! {"
2273                    xxx
2274                    yyy
2275                    zzz
2276                "},
2277                },
2278                Case {
2279                    name: "add_function_body",
2280                    old: indoc! {"
2281                    fn foo() {
2282                        modal_state.dismiss();
2283                    }
2284
2285                    fn
2286
2287                    fn handle_keystroke() {
2288                "},
2289                    patch: indoc! {"
2290                    @@ -1,6 +1,8 @@
2291                     fn foo() {
2292                         modal_state.dismiss();
2293                     }
2294
2295                    -fn
2296                    +fn handle_submit() {
2297                    +    todo()
2298                    +}
2299
2300                     fn handle_keystroke() {
2301                "},
2302                    expected_new: indoc! {"
2303                    fn foo() {
2304                        modal_state.dismiss();
2305                    }
2306
2307                    fn handle_submit() {
2308                        todo()
2309                    }
2310
2311                    fn handle_keystroke() {
2312                "},
2313                },
2314                Case {
2315                    name: "with_cursor_offset",
2316                    old: indoc! {r#"
2317                    fn main() {
2318                        println!();
2319                    }
2320                "#},
2321                    patch: indoc! {r#"
2322                    @@ -1,3 +1,3 @@
2323                     fn main() {
2324                    -    println!();
2325                    +    eprintln!("");
2326                     }
2327                "#},
2328                    expected_new: indoc! {r#"
2329                    fn main() {
2330                        eprintln!("<|user_cursor|>");
2331                    }
2332                "#},
2333                },
2334                Case {
2335                    name: "non_local_hunk_header_pure_insertion_repro",
2336                    old: indoc! {"
2337                    aaa
2338                    bbb
2339                "},
2340                    patch: indoc! {"
2341                    @@ -20,2 +20,3 @@
2342                     aaa
2343                    +xxx
2344                     bbb
2345                "},
2346                    expected_new: indoc! {"
2347                    aaa
2348                    xxx
2349                    bbb
2350                "},
2351                },
2352            ];
2353
2354            for case in &cases {
2355                // The cursor_offset for patch_to_edit_commands is relative to
2356                // the first hunk's new text (context + additions). We compute
2357                // it by finding where the marker sits in the expected output
2358                // (which mirrors the new text of the hunk).
2359                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2360
2361                let commands =
2362                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2363                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2364
2365                assert!(
2366                    hashline::output_has_edit_commands(&commands),
2367                    "case {}: expected edit commands, got: {commands:?}",
2368                    case.name,
2369                );
2370
2371                let applied = hashline::apply_edit_commands(case.old, &commands);
2372                assert_eq!(applied, case.expected_new, "case {}", case.name);
2373            }
2374        }
2375    }
2376}
2377
2378pub mod seed_coder {
2379    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2380    //!
2381    //! Seed-Coder uses different FIM tokens and order than Qwen:
2382    //! - SPM order: suffix comes FIRST, then prefix, then middle
2383    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2384    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2385    //!
2386    //! All context (related files, edit history) goes in the PREFIX section.
2387    //! The suffix contains only code after the editable region.
2388    //!
2389    //! Example prompt:
2390    //!
2391    //! <[fim-suffix]>
2392    //! code after editable region
2393    //! <[fim-prefix]><filename>related/file.py
2394    //! related file content
2395    //!
2396    //! <filename>edit_history
2397    //! --- a/some_file.py
2398    //! +++ b/some_file.py
2399    //! -old
2400    //! +new
2401    //!
2402    //! <filename>path/to/target_file.py
2403    //! code before editable region
2404    //! <<<<<<< CURRENT
2405    //! code that
2406    //! needs to<|user_cursor|>
2407    //! be rewritten
2408    //! =======
2409    //! <[fim-middle]>
2410    //!
2411    //! Expected output (model generates):
2412    //!
2413    //! updated
2414    //! code with
2415    //! changes applied
2416    //! >>>>>>> UPDATED
2417
2418    use super::*;
2419
2420    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2421    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2422    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2423    pub const FILE_MARKER: &str = "<filename>";
2424
2425    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2426    pub const SEPARATOR: &str = "=======\n";
2427    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2428
2429    pub fn special_tokens() -> &'static [&'static str] {
2430        &[
2431            FIM_SUFFIX,
2432            FIM_PREFIX,
2433            FIM_MIDDLE,
2434            FILE_MARKER,
2435            START_MARKER,
2436            SEPARATOR,
2437            END_MARKER,
2438            CURSOR_MARKER,
2439        ]
2440    }
2441
2442    pub fn write_cursor_excerpt_section(
2443        prompt: &mut String,
2444        path: &Path,
2445        context: &str,
2446        editable_range: &Range<usize>,
2447        cursor_offset: usize,
2448    ) {
2449        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2450        prompt.push_str(&section);
2451    }
2452
2453    pub fn format_prompt_with_budget(
2454        path: &Path,
2455        context: &str,
2456        editable_range: &Range<usize>,
2457        cursor_offset: usize,
2458        events: &[Arc<Event>],
2459        related_files: &[RelatedFile],
2460        max_tokens: usize,
2461    ) -> String {
2462        let suffix_section = build_suffix_section(context, editable_range);
2463        let cursor_prefix_section =
2464            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2465
2466        let suffix_tokens = estimate_tokens(suffix_section.len());
2467        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2468        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2469
2470        let edit_history_section = super::format_edit_history_within_budget(
2471            events,
2472            FILE_MARKER,
2473            "edit_history",
2474            budget_after_cursor,
2475        );
2476        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2477        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2478
2479        let related_files_section = super::format_related_files_within_budget(
2480            related_files,
2481            FILE_MARKER,
2482            "",
2483            budget_after_edit_history,
2484        );
2485
2486        let mut prompt = String::new();
2487        prompt.push_str(&suffix_section);
2488        prompt.push_str(FIM_PREFIX);
2489        prompt.push_str(&related_files_section);
2490        if !related_files_section.is_empty() {
2491            prompt.push('\n');
2492        }
2493        prompt.push_str(&edit_history_section);
2494        if !edit_history_section.is_empty() {
2495            prompt.push('\n');
2496        }
2497        prompt.push_str(&cursor_prefix_section);
2498        prompt.push_str(FIM_MIDDLE);
2499        prompt
2500    }
2501
2502    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2503        let mut section = String::new();
2504        section.push_str(FIM_SUFFIX);
2505        section.push_str(&context[editable_range.end..]);
2506        if !section.ends_with('\n') {
2507            section.push('\n');
2508        }
2509        section
2510    }
2511
2512    fn build_cursor_prefix_section(
2513        path: &Path,
2514        context: &str,
2515        editable_range: &Range<usize>,
2516        cursor_offset: usize,
2517    ) -> String {
2518        let mut section = String::new();
2519        let path_str = path.to_string_lossy();
2520        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2521
2522        section.push_str(&context[..editable_range.start]);
2523        section.push_str(START_MARKER);
2524        section.push_str(&context[editable_range.start..cursor_offset]);
2525        section.push_str(CURSOR_MARKER);
2526        section.push_str(&context[cursor_offset..editable_range.end]);
2527        if !section.ends_with('\n') {
2528            section.push('\n');
2529        }
2530        section.push_str(SEPARATOR);
2531        section
2532    }
2533}
2534
2535/// The zeta1 prompt format
2536pub mod zeta1 {
2537    use super::*;
2538    use std::fmt::Write;
2539
2540    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
2541    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
2542    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
2543    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
2544
2545    const INSTRUCTION_HEADER: &str = concat!(
2546        "### Instruction:\n",
2547        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
2548        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
2549        "into account the cursor location.\n\n",
2550        "### User Edits:\n\n"
2551    );
2552    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
2553    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
2554
2555    /// Formats a complete zeta1 prompt from the input events and excerpt.
2556    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
2557        let mut prompt = String::with_capacity(
2558            INSTRUCTION_HEADER.len()
2559                + input_events.len()
2560                + EXCERPT_HEADER.len()
2561                + input_excerpt.len()
2562                + RESPONSE_HEADER.len(),
2563        );
2564        prompt.push_str(INSTRUCTION_HEADER);
2565        prompt.push_str(input_events);
2566        prompt.push_str(EXCERPT_HEADER);
2567        prompt.push_str(input_excerpt);
2568        prompt.push_str(RESPONSE_HEADER);
2569        prompt
2570    }
2571
2572    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
2573    /// editable and context byte-offset ranges within `cursor_excerpt`.
2574    pub fn format_zeta1_from_input(
2575        input: &ZetaPromptInput,
2576        editable_range: Range<usize>,
2577        context_range: Range<usize>,
2578    ) -> String {
2579        let events = format_zeta1_events(&input.events);
2580        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
2581        format_zeta1_prompt(&events, &excerpt)
2582    }
2583
2584    /// Formats events in zeta1 style (oldest first).
2585    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
2586        let mut result = String::new();
2587        for event in events {
2588            let event_string = format_zeta1_event(event);
2589            if event_string.is_empty() {
2590                continue;
2591            }
2592            if !result.is_empty() {
2593                result.push_str("\n\n");
2594            }
2595            result.push_str(&event_string);
2596        }
2597        result
2598    }
2599
2600    fn format_zeta1_event(event: &Event) -> String {
2601        match event {
2602            Event::BufferChange {
2603                path,
2604                old_path,
2605                diff,
2606                ..
2607            } => {
2608                let mut prompt = String::new();
2609                if old_path != path {
2610                    writeln!(
2611                        prompt,
2612                        "User renamed {} to {}\n",
2613                        old_path.display(),
2614                        path.display()
2615                    )
2616                    .ok();
2617                }
2618                if !diff.is_empty() {
2619                    write!(
2620                        prompt,
2621                        "User edited {}:\n```diff\n{}\n```",
2622                        path.display(),
2623                        diff
2624                    )
2625                    .ok();
2626                }
2627                prompt
2628            }
2629        }
2630    }
2631
2632    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
2633    /// within `cursor_excerpt`.
2634    fn format_zeta1_excerpt(
2635        input: &ZetaPromptInput,
2636        editable_range: Range<usize>,
2637        context_range: Range<usize>,
2638    ) -> String {
2639        let path_str = input.cursor_path.to_string_lossy();
2640        let excerpt = &*input.cursor_excerpt;
2641        let cursor_offset = input.cursor_offset_in_excerpt;
2642
2643        let mut prompt = String::new();
2644        writeln!(&mut prompt, "```{path_str}").ok();
2645
2646        let starts_at_file_beginning =
2647            input.excerpt_start_row == Some(0) && context_range.start == 0;
2648        if starts_at_file_beginning {
2649            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
2650        }
2651
2652        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
2653
2654        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
2655        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
2656        prompt.push_str(CURSOR_MARKER);
2657        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
2658        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
2659
2660        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
2661        write!(prompt, "\n```").ok();
2662
2663        prompt
2664    }
2665
2666    /// Cleans zeta1 model output by extracting content between editable region
2667    /// markers and converting the zeta1 cursor marker to the universal one.
2668    /// Returns `None` if the output doesn't contain the expected markers.
2669    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
2670        let content = output.replace(CURSOR_MARKER, "");
2671
2672        let content_start = content
2673            .find(EDITABLE_REGION_START_MARKER)
2674            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
2675            .map(|pos| {
2676                if content.as_bytes().get(pos) == Some(&b'\n') {
2677                    pos + 1
2678                } else {
2679                    pos
2680                }
2681            })
2682            .unwrap_or(0);
2683
2684        let content_end = content
2685            .find(EDITABLE_REGION_END_MARKER)
2686            .map(|pos| {
2687                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
2688                    pos - 1
2689                } else {
2690                    pos
2691                }
2692            })
2693            .unwrap_or(content.len());
2694
2695        if content_start > content_end {
2696            return Some(String::new());
2697        }
2698
2699        let extracted = &content[content_start..content_end];
2700
2701        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
2702            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
2703            let text_before_cursor = text_before_cursor
2704                .find(EDITABLE_REGION_START_MARKER)
2705                .map(|pos| {
2706                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
2707                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
2708                        after_marker + 1
2709                    } else {
2710                        after_marker
2711                    }
2712                })
2713                .unwrap_or(0);
2714            let offset_in_extracted = zeta1_cursor_pos
2715                .saturating_sub(text_before_cursor)
2716                .min(extracted.len());
2717            offset_in_extracted
2718        });
2719
2720        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
2721        if let Some(offset) = cursor_offset {
2722            result.push_str(&extracted[..offset]);
2723            result.push_str(super::CURSOR_MARKER);
2724            result.push_str(&extracted[offset..]);
2725        } else {
2726            result.push_str(extracted);
2727        }
2728
2729        Some(result)
2730    }
2731}
2732
2733#[cfg(test)]
2734mod tests {
2735    use super::*;
2736    use indoc::indoc;
2737
2738    fn make_input(
2739        cursor_excerpt: &str,
2740        editable_range: Range<usize>,
2741        cursor_offset: usize,
2742        events: Vec<Event>,
2743        related_files: Vec<RelatedFile>,
2744    ) -> ZetaPromptInput {
2745        let context_range = 0..cursor_excerpt.len();
2746        ZetaPromptInput {
2747            cursor_path: Path::new("test.rs").into(),
2748            cursor_excerpt: cursor_excerpt.into(),
2749            cursor_offset_in_excerpt: cursor_offset,
2750            excerpt_start_row: None,
2751            events: events.into_iter().map(Arc::new).collect(),
2752            related_files,
2753            excerpt_ranges: ExcerptRanges {
2754                editable_150: editable_range.clone(),
2755                editable_180: editable_range.clone(),
2756                editable_350: editable_range,
2757                editable_150_context_350: context_range.clone(),
2758                editable_180_context_350: context_range.clone(),
2759                editable_350_context_150: context_range,
2760                ..Default::default()
2761            },
2762            experiment: None,
2763            in_open_source_repo: false,
2764            can_collect_data: false,
2765        }
2766    }
2767
2768    fn make_event(path: &str, diff: &str) -> Event {
2769        Event::BufferChange {
2770            path: Path::new(path).into(),
2771            old_path: Path::new(path).into(),
2772            diff: diff.to_string(),
2773            predicted: false,
2774            in_open_source_repo: false,
2775        }
2776    }
2777
2778    fn make_related_file(path: &str, content: &str) -> RelatedFile {
2779        RelatedFile {
2780            path: Path::new(path).into(),
2781            max_row: content.lines().count() as u32,
2782            excerpts: vec![RelatedExcerpt {
2783                row_range: 0..content.lines().count() as u32,
2784                text: content.into(),
2785                order: 0,
2786            }],
2787            in_open_source_repo: false,
2788        }
2789    }
2790
2791    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
2792        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
2793    }
2794
2795    #[test]
2796    fn test_no_truncation_when_within_budget() {
2797        let input = make_input(
2798            "prefix\neditable\nsuffix",
2799            7..15,
2800            10,
2801            vec![make_event("a.rs", "-old\n+new\n")],
2802            vec![make_related_file("related.rs", "fn helper() {}\n")],
2803        );
2804
2805        assert_eq!(
2806            format_with_budget(&input, 10000),
2807            indoc! {r#"
2808                <|file_sep|>related.rs
2809                fn helper() {}
2810                <|file_sep|>edit history
2811                --- a/a.rs
2812                +++ b/a.rs
2813                -old
2814                +new
2815                <|file_sep|>test.rs
2816                <|fim_prefix|>
2817                prefix
2818                <|fim_middle|>current
2819                edi<|user_cursor|>table
2820                <|fim_suffix|>
2821
2822                suffix
2823                <|fim_middle|>updated
2824            "#}
2825        );
2826    }
2827
2828    #[test]
2829    fn test_truncation_drops_edit_history_when_budget_tight() {
2830        let input = make_input(
2831            "code",
2832            0..4,
2833            2,
2834            vec![make_event("a.rs", "-x\n+y\n")],
2835            vec![
2836                make_related_file("r1.rs", "a\n"),
2837                make_related_file("r2.rs", "b\n"),
2838            ],
2839        );
2840
2841        assert_eq!(
2842            format_with_budget(&input, 10000),
2843            indoc! {r#"
2844                <|file_sep|>r1.rs
2845                a
2846                <|file_sep|>r2.rs
2847                b
2848                <|file_sep|>edit history
2849                --- a/a.rs
2850                +++ b/a.rs
2851                -x
2852                +y
2853                <|file_sep|>test.rs
2854                <|fim_prefix|>
2855                <|fim_middle|>current
2856                co<|user_cursor|>de
2857                <|fim_suffix|>
2858                <|fim_middle|>updated
2859            "#}
2860        );
2861
2862        assert_eq!(
2863            format_with_budget(&input, 50),
2864            indoc! {r#"
2865                <|file_sep|>r1.rs
2866                a
2867                <|file_sep|>r2.rs
2868                b
2869                <|file_sep|>test.rs
2870                <|fim_prefix|>
2871                <|fim_middle|>current
2872                co<|user_cursor|>de
2873                <|fim_suffix|>
2874                <|fim_middle|>updated
2875            "#}
2876        );
2877    }
2878
2879    #[test]
2880    fn test_truncation_includes_partial_excerpts() {
2881        let input = make_input(
2882            "x",
2883            0..1,
2884            0,
2885            vec![],
2886            vec![RelatedFile {
2887                path: Path::new("big.rs").into(),
2888                max_row: 30,
2889                in_open_source_repo: false,
2890                excerpts: vec![
2891                    RelatedExcerpt {
2892                        row_range: 0..10,
2893                        text: "first excerpt\n".into(),
2894                        order: 0,
2895                    },
2896                    RelatedExcerpt {
2897                        row_range: 10..20,
2898                        text: "second excerpt\n".into(),
2899                        order: 0,
2900                    },
2901                    RelatedExcerpt {
2902                        row_range: 20..30,
2903                        text: "third excerpt\n".into(),
2904                        order: 0,
2905                    },
2906                ],
2907            }],
2908        );
2909
2910        assert_eq!(
2911            format_with_budget(&input, 10000),
2912            indoc! {r#"
2913                <|file_sep|>big.rs
2914                first excerpt
2915                ...
2916                second excerpt
2917                ...
2918                third excerpt
2919                <|file_sep|>test.rs
2920                <|fim_prefix|>
2921                <|fim_middle|>current
2922                <|user_cursor|>x
2923                <|fim_suffix|>
2924                <|fim_middle|>updated
2925            "#}
2926        );
2927
2928        assert_eq!(
2929            format_with_budget(&input, 50),
2930            indoc! {r#"
2931                <|file_sep|>big.rs
2932                first excerpt
2933                ...
2934                <|file_sep|>test.rs
2935                <|fim_prefix|>
2936                <|fim_middle|>current
2937                <|user_cursor|>x
2938                <|fim_suffix|>
2939                <|fim_middle|>updated
2940            "#}
2941        );
2942    }
2943
2944    #[test]
2945    fn test_truncation_prioritizes_lower_order_excerpts() {
2946        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
2947        // With tight budget, only the lower-order excerpt from file_b should be included.
2948        let input = make_input(
2949            "x",
2950            0..1,
2951            0,
2952            vec![],
2953            vec![
2954                RelatedFile {
2955                    path: Path::new("file_a.rs").into(),
2956                    max_row: 10,
2957                    in_open_source_repo: false,
2958                    excerpts: vec![RelatedExcerpt {
2959                        row_range: 0..10,
2960                        text: "low priority content\n".into(),
2961                        order: 5,
2962                    }],
2963                },
2964                RelatedFile {
2965                    path: Path::new("file_b.rs").into(),
2966                    max_row: 10,
2967                    in_open_source_repo: false,
2968                    excerpts: vec![RelatedExcerpt {
2969                        row_range: 0..10,
2970                        text: "high priority content\n".into(),
2971                        order: 1,
2972                    }],
2973                },
2974            ],
2975        );
2976
2977        // With large budget, both files included; rendered in stable lexicographic order.
2978        assert_eq!(
2979            format_with_budget(&input, 10000),
2980            indoc! {r#"
2981                <|file_sep|>file_a.rs
2982                low priority content
2983                <|file_sep|>file_b.rs
2984                high priority content
2985                <|file_sep|>test.rs
2986                <|fim_prefix|>
2987                <|fim_middle|>current
2988                <|user_cursor|>x
2989                <|fim_suffix|>
2990                <|fim_middle|>updated
2991            "#}
2992        );
2993
2994        // With tight budget, only file_b (lower order) fits.
2995        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
2996        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
2997        // file_a would need another 14 tokens, which doesn't fit.
2998        assert_eq!(
2999            format_with_budget(&input, 52),
3000            indoc! {r#"
3001                <|file_sep|>file_b.rs
3002                high priority content
3003                <|file_sep|>test.rs
3004                <|fim_prefix|>
3005                <|fim_middle|>current
3006                <|user_cursor|>x
3007                <|fim_suffix|>
3008                <|fim_middle|>updated
3009            "#}
3010        );
3011    }
3012
3013    #[test]
3014    fn test_truncation_drops_high_order_excerpts_within_file() {
3015        // A single file has excerpts at order 1 and order 3. With a tight budget,
3016        // only the order-1 excerpts are included while the order-3 excerpt is
3017        // dropped — even though they belong to the same file. This also preserves
3018        // the parent invariant: parent outline items have order ≤ their best
3019        // child, so they're always included when any child is.
3020        let input = make_input(
3021            "x",
3022            0..1,
3023            0,
3024            vec![],
3025            vec![RelatedFile {
3026                path: Path::new("mod.rs").into(),
3027                max_row: 30,
3028                in_open_source_repo: false,
3029                excerpts: vec![
3030                    RelatedExcerpt {
3031                        row_range: 0..5,
3032                        text: "mod header\n".into(),
3033                        order: 1,
3034                    },
3035                    RelatedExcerpt {
3036                        row_range: 5..15,
3037                        text: "important fn\n".into(),
3038                        order: 1,
3039                    },
3040                    RelatedExcerpt {
3041                        row_range: 15..30,
3042                        text: "less important fn\n".into(),
3043                        order: 3,
3044                    },
3045                ],
3046            }],
3047        );
3048
3049        // With large budget, all three excerpts included.
3050        assert_eq!(
3051            format_with_budget(&input, 10000),
3052            indoc! {r#"
3053                <|file_sep|>mod.rs
3054                mod header
3055                ...
3056                important fn
3057                ...
3058                less important fn
3059                <|file_sep|>test.rs
3060                <|fim_prefix|>
3061                <|fim_middle|>current
3062                <|user_cursor|>x
3063                <|fim_suffix|>
3064                <|fim_middle|>updated
3065            "#}
3066        );
3067
3068        // With tight budget, only order<=1 excerpts included (header + important fn).
3069        assert_eq!(
3070            format_with_budget(&input, 55),
3071            indoc! {r#"
3072                <|file_sep|>mod.rs
3073                mod header
3074                ...
3075                important fn
3076                ...
3077                <|file_sep|>test.rs
3078                <|fim_prefix|>
3079                <|fim_middle|>current
3080                <|user_cursor|>x
3081                <|fim_suffix|>
3082                <|fim_middle|>updated
3083            "#}
3084        );
3085    }
3086
3087    #[test]
3088    fn test_truncation_drops_older_events_first() {
3089        let input = make_input(
3090            "x",
3091            0..1,
3092            0,
3093            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
3094            vec![],
3095        );
3096
3097        assert_eq!(
3098            format_with_budget(&input, 10000),
3099            indoc! {r#"
3100                <|file_sep|>edit history
3101                --- a/old.rs
3102                +++ b/old.rs
3103                -1
3104                --- a/new.rs
3105                +++ b/new.rs
3106                -2
3107                <|file_sep|>test.rs
3108                <|fim_prefix|>
3109                <|fim_middle|>current
3110                <|user_cursor|>x
3111                <|fim_suffix|>
3112                <|fim_middle|>updated
3113            "#}
3114        );
3115
3116        assert_eq!(
3117            format_with_budget(&input, 55),
3118            indoc! {r#"
3119                <|file_sep|>edit history
3120                --- a/new.rs
3121                +++ b/new.rs
3122                -2
3123                <|file_sep|>test.rs
3124                <|fim_prefix|>
3125                <|fim_middle|>current
3126                <|user_cursor|>x
3127                <|fim_suffix|>
3128                <|fim_middle|>updated
3129            "#}
3130        );
3131    }
3132
3133    #[test]
3134    fn test_cursor_excerpt_always_included_with_minimal_budget() {
3135        let input = make_input(
3136            "fn main() {}",
3137            0..12,
3138            3,
3139            vec![make_event("a.rs", "-old\n+new\n")],
3140            vec![make_related_file("related.rs", "helper\n")],
3141        );
3142
3143        assert_eq!(
3144            format_with_budget(&input, 30),
3145            indoc! {r#"
3146                <|file_sep|>test.rs
3147                <|fim_prefix|>
3148                <|fim_middle|>current
3149                fn <|user_cursor|>main() {}
3150                <|fim_suffix|>
3151                <|fim_middle|>updated
3152            "#}
3153        );
3154    }
3155
3156    fn format_seed_coder(input: &ZetaPromptInput) -> String {
3157        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
3158    }
3159
3160    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3161        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
3162    }
3163
3164    #[test]
3165    fn test_seed_coder_basic_format() {
3166        let input = make_input(
3167            "prefix\neditable\nsuffix",
3168            7..15,
3169            10,
3170            vec![make_event("a.rs", "-old\n+new\n")],
3171            vec![make_related_file("related.rs", "fn helper() {}\n")],
3172        );
3173
3174        assert_eq!(
3175            format_seed_coder(&input),
3176            indoc! {r#"
3177                <[fim-suffix]>
3178                suffix
3179                <[fim-prefix]><filename>related.rs
3180                fn helper() {}
3181
3182                <filename>edit_history
3183                --- a/a.rs
3184                +++ b/a.rs
3185                -old
3186                +new
3187
3188                <filename>test.rs
3189                prefix
3190                <<<<<<< CURRENT
3191                edi<|user_cursor|>table
3192                =======
3193                <[fim-middle]>"#}
3194        );
3195    }
3196
3197    #[test]
3198    fn test_seed_coder_no_context() {
3199        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
3200
3201        assert_eq!(
3202            format_seed_coder(&input),
3203            indoc! {r#"
3204                <[fim-suffix]>
3205                after
3206                <[fim-prefix]><filename>test.rs
3207                before
3208                <<<<<<< CURRENT
3209                mid<|user_cursor|>dle
3210                =======
3211                <[fim-middle]>"#}
3212        );
3213    }
3214
3215    #[test]
3216    fn test_seed_coder_truncation_drops_context() {
3217        let input = make_input(
3218            "code",
3219            0..4,
3220            2,
3221            vec![make_event("a.rs", "-x\n+y\n")],
3222            vec![make_related_file("r1.rs", "content\n")],
3223        );
3224
3225        // With large budget, everything is included
3226        assert_eq!(
3227            format_seed_coder(&input),
3228            indoc! {r#"
3229                <[fim-suffix]>
3230                <[fim-prefix]><filename>r1.rs
3231                content
3232
3233                <filename>edit_history
3234                --- a/a.rs
3235                +++ b/a.rs
3236                -x
3237                +y
3238
3239                <filename>test.rs
3240                <<<<<<< CURRENT
3241                co<|user_cursor|>de
3242                =======
3243                <[fim-middle]>"#}
3244        );
3245
3246        // With tight budget, context is dropped but cursor section remains
3247        assert_eq!(
3248            format_seed_coder_with_budget(&input, 30),
3249            indoc! {r#"
3250                <[fim-suffix]>
3251                <[fim-prefix]><filename>test.rs
3252                <<<<<<< CURRENT
3253                co<|user_cursor|>de
3254                =======
3255                <[fim-middle]>"#}
3256        );
3257    }
3258
3259    #[test]
3260    fn test_seed_coder_truncation_prioritizes_lower_order() {
3261        let input = make_input(
3262            "code",
3263            0..4,
3264            2,
3265            vec![],
3266            vec![
3267                RelatedFile {
3268                    path: Path::new("low_prio.rs").into(),
3269                    max_row: 5,
3270                    in_open_source_repo: false,
3271                    excerpts: vec![RelatedExcerpt {
3272                        row_range: 0..5,
3273                        text: "low prio\n".into(),
3274                        order: 10,
3275                    }],
3276                },
3277                RelatedFile {
3278                    path: Path::new("high_prio.rs").into(),
3279                    max_row: 5,
3280                    in_open_source_repo: false,
3281                    excerpts: vec![RelatedExcerpt {
3282                        row_range: 0..5,
3283                        text: "high prio\n".into(),
3284                        order: 1,
3285                    }],
3286                },
3287            ],
3288        );
3289
3290        // With large budget, both included; rendered in stable lexicographic order.
3291        assert_eq!(
3292            format_seed_coder(&input),
3293            indoc! {r#"
3294                <[fim-suffix]>
3295                <[fim-prefix]><filename>low_prio.rs
3296                low prio
3297                <filename>high_prio.rs
3298                high prio
3299
3300                <filename>test.rs
3301                <<<<<<< CURRENT
3302                co<|user_cursor|>de
3303                =======
3304                <[fim-middle]>"#}
3305        );
3306
3307        // With tight budget, only high_prio included.
3308        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
3309        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
3310        assert_eq!(
3311            format_seed_coder_with_budget(&input, 44),
3312            indoc! {r#"
3313                <[fim-suffix]>
3314                <[fim-prefix]><filename>high_prio.rs
3315                high prio
3316
3317                <filename>test.rs
3318                <<<<<<< CURRENT
3319                co<|user_cursor|>de
3320                =======
3321                <[fim-middle]>"#}
3322        );
3323    }
3324
3325    #[test]
3326    fn test_seed_coder_clean_output() {
3327        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
3328        let output_without_marker = "new code\n";
3329
3330        assert_eq!(
3331            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
3332            "new code\n"
3333        );
3334        assert_eq!(
3335            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
3336            "new code\n"
3337        );
3338    }
3339
3340    #[test]
3341    fn test_format_zeta1_from_input_basic() {
3342        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
3343        let input = ZetaPromptInput {
3344            cursor_path: Path::new("src/main.rs").into(),
3345            cursor_excerpt: excerpt.into(),
3346            cursor_offset_in_excerpt: 30,
3347            excerpt_start_row: Some(0),
3348            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
3349            related_files: vec![],
3350            excerpt_ranges: ExcerptRanges {
3351                editable_150: 15..41,
3352                editable_180: 15..41,
3353                editable_350: 15..41,
3354                editable_150_context_350: 0..excerpt.len(),
3355                editable_180_context_350: 0..excerpt.len(),
3356                editable_350_context_150: 0..excerpt.len(),
3357                ..Default::default()
3358            },
3359            experiment: None,
3360            in_open_source_repo: false,
3361            can_collect_data: false,
3362        };
3363
3364        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
3365
3366        assert_eq!(
3367            prompt,
3368            concat!(
3369                "### Instruction:\n",
3370                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3371                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3372                "into account the cursor location.\n",
3373                "\n",
3374                "### User Edits:\n",
3375                "\n",
3376                "User edited other.rs:\n",
3377                "```diff\n",
3378                "-old\n",
3379                "+new\n",
3380                "\n",
3381                "```\n",
3382                "\n",
3383                "### User Excerpt:\n",
3384                "\n",
3385                "```src/main.rs\n",
3386                "<|start_of_file|>\n",
3387                "fn before() {}\n",
3388                "<|editable_region_start|>\n",
3389                "fn foo() {\n",
3390                "    <|user_cursor_is_here|>let x = 1;\n",
3391                "\n",
3392                "<|editable_region_end|>}\n",
3393                "fn after() {}\n",
3394                "\n",
3395                "```\n",
3396                "\n",
3397                "### Response:\n",
3398            ),
3399        );
3400    }
3401
3402    #[test]
3403    fn test_format_zeta1_from_input_no_start_of_file() {
3404        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
3405        let input = ZetaPromptInput {
3406            cursor_path: Path::new("src/main.rs").into(),
3407            cursor_excerpt: excerpt.into(),
3408            cursor_offset_in_excerpt: 15,
3409            excerpt_start_row: Some(10),
3410            events: vec![],
3411            related_files: vec![],
3412            excerpt_ranges: ExcerptRanges {
3413                editable_150: 0..28,
3414                editable_180: 0..28,
3415                editable_350: 0..28,
3416                editable_150_context_350: 0..28,
3417                editable_180_context_350: 0..28,
3418                editable_350_context_150: 0..28,
3419                ..Default::default()
3420            },
3421            experiment: None,
3422            in_open_source_repo: false,
3423            can_collect_data: false,
3424        };
3425
3426        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
3427
3428        assert_eq!(
3429            prompt,
3430            concat!(
3431                "### Instruction:\n",
3432                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3433                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3434                "into account the cursor location.\n",
3435                "\n",
3436                "### User Edits:\n",
3437                "\n",
3438                "\n",
3439                "\n",
3440                "### User Excerpt:\n",
3441                "\n",
3442                "```src/main.rs\n",
3443                "<|editable_region_start|>\n",
3444                "fn foo() {\n",
3445                "    <|user_cursor_is_here|>let x = 1;\n",
3446                "}\n",
3447                "\n",
3448                "<|editable_region_end|>\n",
3449                "```\n",
3450                "\n",
3451                "### Response:\n",
3452            ),
3453        );
3454    }
3455
3456    #[test]
3457    fn test_format_zeta1_from_input_with_sub_ranges() {
3458        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
3459        let editable_range = 10..37;
3460        let context_range = 0..excerpt.len();
3461
3462        let input = ZetaPromptInput {
3463            cursor_path: Path::new("test.rs").into(),
3464            cursor_excerpt: excerpt.into(),
3465            cursor_offset_in_excerpt: 25,
3466            excerpt_start_row: Some(0),
3467            events: vec![],
3468            related_files: vec![],
3469            excerpt_ranges: ExcerptRanges {
3470                editable_150: editable_range.clone(),
3471                editable_180: editable_range.clone(),
3472                editable_350: editable_range.clone(),
3473                editable_150_context_350: context_range.clone(),
3474                editable_180_context_350: context_range.clone(),
3475                editable_350_context_150: context_range.clone(),
3476                ..Default::default()
3477            },
3478            experiment: None,
3479            in_open_source_repo: false,
3480            can_collect_data: false,
3481        };
3482
3483        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
3484
3485        assert_eq!(
3486            prompt,
3487            concat!(
3488                "### Instruction:\n",
3489                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3490                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3491                "into account the cursor location.\n",
3492                "\n",
3493                "### User Edits:\n",
3494                "\n",
3495                "\n",
3496                "\n",
3497                "### User Excerpt:\n",
3498                "\n",
3499                "```test.rs\n",
3500                "<|start_of_file|>\n",
3501                "// prefix\n",
3502                "<|editable_region_start|>\n",
3503                "fn foo() {\n",
3504                "    <|user_cursor_is_here|>let x = 1;\n",
3505                "}\n",
3506                "<|editable_region_end|>\n",
3507                "// suffix\n",
3508                "\n",
3509                "```\n",
3510                "\n",
3511                "### Response:\n",
3512            ),
3513        );
3514    }
3515
3516    #[test]
3517    fn test_clean_zeta1_model_output_basic() {
3518        let output = indoc! {"
3519            <|editable_region_start|>
3520            fn main() {
3521                println!(\"hello\");
3522            }
3523            <|editable_region_end|>
3524        "};
3525
3526        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3527        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
3528    }
3529
3530    #[test]
3531    fn test_clean_zeta1_model_output_with_cursor() {
3532        let output = indoc! {"
3533            <|editable_region_start|>
3534            fn main() {
3535                <|user_cursor_is_here|>println!(\"hello\");
3536            }
3537            <|editable_region_end|>
3538        "};
3539
3540        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3541        assert_eq!(
3542            cleaned,
3543            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
3544        );
3545    }
3546
3547    #[test]
3548    fn test_clean_zeta1_model_output_no_markers() {
3549        let output = "fn main() {}\n";
3550        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3551        assert_eq!(cleaned, "fn main() {}\n");
3552    }
3553
3554    #[test]
3555    fn test_clean_zeta1_model_output_empty_region() {
3556        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
3557        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3558        assert_eq!(cleaned, "");
3559    }
3560}