zeta_prompt.rs

   1use anyhow::{Result, anyhow};
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker text written into prompts at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes when assembling a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens
    /// of additional context (inferred from the field name; confirm).
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens
    /// of additional context (inferred from the field name; confirm).
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context range computed with a 4096-token budget
    /// (inferred from the field name; confirm).
    pub context_4096: Option<Range<usize>>,
    /// Presumably a context range computed with an 8192-token budget
    /// (inferred from the field name; confirm). When present,
    /// `excerpt_ranges_for_format` uses it as both the editable and the
    /// context range for `ZetaFormat::V0304VariableEdit`.
    pub context_8192: Option<Range<usize>>,
}
  45
/// Everything needed to build a prompt and to interpret the model's output
/// for one prediction request.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's
    /// cursor excerpt section header.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The ranges in `excerpt_ranges` are byte
    /// offsets into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// When present, this row is added to row ranges computed inside
    /// `cursor_excerpt` so they can be compared with related-file excerpt
    /// rows. Presumably the file row at which the excerpt starts — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history events; the most recent ones are kept when the token
    /// budget is tight.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other locations to include in the prompt; `None` is
    /// treated like an empty list when formatting.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // NOTE(review): the three fields below are not read anywhere in the
    // visible part of this file; their meaning is inferred from their names.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  68
/// Identifies a prompt/output format. Each variant selects the special
/// tokens, excerpt ranges, prompt layout and output handling used by the
/// `*_for_format` functions in this file.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Required because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    /// Uses the `v0113_ordered` prompt layout with the 180-token editable range.
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The default format.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Uses the `v0131_git_merge_markers_prefix` prompt layout plus a prefill.
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    /// Uses the `seed_coder` prompt layout; output starting with
    /// `seed_coder::NO_EDITS` leaves the editable region unchanged.
    V0304SeedNoEdits,
}
  96
  97impl std::fmt::Display for ZetaFormat {
  98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  99        write!(f, "{}", <&'static str>::from(self))
 100    }
 101}
 102
 103impl ZetaFormat {
 104    pub fn parse(format_name: &str) -> Result<Self> {
 105        let mut results = ZetaFormat::iter().filter(|version| {
 106            <&'static str>::from(version)
 107                .to_lowercase()
 108                .contains(&format_name.to_lowercase())
 109        });
 110        let Some(result) = results.next() else {
 111            anyhow::bail!(
 112                "`{format_name}` did not match any of:\n{}",
 113                Self::options_as_string()
 114            );
 115        };
 116        if results.next().is_some() {
 117            anyhow::bail!(
 118                "`{format_name}` matched more than one of:\n{}",
 119                Self::options_as_string()
 120            );
 121        }
 122        Ok(result)
 123    }
 124
 125    pub fn options_as_string() -> String {
 126        ZetaFormat::iter()
 127            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 128            .collect::<Vec<_>>()
 129            .concat()
 130    }
 131}
 132
/// An edit-history event. Serialized with an `event` tag field.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a diff with
    /// `--- a/<old_path>` / `+++ b/<path>` headers.
    BufferChange {
        /// Path written on the `+++ b` header line.
        path: Arc<Path>,
        /// Path written on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff body, appended verbatim after the headers.
        diff: String,
        /// When true, the rendered event is preceded by a
        /// `// User accepted prediction:` line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written to the prompt.
        in_open_source_repo: bool,
    },
}
 144
 145impl Event {
 146    pub fn in_open_source_repo(&self) -> bool {
 147        match self {
 148            Event::BufferChange {
 149                in_open_source_repo,
 150                ..
 151            } => *in_open_source_repo,
 152        }
 153    }
 154}
 155
 156pub fn write_event(prompt: &mut String, event: &Event) {
 157    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 158        for component in path.components() {
 159            prompt.push('/');
 160            write!(prompt, "{}", component.as_os_str().display()).ok();
 161        }
 162    }
 163    match event {
 164        Event::BufferChange {
 165            path,
 166            old_path,
 167            diff,
 168            predicted,
 169            in_open_source_repo: _,
 170        } => {
 171            if *predicted {
 172                prompt.push_str("// User accepted prediction:\n");
 173            }
 174            prompt.push_str("--- a");
 175            write_path_as_unix_str(prompt, old_path.as_ref());
 176            prompt.push_str("\n+++ b");
 177            write_path_as_unix_str(prompt, path.as_ref());
 178            prompt.push('\n');
 179            prompt.push_str(diff);
 180        }
 181    }
 182}
 183
/// A file with excerpts that may be included in the prompt as extra context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written in the file's header line in the prompt.
    pub path: Arc<Path>,
    /// Excerpts whose row range ends before this row are followed by a
    /// `...` line when rendered. Presumably the last row of the file — confirm.
    pub max_row: u32,
    /// The excerpts taken from this file.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read in the visible part of this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 192
/// One excerpt of a `RelatedFile`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt.
    pub row_range: Range<u32>,
    /// The excerpt's text.
    pub text: Arc<str>,
    /// Priority used when the token budget is limited: excerpts with a
    /// lower `order` are considered first.
    #[serde(default)]
    pub order: usize,
}
 200
 201pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 202    special_tokens_for_format(format)
 203        .iter()
 204        .any(|token| input.cursor_excerpt.contains(token))
 205}
 206
/// Formats the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 210
 211pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 212    match format {
 213        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 214        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 215        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 216        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 217        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 218        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 219        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 220        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 221        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 222        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 223    }
 224}
 225
 226pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 227    match format {
 228        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 229        ZetaFormat::V0112MiddleAtEnd
 230        | ZetaFormat::V0113Ordered
 231        | ZetaFormat::V0114180EditableRegion
 232        | ZetaFormat::V0120GitMergeMarkers
 233        | ZetaFormat::V0131GitMergeMarkersPrefix
 234        | ZetaFormat::V0211Prefill
 235        | ZetaFormat::V0211SeedCoder
 236        | ZetaFormat::V0304VariableEdit
 237        | ZetaFormat::V0304SeedNoEdits => &[],
 238    }
 239}
 240
 241pub fn excerpt_ranges_for_format(
 242    format: ZetaFormat,
 243    ranges: &ExcerptRanges,
 244) -> (Range<usize>, Range<usize>) {
 245    match format {
 246        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 247            ranges.editable_150.clone(),
 248            ranges.editable_150_context_350.clone(),
 249        ),
 250        ZetaFormat::V0114180EditableRegion => (
 251            ranges.editable_180.clone(),
 252            ranges.editable_180_context_350.clone(),
 253        ),
 254        ZetaFormat::V0120GitMergeMarkers
 255        | ZetaFormat::V0131GitMergeMarkersPrefix
 256        | ZetaFormat::V0211Prefill
 257        | ZetaFormat::V0211SeedCoder
 258        | ZetaFormat::v0226Hashline
 259        | ZetaFormat::V0304SeedNoEdits => (
 260            ranges.editable_350.clone(),
 261            ranges.editable_350_context_150.clone(),
 262        ),
 263        ZetaFormat::V0304VariableEdit => {
 264            let context = ranges
 265                .context_8192
 266                .clone()
 267                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 268            (context.clone(), context)
 269        }
 270    }
 271}
 272
 273pub fn write_cursor_excerpt_section_for_format(
 274    format: ZetaFormat,
 275    prompt: &mut String,
 276    path: &Path,
 277    context: &str,
 278    editable_range: &Range<usize>,
 279    cursor_offset: usize,
 280) {
 281    match format {
 282        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 283            prompt,
 284            path,
 285            context,
 286            editable_range,
 287            cursor_offset,
 288        ),
 289        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 290            v0113_ordered::write_cursor_excerpt_section(
 291                prompt,
 292                path,
 293                context,
 294                editable_range,
 295                cursor_offset,
 296            )
 297        }
 298        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 299            prompt,
 300            path,
 301            context,
 302            editable_range,
 303            cursor_offset,
 304        ),
 305        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 306            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 307                prompt,
 308                path,
 309                context,
 310                editable_range,
 311                cursor_offset,
 312            )
 313        }
 314        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 315            seed_coder::write_cursor_excerpt_section(
 316                prompt,
 317                path,
 318                context,
 319                editable_range,
 320                cursor_offset,
 321            )
 322        }
 323        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 324            prompt,
 325            path,
 326            context,
 327            editable_range,
 328            cursor_offset,
 329        ),
 330        ZetaFormat::V0304VariableEdit => {
 331            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 332        }
 333    }
 334}
 335
 336fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 337    let start_row = text[0..range.start].matches('\n').count() as u32;
 338    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 339    if !text[..range.end].ends_with('\n') {
 340        end_row += 1;
 341    }
 342    return start_row..end_row;
 343}
 344
 345pub fn format_prompt_with_budget_for_format(
 346    input: &ZetaPromptInput,
 347    format: ZetaFormat,
 348    max_tokens: usize,
 349) -> String {
 350    let (context, editable_range, context_range, cursor_offset) =
 351        resolve_cursor_region(input, format);
 352    let path = &*input.cursor_path;
 353
 354    let empty_files = Vec::new();
 355    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 356    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 357        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 358        let row_range = relative_row_range.start + cursor_excerpt_start_row
 359            ..relative_row_range.end + cursor_excerpt_start_row;
 360        &filter_redundant_excerpts(
 361            input_related_files.to_vec(),
 362            input.cursor_path.as_ref(),
 363            row_range,
 364        )
 365    } else {
 366        input_related_files
 367    };
 368
 369    match format {
 370        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 371            seed_coder::format_prompt_with_budget(
 372                path,
 373                context,
 374                &editable_range,
 375                cursor_offset,
 376                &input.events,
 377                related_files,
 378                max_tokens,
 379            )
 380        }
 381        _ => {
 382            let mut cursor_section = String::new();
 383            write_cursor_excerpt_section_for_format(
 384                format,
 385                &mut cursor_section,
 386                path,
 387                context,
 388                &editable_range,
 389                cursor_offset,
 390            );
 391
 392            let cursor_tokens = estimate_tokens(cursor_section.len());
 393            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 394
 395            let edit_history_section = format_edit_history_within_budget(
 396                &input.events,
 397                "<|file_sep|>",
 398                "edit history",
 399                budget_after_cursor,
 400            );
 401            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 402            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 403
 404            let related_files_section = format_related_files_within_budget(
 405                &related_files,
 406                "<|file_sep|>",
 407                "",
 408                budget_after_edit_history,
 409            );
 410
 411            let mut prompt = String::new();
 412            prompt.push_str(&related_files_section);
 413            prompt.push_str(&edit_history_section);
 414            prompt.push_str(&cursor_section);
 415            prompt
 416        }
 417    }
 418}
 419
 420pub fn filter_redundant_excerpts(
 421    mut related_files: Vec<RelatedFile>,
 422    cursor_path: &Path,
 423    cursor_row_range: Range<u32>,
 424) -> Vec<RelatedFile> {
 425    for file in &mut related_files {
 426        if file.path.as_ref() == cursor_path {
 427            file.excerpts.retain(|excerpt| {
 428                excerpt.row_range.start < cursor_row_range.start
 429                    || excerpt.row_range.end > cursor_row_range.end
 430            });
 431        }
 432    }
 433    related_files.retain(|file| !file.excerpts.is_empty());
 434    related_files
 435}
 436
 437pub fn get_prefill_for_format(
 438    format: ZetaFormat,
 439    context: &str,
 440    editable_range: &Range<usize>,
 441) -> String {
 442    match format {
 443        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 444        ZetaFormat::V0112MiddleAtEnd
 445        | ZetaFormat::V0113Ordered
 446        | ZetaFormat::V0114180EditableRegion
 447        | ZetaFormat::V0120GitMergeMarkers
 448        | ZetaFormat::V0131GitMergeMarkersPrefix
 449        | ZetaFormat::V0211SeedCoder
 450        | ZetaFormat::v0226Hashline
 451        | ZetaFormat::V0304VariableEdit => String::new(),
 452        ZetaFormat::V0304SeedNoEdits => String::new(),
 453    }
 454}
 455
 456pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 457    match format {
 458        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 459        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 460        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 461        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
 462        ZetaFormat::V0112MiddleAtEnd
 463        | ZetaFormat::V0113Ordered
 464        | ZetaFormat::V0114180EditableRegion
 465        | ZetaFormat::v0226Hashline
 466        | ZetaFormat::V0304VariableEdit => None,
 467    }
 468}
 469
 470pub fn encode_patch_as_output_for_format(
 471    format: ZetaFormat,
 472    old_editable_region: &str,
 473    patch: &str,
 474    cursor_offset: Option<usize>,
 475) -> Result<Option<String>> {
 476    match format {
 477        ZetaFormat::v0226Hashline => {
 478            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 479        }
 480        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 481            old_editable_region,
 482            patch,
 483            cursor_offset,
 484        )
 485        .map(Some),
 486        ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
 487        _ => Ok(None),
 488    }
 489}
 490
/// Result of `parse_zeta2_model_output`: the replacement text together with
/// the part of the cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
 497
 498/// Parse model output for the given zeta format
 499pub fn parse_zeta2_model_output(
 500    output: &str,
 501    format: ZetaFormat,
 502    prompt_inputs: &ZetaPromptInput,
 503) -> Result<ParsedOutput> {
 504    let output = match output_end_marker_for_format(format) {
 505        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 506        None => output,
 507    };
 508
 509    let (context, editable_range_in_context, context_range, _) =
 510        resolve_cursor_region(prompt_inputs, format);
 511    let context_start = context_range.start;
 512    let old_editable_region = &context[editable_range_in_context.clone()];
 513
 514    let (range_in_context, output) = match format {
 515        ZetaFormat::v0226Hashline => (
 516            editable_range_in_context,
 517            if hashline::output_has_edit_commands(output) {
 518                hashline::apply_edit_commands(old_editable_region, output)
 519            } else {
 520                output.to_string()
 521            },
 522        ),
 523        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 524        ZetaFormat::V0304SeedNoEdits => (
 525            editable_range_in_context,
 526            if output.starts_with(seed_coder::NO_EDITS) {
 527                old_editable_region.to_string()
 528            } else {
 529                output.to_string()
 530            },
 531        ),
 532        _ => (editable_range_in_context, output.to_string()),
 533    };
 534
 535    let range_in_excerpt =
 536        range_in_context.start + context_start..range_in_context.end + context_start;
 537
 538    Ok(ParsedOutput {
 539        new_editable_region: output,
 540        range_in_excerpt,
 541    })
 542}
 543
/// Returns the `(editable range, context range)` pair for `format`.
/// Forwards directly to `excerpt_ranges_for_format`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
 550
 551pub fn resolve_cursor_region(
 552    input: &ZetaPromptInput,
 553    format: ZetaFormat,
 554) -> (&str, Range<usize>, Range<usize>, usize) {
 555    let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
 556    let context_start = context_range.start;
 557    let context_text = &input.cursor_excerpt[context_range.clone()];
 558    let adjusted_editable =
 559        (editable_range.start - context_start)..(editable_range.end - context_start);
 560    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 561
 562    (
 563        context_text,
 564        adjusted_editable,
 565        context_range,
 566        adjusted_cursor,
 567    )
 568}
 569
 570pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 571    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 572    get_prefill_for_format(format, context, &editable_range)
 573}
 574
 575fn format_edit_history_within_budget(
 576    events: &[Arc<Event>],
 577    file_marker: &str,
 578    edit_history_name: &str,
 579    max_tokens: usize,
 580) -> String {
 581    let header = format!("{}{}\n", file_marker, edit_history_name);
 582    let header_tokens = estimate_tokens(header.len());
 583    if header_tokens >= max_tokens {
 584        return String::new();
 585    }
 586
 587    let mut event_strings: Vec<String> = Vec::new();
 588    let mut total_tokens = header_tokens;
 589
 590    for event in events.iter().rev() {
 591        let mut event_str = String::new();
 592        write_event(&mut event_str, event);
 593        let event_tokens = estimate_tokens(event_str.len());
 594
 595        if total_tokens + event_tokens > max_tokens {
 596            break;
 597        }
 598        total_tokens += event_tokens;
 599        event_strings.push(event_str);
 600    }
 601
 602    if event_strings.is_empty() {
 603        return String::new();
 604    }
 605
 606    let mut result = header;
 607    for event_str in event_strings.iter().rev() {
 608        result.push_str(event_str);
 609    }
 610    result
 611}
 612
 613fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 614    let needs_newline = !excerpt.text.ends_with('\n');
 615    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 616    let len = excerpt.text.len()
 617        + if needs_newline { "\n".len() } else { 0 }
 618        + if needs_ellipsis { "...\n".len() } else { 0 };
 619    estimate_tokens(len)
 620}
 621
 622pub fn format_related_files_within_budget(
 623    related_files: &[RelatedFile],
 624    file_prefix: &str,
 625    file_suffix: &str,
 626    max_tokens: usize,
 627) -> String {
 628    struct ExcerptCandidate {
 629        file_ix: usize,
 630        excerpt_ix: usize,
 631        order: usize,
 632    }
 633
 634    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 635        .iter()
 636        .enumerate()
 637        .flat_map(|(file_ix, file)| {
 638            file.excerpts
 639                .iter()
 640                .enumerate()
 641                .map(move |(excerpt_ix, e)| ExcerptCandidate {
 642                    file_ix,
 643                    excerpt_ix,
 644                    order: e.order,
 645                })
 646        })
 647        .collect();
 648
 649    // Pre-compute file header strings and their token costs.
 650    let file_headers: Vec<String> = related_files
 651        .iter()
 652        .map(|file| {
 653            let path_str = file.path.to_string_lossy();
 654            format!("{}{}\n", file_prefix, path_str)
 655        })
 656        .collect();
 657
 658    // Sort the excerpts by their order and determine how many fit within the budget.
 659    let mut total_tokens = 0;
 660    let mut included_excerpt_count = 0_usize;
 661    let mut included_file_indices = vec![false; related_files.len()];
 662    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
 663    for candidate in &excerpt_candidates {
 664        let file = &related_files[candidate.file_ix];
 665        let excerpt = &file.excerpts[candidate.excerpt_ix];
 666        let file_already_included = included_file_indices[candidate.file_ix];
 667        let header_cost = if file_already_included {
 668            0
 669        } else {
 670            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
 671        };
 672        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
 673        if total_tokens + header_cost + excerpt_cost > max_tokens {
 674            break;
 675        }
 676        total_tokens += header_cost + excerpt_cost;
 677        if !file_already_included {
 678            included_file_indices[candidate.file_ix] = true;
 679        }
 680        included_excerpt_count += 1;
 681    }
 682
 683    excerpt_candidates.truncate(included_excerpt_count);
 684    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
 685
 686    // Render all of the files that fit within the token budget, in the original order.
 687    let mut result = String::new();
 688    let mut last_file_ix = None;
 689    for candidate in &excerpt_candidates {
 690        if last_file_ix != Some(candidate.file_ix) {
 691            if last_file_ix.is_some() {
 692                result.push_str(file_suffix);
 693            }
 694            result.push_str(&file_headers[candidate.file_ix]);
 695            last_file_ix = Some(candidate.file_ix);
 696        }
 697        let file = &related_files[candidate.file_ix];
 698        let excerpt = &file.excerpts[candidate.excerpt_ix];
 699        result.push_str(&excerpt.text);
 700        if !result.ends_with('\n') {
 701            result.push('\n');
 702        }
 703        if excerpt.row_range.end < file.max_row {
 704            result.push_str("...\n");
 705        }
 706    }
 707
 708    result
 709}
 710
 711pub fn write_related_files(
 712    prompt: &mut String,
 713    related_files: &[RelatedFile],
 714) -> Vec<Range<usize>> {
 715    let mut ranges = Vec::new();
 716    for file in related_files {
 717        let start = prompt.len();
 718        let path_str = file.path.to_string_lossy();
 719        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 720        for excerpt in &file.excerpts {
 721            prompt.push_str(&excerpt.text);
 722            if !prompt.ends_with('\n') {
 723                prompt.push('\n');
 724            }
 725            if excerpt.row_range.end < file.max_row {
 726                prompt.push_str("...\n");
 727            }
 728        }
 729        let end = prompt.len();
 730        ranges.push(start..end);
 731    }
 732    ranges
 733}
 734
 735mod v0112_middle_at_end {
 736    use super::*;
 737
 738    pub fn special_tokens() -> &'static [&'static str] {
 739        &[
 740            "<|fim_prefix|>",
 741            "<|fim_suffix|>",
 742            "<|fim_middle|>",
 743            "<|file_sep|>",
 744            CURSOR_MARKER,
 745        ]
 746    }
 747
 748    pub fn write_cursor_excerpt_section(
 749        prompt: &mut String,
 750        path: &Path,
 751        context: &str,
 752        editable_range: &Range<usize>,
 753        cursor_offset: usize,
 754    ) {
 755        let path_str = path.to_string_lossy();
 756        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 757
 758        prompt.push_str("<|fim_prefix|>\n");
 759        prompt.push_str(&context[..editable_range.start]);
 760
 761        prompt.push_str("<|fim_suffix|>\n");
 762        prompt.push_str(&context[editable_range.end..]);
 763        if !prompt.ends_with('\n') {
 764            prompt.push('\n');
 765        }
 766
 767        prompt.push_str("<|fim_middle|>current\n");
 768        prompt.push_str(&context[editable_range.start..cursor_offset]);
 769        prompt.push_str(CURSOR_MARKER);
 770        prompt.push_str(&context[cursor_offset..editable_range.end]);
 771        if !prompt.ends_with('\n') {
 772            prompt.push('\n');
 773        }
 774
 775        prompt.push_str("<|fim_middle|>updated\n");
 776    }
 777}
 778
 779mod v0113_ordered {
 780    use super::*;
 781
 782    pub fn special_tokens() -> &'static [&'static str] {
 783        &[
 784            "<|fim_prefix|>",
 785            "<|fim_suffix|>",
 786            "<|fim_middle|>",
 787            "<|file_sep|>",
 788            CURSOR_MARKER,
 789        ]
 790    }
 791
 792    pub fn write_cursor_excerpt_section(
 793        prompt: &mut String,
 794        path: &Path,
 795        context: &str,
 796        editable_range: &Range<usize>,
 797        cursor_offset: usize,
 798    ) {
 799        let path_str = path.to_string_lossy();
 800        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 801
 802        prompt.push_str("<|fim_prefix|>\n");
 803        prompt.push_str(&context[..editable_range.start]);
 804        if !prompt.ends_with('\n') {
 805            prompt.push('\n');
 806        }
 807
 808        prompt.push_str("<|fim_middle|>current\n");
 809        prompt.push_str(&context[editable_range.start..cursor_offset]);
 810        prompt.push_str(CURSOR_MARKER);
 811        prompt.push_str(&context[cursor_offset..editable_range.end]);
 812        if !prompt.ends_with('\n') {
 813            prompt.push('\n');
 814        }
 815
 816        prompt.push_str("<|fim_suffix|>\n");
 817        prompt.push_str(&context[editable_range.end..]);
 818        if !prompt.ends_with('\n') {
 819            prompt.push('\n');
 820        }
 821
 822        prompt.push_str("<|fim_middle|>updated\n");
 823    }
 824}
 825
mod v0114180_editable_region {
    use super::*;

    /// Same special tokens as the `v0113_ordered` format.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
 833
 834pub mod v0120_git_merge_markers {
 835    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 836    //!
 837    //! Example prompt:
 838    //!
 839    //! <|file_sep|>path/to/target_file.py
 840    //! <|fim_prefix|>
 841    //! code before editable region
 842    //! <|fim_suffix|>
 843    //! code after editable region
 844    //! <|fim_middle|>
 845    //! <<<<<<< CURRENT
 846    //! code that
 847    //! needs to<|user_cursor|>
 848    //! be rewritten
 849    //! =======
 850    //!
 851    //! Expected output (should be generated by the model):
 852    //!
 853    //! updated
 854    //! code with
 855    //! changes applied
 856    //! >>>>>>> UPDATED
 857
 858    use super::*;
 859
 860    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 861    pub const SEPARATOR: &str = "=======\n";
 862    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 863
 864    pub fn special_tokens() -> &'static [&'static str] {
 865        &[
 866            "<|fim_prefix|>",
 867            "<|fim_suffix|>",
 868            "<|fim_middle|>",
 869            "<|file_sep|>",
 870            START_MARKER,
 871            SEPARATOR,
 872            END_MARKER,
 873            CURSOR_MARKER,
 874        ]
 875    }
 876
 877    pub fn write_cursor_excerpt_section(
 878        prompt: &mut String,
 879        path: &Path,
 880        context: &str,
 881        editable_range: &Range<usize>,
 882        cursor_offset: usize,
 883    ) {
 884        let path_str = path.to_string_lossy();
 885        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 886
 887        prompt.push_str("<|fim_prefix|>");
 888        prompt.push_str(&context[..editable_range.start]);
 889
 890        prompt.push_str("<|fim_suffix|>");
 891        prompt.push_str(&context[editable_range.end..]);
 892        if !prompt.ends_with('\n') {
 893            prompt.push('\n');
 894        }
 895
 896        prompt.push_str("<|fim_middle|>");
 897        prompt.push_str(START_MARKER);
 898        prompt.push_str(&context[editable_range.start..cursor_offset]);
 899        prompt.push_str(CURSOR_MARKER);
 900        prompt.push_str(&context[cursor_offset..editable_range.end]);
 901        if !prompt.ends_with('\n') {
 902            prompt.push('\n');
 903        }
 904        prompt.push_str(SEPARATOR);
 905    }
 906}
 907
 908pub mod v0131_git_merge_markers_prefix {
 909    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 910    //!
 911    //! Example prompt:
 912    //!
 913    //! <|file_sep|>path/to/target_file.py
 914    //! <|fim_prefix|>
 915    //! code before editable region
 916    //! <<<<<<< CURRENT
 917    //! code that
 918    //! needs to<|user_cursor|>
 919    //! be rewritten
 920    //! =======
 921    //! <|fim_suffix|>
 922    //! code after editable region
 923    //! <|fim_middle|>
 924    //!
 925    //! Expected output (should be generated by the model):
 926    //!
 927    //! updated
 928    //! code with
 929    //! changes applied
 930    //! >>>>>>> UPDATED
 931
 932    use super::*;
 933
 934    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 935    pub const SEPARATOR: &str = "=======\n";
 936    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 937
 938    pub fn special_tokens() -> &'static [&'static str] {
 939        &[
 940            "<|fim_prefix|>",
 941            "<|fim_suffix|>",
 942            "<|fim_middle|>",
 943            "<|file_sep|>",
 944            START_MARKER,
 945            SEPARATOR,
 946            END_MARKER,
 947            CURSOR_MARKER,
 948        ]
 949    }
 950
 951    pub fn write_cursor_excerpt_section(
 952        prompt: &mut String,
 953        path: &Path,
 954        context: &str,
 955        editable_range: &Range<usize>,
 956        cursor_offset: usize,
 957    ) {
 958        let path_str = path.to_string_lossy();
 959        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 960
 961        prompt.push_str("<|fim_prefix|>");
 962        prompt.push_str(&context[..editable_range.start]);
 963        prompt.push_str(START_MARKER);
 964        prompt.push_str(&context[editable_range.start..cursor_offset]);
 965        prompt.push_str(CURSOR_MARKER);
 966        prompt.push_str(&context[cursor_offset..editable_range.end]);
 967        if !prompt.ends_with('\n') {
 968            prompt.push('\n');
 969        }
 970        prompt.push_str(SEPARATOR);
 971
 972        prompt.push_str("<|fim_suffix|>");
 973        prompt.push_str(&context[editable_range.end..]);
 974        if !prompt.ends_with('\n') {
 975            prompt.push('\n');
 976        }
 977
 978        prompt.push_str("<|fim_middle|>");
 979    }
 980}
 981
 982pub mod v0211_prefill {
 983    use super::*;
 984
 985    pub fn special_tokens() -> &'static [&'static str] {
 986        v0131_git_merge_markers_prefix::special_tokens()
 987    }
 988
 989    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 990        let editable_region = &context[editable_range.start..editable_range.end];
 991
 992        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 993        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 994
 995        // Find a token boundary to avoid splitting tokens in the prefill.
 996        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 997        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 998        // the \n and consume any consecutive \n characters after it.
 999        let prefill = &editable_region[..prefill_len];
1000        match prefill.rfind('\n') {
1001            Some(pos) => {
1002                let mut end = pos + 1;
1003                while end < editable_region.len()
1004                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1005                {
1006                    end += 1;
1007                }
1008                editable_region[..end].to_string()
1009            }
1010            // No newline found. Fall back to splitting before the last space
1011            // (word-level boundary)
1012            None => match prefill.rfind(' ') {
1013                Some(pos) => prefill[..pos].to_string(),
1014                None => prefill.to_string(),
1015            },
1016        }
1017    }
1018}
1019
1020pub mod hashline {
1021
1022    use std::fmt::Display;
1023
    /// Written as the final line of the cursor excerpt section, after the suffix.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written after the prefix, immediately before the hashline-encoded
    /// editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Starts a command line that replaces one line or an inclusive line range.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Starts a command line that inserts new lines after a referenced line
    /// (or before the first line when no reference follows).
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Model output meaning "leave the editable region unchanged".
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1032
1033    pub fn special_tokens() -> &'static [&'static str] {
1034        return &[
1035            SET_COMMAND_MARKER,
1036            "<|set_range|>",
1037            INSERT_COMMAND_MARKER,
1038            NO_EDITS_COMMAND_MARKER,
1039            CURSOR_MARKER,
1040            "<|file_sep|>",
1041            "<|fim_prefix|>",
1042            "<|fim_suffix|>",
1043            "<|fim_middle|>",
1044        ];
1045    }
1046
1047    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1048    #[derive(Debug, Clone, PartialEq, Eq)]
1049    struct LineRef {
1050        index: usize,
1051        hash: u8,
1052    }
1053
1054    impl Display for LineRef {
1055        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1056            write!(f, "{}:{:02x}", self.index, self.hash)
1057        }
1058    }
1059
1060    pub fn hash_line(line: &[u8]) -> u8 {
1061        let mut h: u8 = 0;
1062        for &byte in line {
1063            h = h.wrapping_add(byte);
1064        }
1065        return h;
1066    }
1067
1068    /// Write the hashline-encoded editable region into `out`. Each line of
1069    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1070    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1071    /// to the start of `editable_text`).
1072    pub fn write_hashline_editable_region(
1073        out: &mut String,
1074        editable_text: &str,
1075        cursor_offset_in_editable: usize,
1076    ) {
1077        let mut offset = 0;
1078        for (i, line) in editable_text.lines().enumerate() {
1079            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1080                && cursor_offset_in_editable < offset + line.len()
1081            {
1082                (
1083                    &line[..cursor_offset_in_editable - offset],
1084                    CURSOR_MARKER,
1085                    &line[cursor_offset_in_editable - offset..],
1086                )
1087            } else {
1088                (line, "", "")
1089            };
1090            write!(
1091                out,
1092                "\n{}|{head}{cursor}{tail}",
1093                LineRef {
1094                    index: i,
1095                    hash: hash_line(line.as_bytes())
1096                }
1097            )
1098            .unwrap();
1099            offset += line.len() + 1;
1100        }
1101    }
1102
1103    pub fn write_cursor_excerpt_section(
1104        prompt: &mut String,
1105        path: &Path,
1106        context: &str,
1107        editable_range: &Range<usize>,
1108        cursor_offset: usize,
1109    ) {
1110        let path_str = path.to_string_lossy();
1111        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1112
1113        prompt.push_str("<|fim_prefix|>\n");
1114        prompt.push_str(&context[..editable_range.start]);
1115        prompt.push_str(START_MARKER);
1116
1117        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1118        let editable_region = &context[editable_range.clone()];
1119        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1120
1121        if !prompt.ends_with('\n') {
1122            prompt.push('\n');
1123        }
1124
1125        prompt.push_str("<|fim_suffix|>\n");
1126        prompt.push_str(&context[editable_range.end..]);
1127        if !prompt.ends_with('\n') {
1128            prompt.push('\n');
1129        }
1130
1131        prompt.push_str(END_MARKER);
1132        prompt.push('\n');
1133    }
1134
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output and holds the raw text of the
    /// lines that follow the command line, up to the next command line.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1152
1153    /// Parse a line reference like `3:c3` into a `LineRef`.
1154    fn parse_line_ref(s: &str) -> Option<LineRef> {
1155        let (idx_str, hash_str) = s.split_once(':')?;
1156        let index = idx_str.parse::<usize>().ok()?;
1157        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1158        Some(LineRef { index, hash })
1159    }
1160
1161    /// Parse the model output into a list of `EditCommand`s.
1162    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1163        let mut commands = Vec::new();
1164        let mut offset = 0usize;
1165
1166        while offset < model_output.len() {
1167            let next_nl = model_output[offset..]
1168                .find('\n')
1169                .map(|i| offset + i)
1170                .unwrap_or(model_output.len());
1171            let line = &model_output[offset..next_nl];
1172            let line_end = if next_nl < model_output.len() {
1173                next_nl + 1
1174            } else {
1175                next_nl
1176            };
1177
1178            let trimmed = line.trim();
1179            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1180                (true, spec)
1181            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1182                (false, spec)
1183            } else {
1184                offset = line_end;
1185                continue;
1186            };
1187
1188            let mut content_end = line_end;
1189            let mut scan = line_end;
1190
1191            while scan < model_output.len() {
1192                let body_nl = model_output[scan..]
1193                    .find('\n')
1194                    .map(|i| scan + i)
1195                    .unwrap_or(model_output.len());
1196                let body_line = &model_output[scan..body_nl];
1197                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1198                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1199                {
1200                    break;
1201                }
1202                scan = if body_nl < model_output.len() {
1203                    body_nl + 1
1204                } else {
1205                    body_nl
1206                };
1207                content_end = scan;
1208            }
1209
1210            let content = &model_output[line_end..content_end];
1211
1212            if is_set {
1213                if let Some((start_str, end_str)) = specifier.split_once('-') {
1214                    if let (Some(start), Some(end)) =
1215                        (parse_line_ref(start_str), parse_line_ref(end_str))
1216                    {
1217                        commands.push(EditCommand::Set {
1218                            start,
1219                            end,
1220                            content,
1221                        });
1222                    }
1223                } else if let Some(target) = parse_line_ref(specifier) {
1224                    commands.push(EditCommand::Set {
1225                        start: target.clone(),
1226                        end: target,
1227                        content,
1228                    });
1229                }
1230            } else {
1231                let after = parse_line_ref(specifier);
1232                commands.push(EditCommand::Insert { after, content });
1233            }
1234
1235            offset = scan;
1236        }
1237
1238        commands
1239    }
1240
1241    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1242    /// (as opposed to being a plain full-replacement output).
1243    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1244    /// editable region, returning the plain text content.
1245    pub fn strip_hashline_prefixes(region: &str) -> String {
1246        let mut decoded: String = region
1247            .lines()
1248            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1249            .collect::<Vec<_>>()
1250            .join("\n");
1251        if region.ends_with('\n') {
1252            decoded.push('\n');
1253        }
1254        decoded
1255    }
1256
1257    pub fn output_has_edit_commands(model_output: &str) -> bool {
1258        model_output.contains(SET_COMMAND_MARKER)
1259            || model_output.contains(INSERT_COMMAND_MARKER)
1260            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1261    }
1262
1263    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1264    /// original editable region text.
1265    ///
1266    /// `editable_region` is the original text of the editable region (without hash
1267    /// prefixes). `model_output` is the raw model response containing edit commands.
1268    ///
1269    /// Returns the full replacement text for the editable region.
1270    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1271        if model_output
1272            .trim_start()
1273            .starts_with(NO_EDITS_COMMAND_MARKER)
1274        {
1275            return editable_region.to_string();
1276        }
1277
1278        let original_lines: Vec<&str> = editable_region.lines().collect();
1279        let old_hashes: Vec<u8> = original_lines
1280            .iter()
1281            .map(|line| hash_line(line.as_bytes()))
1282            .collect();
1283
1284        let commands = parse_edit_commands(model_output);
1285
1286        // For set operations: indexed by start line → Some((end line index, content))
1287        // For insert operations: indexed by line index → vec of content to insert after
1288        // Insert-before-first is tracked separately.
1289        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1290        let mut insert_before_first: Vec<&str> = Vec::new();
1291        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1292
1293        for command in &commands {
1294            match command {
1295                EditCommand::Set {
1296                    start,
1297                    end,
1298                    content,
1299                } => {
1300                    if start.index < old_hashes.len()
1301                        && end.index < old_hashes.len()
1302                        && start.index <= end.index
1303                        && old_hashes[start.index] == start.hash
1304                        && old_hashes[end.index] == end.hash
1305                    {
1306                        set_ops[start.index] = Some((end.index, *content));
1307                    }
1308                }
1309                EditCommand::Insert { after, content } => match after {
1310                    None => insert_before_first.push(*content),
1311                    Some(line_ref) => {
1312                        if line_ref.index < old_hashes.len()
1313                            && old_hashes[line_ref.index] == line_ref.hash
1314                        {
1315                            insert_after[line_ref.index].push(*content);
1316                        }
1317                    }
1318                },
1319            }
1320        }
1321
1322        let mut result = String::new();
1323
1324        // Emit any insertions before the first line
1325        for content in &insert_before_first {
1326            result.push_str(content);
1327            if !content.ends_with('\n') {
1328                result.push('\n');
1329            }
1330        }
1331
1332        let mut i = 0;
1333        while i < original_lines.len() {
1334            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1335                // Replace lines i..=end_index with the replacement content
1336                result.push_str(replacement);
1337                if !replacement.is_empty() && !replacement.ends_with('\n') {
1338                    result.push('\n');
1339                }
1340                // Emit any insertions after the end of this set range
1341                if *end_index < insert_after.len() {
1342                    for content in &insert_after[*end_index] {
1343                        result.push_str(content);
1344                        if !content.ends_with('\n') {
1345                            result.push('\n');
1346                        }
1347                    }
1348                }
1349                i = end_index + 1;
1350            } else {
1351                // Keep the original line
1352                result.push_str(original_lines[i]);
1353                result.push('\n');
1354                // Emit any insertions after this line
1355                for content in &insert_after[i] {
1356                    result.push_str(content);
1357                    if !content.ends_with('\n') {
1358                        result.push('\n');
1359                    }
1360                }
1361                i += 1;
1362            }
1363        }
1364
1365        // Preserve trailing newline behavior: if the original ended with a
1366        // newline the result already has one; if it didn't, trim the extra one
1367        // we added.
1368        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1369            result.pop();
1370        }
1371
1372        result
1373    }
1374
1375    /// Convert a unified diff patch into hashline edit commands.
1376    ///
1377    /// Parses the unified diff `patch` directly to determine which lines of
1378    /// `old_text` are deleted/replaced and what new lines are added, then emits
1379    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1380    /// `{index}:{hash}` identifiers.
1381    ///
1382    /// `cursor_offset` is an optional byte offset into the first hunk's new
1383    /// text (context + additions) where the cursor marker should be placed.
1384    pub fn patch_to_edit_commands(
1385        old_text: &str,
1386        patch: &str,
1387        cursor_offset: Option<usize>,
1388    ) -> Result<String> {
1389        let old_lines: Vec<&str> = old_text.lines().collect();
1390        let old_hashes: Vec<u8> = old_lines
1391            .iter()
1392            .map(|line| hash_line(line.as_bytes()))
1393            .collect();
1394
1395        let mut result = String::new();
1396        let mut first_hunk = true;
1397
1398        struct Hunk<'a> {
1399            line_range: Range<usize>,
1400            new_text_lines: Vec<&'a str>,
1401            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1402        }
1403
1404        // Parse the patch line by line. We only care about hunk headers,
1405        // context, deletions, and additions.
1406        let mut old_line_index: usize = 0;
1407        let mut current_hunk: Option<Hunk> = None;
1408        // Byte offset tracking within the hunk's new text for cursor placement.
1409        let mut new_text_byte_offset: usize = 0;
1410        // The line index of the last old line seen before/in the current hunk
1411        // (used for insert-after reference).
1412        let mut last_old_line_before_hunk: Option<usize> = None;
1413
1414        fn flush_hunk(
1415            hunk: Hunk,
1416            last_old_line: Option<usize>,
1417            result: &mut String,
1418            old_hashes: &[u8],
1419        ) {
1420            if hunk.line_range.is_empty() {
1421                // Pure insertion — reference the old line to insert after when in bounds.
1422                if let Some(after) = last_old_line
1423                    && let Some(&hash) = old_hashes.get(after)
1424                {
1425                    write!(
1426                        result,
1427                        "{INSERT_COMMAND_MARKER}{}\n",
1428                        LineRef { index: after, hash }
1429                    )
1430                    .unwrap();
1431                } else {
1432                    result.push_str(INSERT_COMMAND_MARKER);
1433                    result.push('\n');
1434                }
1435            } else {
1436                let start = hunk.line_range.start;
1437                let end_exclusive = hunk.line_range.end;
1438                let deleted_line_count = end_exclusive.saturating_sub(start);
1439
1440                if deleted_line_count == 1 {
1441                    if let Some(&hash) = old_hashes.get(start) {
1442                        write!(
1443                            result,
1444                            "{SET_COMMAND_MARKER}{}\n",
1445                            LineRef { index: start, hash }
1446                        )
1447                        .unwrap();
1448                    } else {
1449                        result.push_str(SET_COMMAND_MARKER);
1450                        result.push('\n');
1451                    }
1452                } else {
1453                    let end_inclusive = end_exclusive - 1;
1454                    match (
1455                        old_hashes.get(start).copied(),
1456                        old_hashes.get(end_inclusive).copied(),
1457                    ) {
1458                        (Some(start_hash), Some(end_hash)) => {
1459                            write!(
1460                                result,
1461                                "{SET_COMMAND_MARKER}{}-{}\n",
1462                                LineRef {
1463                                    index: start,
1464                                    hash: start_hash
1465                                },
1466                                LineRef {
1467                                    index: end_inclusive,
1468                                    hash: end_hash
1469                                }
1470                            )
1471                            .unwrap();
1472                        }
1473                        _ => {
1474                            result.push_str(SET_COMMAND_MARKER);
1475                            result.push('\n');
1476                        }
1477                    }
1478                }
1479            }
1480            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1481                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1482                    && line_offset == cursor_line_offset
1483                {
1484                    result.push_str(&line[..char_offset]);
1485                    result.push_str(CURSOR_MARKER);
1486                    result.push_str(&line[char_offset..]);
1487                    continue;
1488                }
1489
1490                result.push_str(line);
1491            }
1492        }
1493
1494        for raw_line in patch.split_inclusive('\n') {
1495            if raw_line.starts_with("@@") {
1496                // Flush any pending change hunk from a previous patch hunk.
1497                if let Some(hunk) = current_hunk.take() {
1498                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1499                }
1500
1501                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1502                // We intentionally do not trust old_start as a direct local index into `old_text`,
1503                // because some patches are produced against a larger file region and carry
1504                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1505                if first_hunk {
1506                    new_text_byte_offset = 0;
1507                    first_hunk = false;
1508                }
1509                continue;
1510            }
1511
1512            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1513                continue;
1514            }
1515            if raw_line.starts_with("\\ No newline") {
1516                continue;
1517            }
1518
1519            if raw_line.starts_with('-') {
1520                // Extend or start a change hunk with this deleted old line.
1521                match &mut current_hunk {
1522                    Some(Hunk {
1523                        line_range: range, ..
1524                    }) => range.end = old_line_index + 1,
1525                    None => {
1526                        current_hunk = Some(Hunk {
1527                            line_range: old_line_index..old_line_index + 1,
1528                            new_text_lines: Vec::new(),
1529                            cursor_line_offset_in_new_text: None,
1530                        });
1531                    }
1532                }
1533                old_line_index += 1;
1534            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1535                // Place cursor marker if cursor_offset falls within this line.
1536                let mut cursor_line_offset = None;
1537                if let Some(cursor_off) = cursor_offset
1538                    && (first_hunk
1539                        || cursor_off >= new_text_byte_offset
1540                            && cursor_off <= new_text_byte_offset + added_content.len())
1541                {
1542                    let line_offset = added_content.floor_char_boundary(
1543                        cursor_off
1544                            .saturating_sub(new_text_byte_offset)
1545                            .min(added_content.len()),
1546                    );
1547                    cursor_line_offset = Some(line_offset);
1548                }
1549
1550                new_text_byte_offset += added_content.len();
1551
1552                let hunk = current_hunk.get_or_insert(Hunk {
1553                    line_range: old_line_index..old_line_index,
1554                    new_text_lines: vec![],
1555                    cursor_line_offset_in_new_text: None,
1556                });
1557                hunk.new_text_lines.push(added_content);
1558                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1559                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1560            } else {
1561                // Context line (starts with ' ' or is empty).
1562                if let Some(hunk) = current_hunk.take() {
1563                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1564                }
1565                last_old_line_before_hunk = Some(old_line_index);
1566                old_line_index += 1;
1567                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1568                new_text_byte_offset += content.len();
1569            }
1570        }
1571
1572        // Flush final group.
1573        if let Some(hunk) = current_hunk.take() {
1574            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1575        }
1576
1577        // Trim a single trailing newline.
1578        if result.ends_with('\n') {
1579            result.pop();
1580        }
1581
1582        if result.is_empty() {
1583            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1584        }
1585
1586        Ok(result)
1587    }
1588
1589    #[cfg(test)]
1590    mod tests {
1591        use super::*;
1592        use indoc::indoc;
1593
1594        #[test]
1595        fn test_format_cursor_region() {
1596            struct Case {
1597                name: &'static str,
1598                context: &'static str,
1599                editable_range: Range<usize>,
1600                cursor_offset: usize,
1601                expected: &'static str,
1602            }
1603
1604            let cases = [
1605                Case {
1606                    name: "basic_cursor_placement",
1607                    context: "hello world\n",
1608                    editable_range: 0..12,
1609                    cursor_offset: 5,
1610                    expected: indoc! {"
1611                    <|file_sep|>test.rs
1612                    <|fim_prefix|>
1613                    <|fim_middle|>current
1614                    0:5c|hello<|user_cursor|> world
1615                    <|fim_suffix|>
1616                    <|fim_middle|>updated
1617                    "},
1618                },
1619                Case {
1620                    name: "multiline_cursor_on_second_line",
1621                    context: "aaa\nbbb\nccc\n",
1622                    editable_range: 0..12,
1623                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1624                    expected: indoc! {"
1625                    <|file_sep|>test.rs
1626                    <|fim_prefix|>
1627                    <|fim_middle|>current
1628                    0:23|aaa
1629                    1:26|b<|user_cursor|>bb
1630                    2:29|ccc
1631                    <|fim_suffix|>
1632                    <|fim_middle|>updated
1633                    "},
1634                },
1635                Case {
1636                    name: "no_trailing_newline_in_context",
1637                    context: "line1\nline2",
1638                    editable_range: 0..11,
1639                    cursor_offset: 3,
1640                    expected: indoc! {"
1641                    <|file_sep|>test.rs
1642                    <|fim_prefix|>
1643                    <|fim_middle|>current
1644                    0:d9|lin<|user_cursor|>e1
1645                    1:da|line2
1646                    <|fim_suffix|>
1647                    <|fim_middle|>updated
1648                    "},
1649                },
1650                Case {
1651                    name: "leading_newline_in_editable_region",
1652                    context: "\nabc\n",
1653                    editable_range: 0..5,
1654                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1655                    expected: indoc! {"
1656                    <|file_sep|>test.rs
1657                    <|fim_prefix|>
1658                    <|fim_middle|>current
1659                    0:00|
1660                    1:26|a<|user_cursor|>bc
1661                    <|fim_suffix|>
1662                    <|fim_middle|>updated
1663                    "},
1664                },
1665                Case {
1666                    name: "with_suffix",
1667                    context: "abc\ndef",
1668                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1669                    cursor_offset: 2,
1670                    expected: indoc! {"
1671                    <|file_sep|>test.rs
1672                    <|fim_prefix|>
1673                    <|fim_middle|>current
1674                    0:26|ab<|user_cursor|>c
1675                    <|fim_suffix|>
1676                    def
1677                    <|fim_middle|>updated
1678                    "},
1679                },
1680                Case {
1681                    name: "unicode_two_byte_chars",
1682                    context: "héllo\n",
1683                    editable_range: 0..7,
1684                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1685                    expected: indoc! {"
1686                    <|file_sep|>test.rs
1687                    <|fim_prefix|>
1688                    <|fim_middle|>current
1689                    0:1b|hé<|user_cursor|>llo
1690                    <|fim_suffix|>
1691                    <|fim_middle|>updated
1692                    "},
1693                },
1694                Case {
1695                    name: "unicode_three_byte_chars",
1696                    context: "日本語\n",
1697                    editable_range: 0..10,
1698                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1699                    expected: indoc! {"
1700                    <|file_sep|>test.rs
1701                    <|fim_prefix|>
1702                    <|fim_middle|>current
1703                    0:80|日本<|user_cursor|>語
1704                    <|fim_suffix|>
1705                    <|fim_middle|>updated
1706                    "},
1707                },
1708                Case {
1709                    name: "unicode_four_byte_chars",
1710                    context: "a🌍b\n",
1711                    editable_range: 0..7,
1712                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1713                    expected: indoc! {"
1714                    <|file_sep|>test.rs
1715                    <|fim_prefix|>
1716                    <|fim_middle|>current
1717                    0:6b|a🌍<|user_cursor|>b
1718                    <|fim_suffix|>
1719                    <|fim_middle|>updated
1720                    "},
1721                },
1722                Case {
1723                    name: "cursor_at_start_of_region_not_placed",
1724                    context: "abc\n",
1725                    editable_range: 0..4,
1726                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1727                    expected: indoc! {"
1728                    <|file_sep|>test.rs
1729                    <|fim_prefix|>
1730                    <|fim_middle|>current
1731                    0:26|abc
1732                    <|fim_suffix|>
1733                    <|fim_middle|>updated
1734                    "},
1735                },
1736                Case {
1737                    name: "cursor_at_end_of_line_not_placed",
1738                    context: "abc\ndef\n",
1739                    editable_range: 0..8,
1740                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1741                    expected: indoc! {"
1742                    <|file_sep|>test.rs
1743                    <|fim_prefix|>
1744                    <|fim_middle|>current
1745                    0:26|abc
1746                    1:2f|def
1747                    <|fim_suffix|>
1748                    <|fim_middle|>updated
1749                    "},
1750                },
1751                Case {
1752                    name: "cursor_offset_relative_to_context_not_editable_region",
1753                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
1754                    // write_cursor_excerpt_section must subtract it before comparing against
1755                    // per-line offsets within the editable region.
1756                    context: "pre\naaa\nbbb\nsuf\n",
1757                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
1758                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
1759                    expected: indoc! {"
1760                    <|file_sep|>test.rs
1761                    <|fim_prefix|>
1762                    pre
1763                    <|fim_middle|>current
1764                    0:23|aaa
1765                    1:26|b<|user_cursor|>bb
1766                    <|fim_suffix|>
1767                    suf
1768                    <|fim_middle|>updated
1769                    "},
1770                },
1771            ];
1772
1773            for case in &cases {
1774                let mut prompt = String::new();
1775                hashline::write_cursor_excerpt_section(
1776                    &mut prompt,
1777                    Path::new("test.rs"),
1778                    case.context,
1779                    &case.editable_range,
1780                    case.cursor_offset,
1781                );
1782                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1783            }
1784        }
1785
1786        #[test]
1787        fn test_apply_edit_commands() {
1788            struct Case {
1789                name: &'static str,
1790                original: &'static str,
1791                model_output: &'static str,
1792                expected: &'static str,
1793            }
1794
1795            let cases = vec![
1796                Case {
1797                    name: "set_single_line",
1798                    original: indoc! {"
1799                    let mut total = 0;
1800                    for product in products {
1801                        total += ;
1802                    }
1803                    total
1804                "},
1805                    model_output: indoc! {"
1806                    <|set|>2:87
1807                        total += product.price;
1808                "},
1809                    expected: indoc! {"
1810                    let mut total = 0;
1811                    for product in products {
1812                        total += product.price;
1813                    }
1814                    total
1815                "},
1816                },
1817                Case {
1818                    name: "set_range",
1819                    original: indoc! {"
1820                    fn foo() {
1821                        let x = 1;
1822                        let y = 2;
1823                        let z = 3;
1824                    }
1825                "},
1826                    model_output: indoc! {"
1827                    <|set|>1:46-3:4a
1828                        let sum = 6;
1829                "},
1830                    expected: indoc! {"
1831                    fn foo() {
1832                        let sum = 6;
1833                    }
1834                "},
1835                },
1836                Case {
1837                    name: "insert_after_line",
1838                    original: indoc! {"
1839                    fn main() {
1840                        let x = 1;
1841                    }
1842                "},
1843                    model_output: indoc! {"
1844                    <|insert|>1:46
1845                        let y = 2;
1846                "},
1847                    expected: indoc! {"
1848                    fn main() {
1849                        let x = 1;
1850                        let y = 2;
1851                    }
1852                "},
1853                },
1854                Case {
1855                    name: "insert_before_first",
1856                    original: indoc! {"
1857                    let x = 1;
1858                    let y = 2;
1859                "},
1860                    model_output: indoc! {"
1861                    <|insert|>
1862                    use std::io;
1863                "},
1864                    expected: indoc! {"
1865                    use std::io;
1866                    let x = 1;
1867                    let y = 2;
1868                "},
1869                },
1870                Case {
1871                    name: "set_with_cursor_marker",
1872                    original: indoc! {"
1873                    fn main() {
1874                        println!();
1875                    }
1876                "},
1877                    model_output: indoc! {"
1878                    <|set|>1:34
1879                        eprintln!(\"<|user_cursor|>\");
1880                "},
1881                    expected: indoc! {"
1882                    fn main() {
1883                        eprintln!(\"<|user_cursor|>\");
1884                    }
1885                "},
1886                },
1887                Case {
1888                    name: "multiple_set_commands",
1889                    original: indoc! {"
1890                    aaa
1891                    bbb
1892                    ccc
1893                    ddd
1894                "},
1895                    model_output: indoc! {"
1896                    <|set|>0:23
1897                    AAA
1898                    <|set|>2:29
1899                    CCC
1900                "},
1901                    expected: indoc! {"
1902                    AAA
1903                    bbb
1904                    CCC
1905                    ddd
1906                "},
1907                },
1908                Case {
1909                    name: "set_range_multiline_replacement",
1910                    original: indoc! {"
1911                    fn handle_submit() {
1912                    }
1913
1914                    fn handle_keystroke() {
1915                "},
1916                    model_output: indoc! {"
1917                    <|set|>0:3f-1:7d
1918                    fn handle_submit(modal_state: &mut ModalState) {
1919                        <|user_cursor|>
1920                    }
1921                "},
1922                    expected: indoc! {"
1923                    fn handle_submit(modal_state: &mut ModalState) {
1924                        <|user_cursor|>
1925                    }
1926
1927                    fn handle_keystroke() {
1928                "},
1929                },
1930                Case {
1931                    name: "no_edit_commands_returns_original",
1932                    original: indoc! {"
1933                    hello
1934                    world
1935                "},
1936                    model_output: "some random text with no commands",
1937                    expected: indoc! {"
1938                    hello
1939                    world
1940                "},
1941                },
1942                Case {
1943                    name: "no_edits_command_returns_original",
1944                    original: indoc! {"
1945                    hello
1946                    world
1947                "},
1948                    model_output: "<|no_edits|>",
1949                    expected: indoc! {"
1950                    hello
1951                    world
1952                "},
1953                },
1954                Case {
1955                    name: "wrong_hash_set_ignored",
1956                    original: indoc! {"
1957                    aaa
1958                    bbb
1959                "},
1960                    model_output: indoc! {"
1961                    <|set|>0:ff
1962                    ZZZ
1963                "},
1964                    expected: indoc! {"
1965                    aaa
1966                    bbb
1967                "},
1968                },
1969                Case {
1970                    name: "insert_and_set_combined",
1971                    original: indoc! {"
1972                    alpha
1973                    beta
1974                    gamma
1975                "},
1976                    model_output: indoc! {"
1977                    <|set|>0:06
1978                    ALPHA
1979                    <|insert|>1:9c
1980                    beta_extra
1981                "},
1982                    expected: indoc! {"
1983                    ALPHA
1984                    beta
1985                    beta_extra
1986                    gamma
1987                "},
1988                },
1989                Case {
1990                    name: "no_trailing_newline_preserved",
1991                    original: "hello\nworld",
1992                    model_output: indoc! {"
1993                    <|set|>0:14
1994                    HELLO
1995                "},
1996                    expected: "HELLO\nworld",
1997                },
1998                Case {
1999                    name: "set_range_hash_mismatch_in_end_bound",
2000                    original: indoc! {"
2001                    one
2002                    two
2003                    three
2004                "},
2005                    model_output: indoc! {"
2006                    <|set|>0:42-2:ff
2007                    ONE_TWO_THREE
2008                "},
2009                    expected: indoc! {"
2010                    one
2011                    two
2012                    three
2013                "},
2014                },
2015                Case {
2016                    name: "set_range_start_greater_than_end_ignored",
2017                    original: indoc! {"
2018                    a
2019                    b
2020                    c
2021                "},
2022                    model_output: indoc! {"
2023                    <|set|>2:63-1:62
2024                    X
2025                "},
2026                    expected: indoc! {"
2027                    a
2028                    b
2029                    c
2030                "},
2031                },
2032                Case {
2033                    name: "insert_out_of_bounds_ignored",
2034                    original: indoc! {"
2035                    x
2036                    y
2037                "},
2038                    model_output: indoc! {"
2039                    <|insert|>99:aa
2040                    z
2041                "},
2042                    expected: indoc! {"
2043                    x
2044                    y
2045                "},
2046                },
2047                Case {
2048                    name: "set_out_of_bounds_ignored",
2049                    original: indoc! {"
2050                    x
2051                    y
2052                "},
2053                    model_output: indoc! {"
2054                    <|set|>99:aa
2055                    z
2056                "},
2057                    expected: indoc! {"
2058                    x
2059                    y
2060                "},
2061                },
2062                Case {
2063                    name: "malformed_set_command_ignored",
2064                    original: indoc! {"
2065                    alpha
2066                    beta
2067                "},
2068                    model_output: indoc! {"
2069                    <|set|>not-a-line-ref
2070                    UPDATED
2071                "},
2072                    expected: indoc! {"
2073                    alpha
2074                    beta
2075                "},
2076                },
2077                Case {
2078                    name: "malformed_insert_hash_treated_as_before_first",
2079                    original: indoc! {"
2080                    alpha
2081                    beta
2082                "},
2083                    model_output: indoc! {"
2084                    <|insert|>1:nothex
2085                    preamble
2086                "},
2087                    expected: indoc! {"
2088                    preamble
2089                    alpha
2090                    beta
2091                "},
2092                },
2093                Case {
2094                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2095                    original: indoc! {"
2096                    cat
2097                    dog
2098                "},
2099                    model_output: indoc! {"
2100                    <|set|>0:38
2101                    CAT
2102                    <|insert|>0:38
2103                    TAIL
2104                "},
2105                    expected: indoc! {"
2106                    CAT
2107                    TAIL
2108                    dog
2109                "},
2110                },
2111                Case {
2112                    name: "overlapping_set_ranges_last_wins",
2113                    original: indoc! {"
2114                    a
2115                    b
2116                    c
2117                    d
2118                "},
2119                    model_output: indoc! {"
2120                    <|set|>0:61-2:63
2121                    FIRST
2122                    <|set|>1:62-3:64
2123                    SECOND
2124                "},
2125                    expected: indoc! {"
2126                    FIRST
2127                    d
2128                "},
2129                },
2130                Case {
2131                    name: "insert_before_first_and_after_line",
2132                    original: indoc! {"
2133                    a
2134                    b
2135                "},
2136                    model_output: indoc! {"
2137                    <|insert|>
2138                    HEAD
2139                    <|insert|>0:61
2140                    MID
2141                "},
2142                    expected: indoc! {"
2143                    HEAD
2144                    a
2145                    MID
2146                    b
2147                "},
2148                },
2149            ];
2150
2151            for case in &cases {
2152                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2153                assert_eq!(result, case.expected, "failed case: {}", case.name);
2154            }
2155        }
2156
2157        #[test]
2158        fn test_output_has_edit_commands() {
2159            assert!(hashline::output_has_edit_commands(&format!(
2160                "{}0:ab\nnew",
2161                SET_COMMAND_MARKER
2162            )));
2163            assert!(hashline::output_has_edit_commands(&format!(
2164                "{}0:ab\nnew",
2165                INSERT_COMMAND_MARKER
2166            )));
2167            assert!(hashline::output_has_edit_commands(&format!(
2168                "some text\n{}1:cd\nstuff",
2169                SET_COMMAND_MARKER
2170            )));
2171            assert!(!hashline::output_has_edit_commands("just plain text"));
2172            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2173            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2174        }
2175
2176        // ---- hashline::patch_to_edit_commands round-trip tests ----
2177
2178        #[test]
2179        fn test_patch_to_edit_commands() {
2180            struct Case {
2181                name: &'static str,
2182                old: &'static str,
2183                patch: &'static str,
2184                expected_new: &'static str,
2185            }
2186
2187            let cases = [
2188                Case {
2189                    name: "single_line_replacement",
2190                    old: indoc! {"
2191                    let mut total = 0;
2192                    for product in products {
2193                        total += ;
2194                    }
2195                    total
2196                "},
2197                    patch: indoc! {"
2198                    @@ -1,5 +1,5 @@
2199                     let mut total = 0;
2200                     for product in products {
2201                    -    total += ;
2202                    +    total += product.price;
2203                     }
2204                     total
2205                "},
2206                    expected_new: indoc! {"
2207                    let mut total = 0;
2208                    for product in products {
2209                        total += product.price;
2210                    }
2211                    total
2212                "},
2213                },
2214                Case {
2215                    name: "multiline_replacement",
2216                    old: indoc! {"
2217                    fn foo() {
2218                        let x = 1;
2219                        let y = 2;
2220                        let z = 3;
2221                    }
2222                "},
2223                    patch: indoc! {"
2224                    @@ -1,5 +1,3 @@
2225                     fn foo() {
2226                    -    let x = 1;
2227                    -    let y = 2;
2228                    -    let z = 3;
2229                    +    let sum = 1 + 2 + 3;
2230                     }
2231                "},
2232                    expected_new: indoc! {"
2233                    fn foo() {
2234                        let sum = 1 + 2 + 3;
2235                    }
2236                "},
2237                },
2238                Case {
2239                    name: "insertion",
2240                    old: indoc! {"
2241                    fn main() {
2242                        let x = 1;
2243                    }
2244                "},
2245                    patch: indoc! {"
2246                    @@ -1,3 +1,4 @@
2247                     fn main() {
2248                         let x = 1;
2249                    +    let y = 2;
2250                     }
2251                "},
2252                    expected_new: indoc! {"
2253                    fn main() {
2254                        let x = 1;
2255                        let y = 2;
2256                    }
2257                "},
2258                },
2259                Case {
2260                    name: "insertion_before_first",
2261                    old: indoc! {"
2262                    let x = 1;
2263                    let y = 2;
2264                "},
2265                    patch: indoc! {"
2266                    @@ -1,2 +1,3 @@
2267                    +use std::io;
2268                     let x = 1;
2269                     let y = 2;
2270                "},
2271                    expected_new: indoc! {"
2272                    use std::io;
2273                    let x = 1;
2274                    let y = 2;
2275                "},
2276                },
2277                Case {
2278                    name: "deletion",
2279                    old: indoc! {"
2280                    aaa
2281                    bbb
2282                    ccc
2283                    ddd
2284                "},
2285                    patch: indoc! {"
2286                    @@ -1,4 +1,2 @@
2287                     aaa
2288                    -bbb
2289                    -ccc
2290                     ddd
2291                "},
2292                    expected_new: indoc! {"
2293                    aaa
2294                    ddd
2295                "},
2296                },
2297                Case {
2298                    name: "multiple_changes",
2299                    old: indoc! {"
2300                    alpha
2301                    beta
2302                    gamma
2303                    delta
2304                    epsilon
2305                "},
2306                    patch: indoc! {"
2307                    @@ -1,5 +1,5 @@
2308                    -alpha
2309                    +ALPHA
2310                     beta
2311                     gamma
2312                    -delta
2313                    +DELTA
2314                     epsilon
2315                "},
2316                    expected_new: indoc! {"
2317                    ALPHA
2318                    beta
2319                    gamma
2320                    DELTA
2321                    epsilon
2322                "},
2323                },
2324                Case {
2325                    name: "replace_with_insertion",
2326                    old: indoc! {r#"
2327                    fn handle() {
2328                        modal_state.close();
2329                        modal_state.dismiss();
2330                "#},
2331                    patch: indoc! {r#"
2332                    @@ -1,3 +1,4 @@
2333                     fn handle() {
2334                         modal_state.close();
2335                    +    eprintln!("");
2336                         modal_state.dismiss();
2337                "#},
2338                    expected_new: indoc! {r#"
2339                    fn handle() {
2340                        modal_state.close();
2341                        eprintln!("");
2342                        modal_state.dismiss();
2343                "#},
2344                },
2345                Case {
2346                    name: "complete_replacement",
2347                    old: indoc! {"
2348                    aaa
2349                    bbb
2350                    ccc
2351                "},
2352                    patch: indoc! {"
2353                    @@ -1,3 +1,3 @@
2354                    -aaa
2355                    -bbb
2356                    -ccc
2357                    +xxx
2358                    +yyy
2359                    +zzz
2360                "},
2361                    expected_new: indoc! {"
2362                    xxx
2363                    yyy
2364                    zzz
2365                "},
2366                },
2367                Case {
2368                    name: "add_function_body",
2369                    old: indoc! {"
2370                    fn foo() {
2371                        modal_state.dismiss();
2372                    }
2373
2374                    fn
2375
2376                    fn handle_keystroke() {
2377                "},
2378                    patch: indoc! {"
2379                    @@ -1,6 +1,8 @@
2380                     fn foo() {
2381                         modal_state.dismiss();
2382                     }
2383
2384                    -fn
2385                    +fn handle_submit() {
2386                    +    todo()
2387                    +}
2388
2389                     fn handle_keystroke() {
2390                "},
2391                    expected_new: indoc! {"
2392                    fn foo() {
2393                        modal_state.dismiss();
2394                    }
2395
2396                    fn handle_submit() {
2397                        todo()
2398                    }
2399
2400                    fn handle_keystroke() {
2401                "},
2402                },
2403                Case {
2404                    name: "with_cursor_offset",
2405                    old: indoc! {r#"
2406                    fn main() {
2407                        println!();
2408                    }
2409                "#},
2410                    patch: indoc! {r#"
2411                        @@ -1,3 +1,3 @@
2412                        fn main() {
2413                        -    println!();
2414                        +    eprintln!("");
2415                        }
2416                    "#},
2417                    expected_new: indoc! {r#"
2418                        fn main() {
2419                            eprintln!("<|user_cursor|>");
2420                        }
2421                    "#},
2422                },
2423                Case {
2424                    name: "non_local_hunk_header_pure_insertion_repro",
2425                    old: indoc! {"
2426                        aaa
2427                        bbb
2428                    "},
2429                    patch: indoc! {"
2430                        @@ -20,2 +20,3 @@
2431                        aaa
2432                        +xxx
2433                        bbb
2434                    "},
2435                    expected_new: indoc! {"
2436                        aaa
2437                        xxx
2438                        bbb
2439                    "},
2440                },
2441                Case {
2442                    name: "empty_patch_produces_no_edits_marker",
2443                    old: indoc! {"
2444                        aaa
2445                        bbb
2446                    "},
2447                    patch: "@@ -20,2 +20,3 @@\n",
2448                    expected_new: indoc! {"
2449                        aaa
2450                        bbb
2451                    "},
2452                },
2453            ];
2454
2455            for case in &cases {
2456                // The cursor_offset for patch_to_edit_commands is relative to
2457                // the first hunk's new text (context + additions). We compute
2458                // it by finding where the marker sits in the expected output
2459                // (which mirrors the new text of the hunk).
2460                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2461
2462                let commands =
2463                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2464                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2465
2466                assert!(
2467                    hashline::output_has_edit_commands(&commands),
2468                    "case {}: expected edit commands, got: {commands:?}",
2469                    case.name,
2470                );
2471
2472                let applied = hashline::apply_edit_commands(case.old, &commands);
2473                assert_eq!(applied, case.expected_new, "case {}", case.name);
2474            }
2475        }
2476    }
2477}
2478
2479pub mod seed_coder {
2480    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2481    //!
2482    //! Seed-Coder uses different FIM tokens and order than Qwen:
2483    //! - SPM order: suffix comes FIRST, then prefix, then middle
2484    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2485    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2486    //!
2487    //! All context (related files, edit history) goes in the PREFIX section.
2488    //! The suffix contains only code after the editable region.
2489    //!
    //! Example prompt:
    //!
    //! ```text
    //! <[fim-suffix]>
    //! code after editable region
    //! <[fim-prefix]><filename>related/file.py
    //! related file content
    //!
    //! <filename>edit_history
    //! --- a/some_file.py
    //! +++ b/some_file.py
    //! -old
    //! +new
    //!
    //! <filename>path/to/target_file.py
    //! code before editable region
    //! <<<<<<< CURRENT
    //! code that
    //! needs to<|user_cursor|>
    //! be rewritten
    //! =======
    //! <[fim-middle]>
    //! ```
    //!
    //! Expected output (model generates):
    //!
    //! ```text
    //! updated
    //! code with
    //! changes applied
    //! >>>>>>> UPDATED
    //! ```
2518
2519    use super::*;
2520
    /// FIM token that opens the suffix section; `build_suffix_section` writes
    /// it before the text that follows the editable region.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// FIM token that opens the prefix section; `format_prompt_with_budget`
    /// writes it right after the suffix section.
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// FIM token that ends the prompt; `format_prompt_with_budget` writes it last.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// File marker passed to the shared edit-history and related-files
    /// formatters; the module docs describe it as a StarCoder-style marker
    /// followed by a path.
    pub const FILE_MARKER: &str = "<filename>";

    /// Line that precedes the current editable text in the module-doc example prompt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line that follows the current editable text in the module-doc example prompt.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that ends the model's expected output in the module-doc example.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// NOTE(review): presumably the output that signals "no changes"; its use
    /// is not visible in this part of the file, so confirm against the parser.
    pub const NO_EDITS: &str = "NO_EDITS\n";
2531
2532    pub fn special_tokens() -> &'static [&'static str] {
2533        &[
2534            FIM_SUFFIX,
2535            FIM_PREFIX,
2536            FIM_MIDDLE,
2537            FILE_MARKER,
2538            START_MARKER,
2539            SEPARATOR,
2540            END_MARKER,
2541            CURSOR_MARKER,
2542        ]
2543    }
2544
2545    pub fn write_cursor_excerpt_section(
2546        prompt: &mut String,
2547        path: &Path,
2548        context: &str,
2549        editable_range: &Range<usize>,
2550        cursor_offset: usize,
2551    ) {
2552        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2553        prompt.push_str(&section);
2554    }
2555
2556    pub fn format_prompt_with_budget(
2557        path: &Path,
2558        context: &str,
2559        editable_range: &Range<usize>,
2560        cursor_offset: usize,
2561        events: &[Arc<Event>],
2562        related_files: &[RelatedFile],
2563        max_tokens: usize,
2564    ) -> String {
2565        let suffix_section = build_suffix_section(context, editable_range);
2566        let cursor_prefix_section =
2567            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2568
2569        let suffix_tokens = estimate_tokens(suffix_section.len());
2570        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2571        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2572
2573        let edit_history_section = super::format_edit_history_within_budget(
2574            events,
2575            FILE_MARKER,
2576            "edit_history",
2577            budget_after_cursor,
2578        );
2579        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2580        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2581
2582        let related_files_section = super::format_related_files_within_budget(
2583            related_files,
2584            FILE_MARKER,
2585            "",
2586            budget_after_edit_history,
2587        );
2588
2589        let mut prompt = String::new();
2590        prompt.push_str(&suffix_section);
2591        prompt.push_str(FIM_PREFIX);
2592        prompt.push_str(&related_files_section);
2593        if !related_files_section.is_empty() {
2594            prompt.push('\n');
2595        }
2596        prompt.push_str(&edit_history_section);
2597        if !edit_history_section.is_empty() {
2598            prompt.push('\n');
2599        }
2600        prompt.push_str(&cursor_prefix_section);
2601        prompt.push_str(FIM_MIDDLE);
2602        prompt
2603    }
2604
2605    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2606        let mut section = String::new();
2607        section.push_str(FIM_SUFFIX);
2608        section.push_str(&context[editable_range.end..]);
2609        if !section.ends_with('\n') {
2610            section.push('\n');
2611        }
2612        section
2613    }
2614
2615    fn build_cursor_prefix_section(
2616        path: &Path,
2617        context: &str,
2618        editable_range: &Range<usize>,
2619        cursor_offset: usize,
2620    ) -> String {
2621        let mut section = String::new();
2622        let path_str = path.to_string_lossy();
2623        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2624
2625        section.push_str(&context[..editable_range.start]);
2626        section.push_str(START_MARKER);
2627        section.push_str(&context[editable_range.start..cursor_offset]);
2628        section.push_str(CURSOR_MARKER);
2629        section.push_str(&context[cursor_offset..editable_range.end]);
2630        if !section.ends_with('\n') {
2631            section.push('\n');
2632        }
2633        section.push_str(SEPARATOR);
2634        section
2635    }
2636
2637    /// Format patch as containing no changes if it's empty; otherwise return None.
2638    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2639        // Count lines in the patch
2640        let empty_patch = patch.lines().count() <= 3;
2641        if empty_patch {
2642            Some(format!("{NO_EDITS}{END_MARKER}"))
2643        } else {
2644            None
2645        }
2646    }
2647}
2648
2649pub mod v0304_variable_edit {
2650    //! A prompt format with no fixed editable region. The entire context is shown
2651    //! to the model, and it chooses which text to replace by outputting surrounding
2652    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2653    //! text.
2654    //!
2655    //! Example prompt:
2656    //!
2657    //! <|file_sep|>path/to/file.py
2658    //! zero
2659    //! one
2660    //! two
2661    //! three<|user_cursor|>
2662    //! four
2663    //! five
2664    //! <|fim_prefix|>
    //!
2666    //! Expected output (model generates):
2667    //!
2668    //! two
2669    //! <|fim_middle|>
2670    //! THREE
2671    //! <|fim_suffix|>
2672    //! four
2673    //!
2674    //! The output means: find "two\n...\nfour" in the context, and replace
2675    //! everything between "two\n" and "four" with "THREE\n".
2676
2677    use super::*;
2678
2679    pub fn special_tokens() -> &'static [&'static str] {
2680        &[
2681            "<|fim_prefix|>",
2682            "<|fim_suffix|>",
2683            "<|fim_middle|>",
2684            "<|file_sep|>",
2685            CURSOR_MARKER,
2686        ]
2687    }
2688
2689    pub fn write_cursor_excerpt_section(
2690        prompt: &mut String,
2691        path: &Path,
2692        context: &str,
2693        cursor_offset: usize,
2694    ) {
2695        let path_str = path.to_string_lossy();
2696        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2697
2698        prompt.push_str(&context[..cursor_offset]);
2699        prompt.push_str(CURSOR_MARKER);
2700        prompt.push_str(&context[cursor_offset..]);
2701        if !prompt.ends_with('\n') {
2702            prompt.push('\n');
2703        }
2704        prompt.push_str("<|fim_prefix|>\n")
2705    }
2706
2707    /// Apply a variable-edit model output to the original context text.
2708    ///
2709    /// The model output has the form:
2710    ///
2711    /// - prefix context lines
2712    /// - `<|fim_middle|>`
2713    /// - new text
2714    /// - `<|fim_suffix|>`
2715    /// - suffix context lines
2716    ///
2717    /// We locate the prefix/suffix context lines in the original text and replace
2718    /// everything between them with the new text.
2719    pub fn apply_variable_edit(
2720        context: &str,
2721        model_output: &str,
2722    ) -> Result<(Range<usize>, String)> {
2723        let (prefix_context, rest) = model_output
2724            .split_once("<|fim_middle|>\n")
2725            .or_else(|| model_output.split_once("<|fim_middle|>"))
2726            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2727
2728        let (new_text, suffix_context) = rest
2729            .split_once("<|fim_suffix|>\n")
2730            .or_else(|| rest.split_once("<|fim_suffix|>"))
2731            .unwrap_or((rest, ""));
2732
2733        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2734            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2735        } else {
2736            suffix_context
2737        };
2738
2739        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2740            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2741            + prefix_context.len();
2742        let suffix_offset = if suffix_context.is_empty() {
2743            context.len()
2744        } else {
2745            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2746                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2747                + prefix_offset
2748        };
2749
2750        let edit_range = prefix_offset..suffix_offset;
2751        return Ok((edit_range, new_text.to_string()));
2752    }
2753
2754    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2755        if needle.is_empty() {
2756            return Some(0);
2757        }
2758
2759        haystack.match_indices(needle).find_map(|(offset, _)| {
2760            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2761            matched_line_start.then_some(offset)
2762        })
2763    }
2764
2765    /// Convert a unified diff patch into the variable-edit output format.
2766    ///
2767    /// Parses `patch` as a unified diff against `old_text` and produces model
2768    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2769    /// delimiters. The diff is resolved by content matching rather than line
2770    /// numbers.
2771    pub fn patch_to_variable_edit_output(
2772        old_text: &str,
2773        patch: &str,
2774        cursor_offset: Option<usize>,
2775    ) -> Result<String> {
2776        // Parse the unified diff into hunks. Each hunk has an `old_context`
2777        // string (context + deleted lines interleaved in order) and a list of
2778        // edits expressed as byte ranges within that context plus replacement
2779        // text.
2780        let hunks = parse_hunks(patch);
2781        if hunks.is_empty() {
2782            return Ok(String::new());
2783        }
2784
2785        // Apply each hunk by finding its old_context in the text and
2786        // performing the edits. We search forward from where the previous
2787        // hunk ended so that hunks are applied in order.
2788        let mut new_text = old_text.to_string();
2789        let mut search_from: usize = 0;
2790        let mut first_hunk_pos: Option<usize> = None;
2791
2792        for hunk in &hunks {
2793            let context_pos = new_text[search_from..]
2794                .find(&hunk.old_context)
2795                .map(|pos| pos + search_from)
2796                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2797
2798            if first_hunk_pos.is_none() {
2799                first_hunk_pos = Some(context_pos);
2800            }
2801
2802            // Apply edits in reverse order so byte offsets remain valid.
2803            for edit in hunk.edits.iter().rev() {
2804                let abs_start = context_pos + edit.range.start;
2805                let abs_end = context_pos + edit.range.end;
2806                new_text.replace_range(abs_start..abs_end, &edit.text);
2807            }
2808
2809            // Advance past this hunk's region in the (now modified) text.
2810            let new_region_len: usize =
2811                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2812                    len + edit.text.len() - (edit.range.end - edit.range.start)
2813                });
2814            search_from = context_pos + new_region_len;
2815        }
2816
2817        // Now we have old_text and new_text. Find the changed line range by
2818        // comparing them.
2819        let old_lines: Vec<&str> = old_text.lines().collect();
2820        let new_lines: Vec<&str> = new_text.lines().collect();
2821
2822        // Find first differing line.
2823        let first_changed_row = old_lines
2824            .iter()
2825            .zip(new_lines.iter())
2826            .position(|(a, b)| a != b)
2827            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2828
2829        // Find last differing line (from the end).
2830        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2831        let common_suffix = old_lines
2832            .iter()
2833            .rev()
2834            .zip(new_lines.iter().rev())
2835            .take(max_suffix)
2836            .take_while(|(a, b)| a == b)
2837            .count();
2838
2839        let old_end = old_lines.len() - common_suffix;
2840        let new_end = new_lines.len() - common_suffix;
2841
2842        if first_changed_row == old_end && first_changed_row == new_end {
2843            return Ok(String::new());
2844        }
2845
2846        // Build the replacement text from new_lines[first_diff..new_end].
2847        let mut merged_new_text = String::new();
2848        for line in &new_lines[first_changed_row..new_end] {
2849            merged_new_text.push_str(line);
2850            merged_new_text.push('\n');
2851        }
2852
2853        // cursor_offset is relative to the first hunk's new content in
2854        // new_text. Translate it to an offset within merged_new_text, which
2855        // only contains lines first_diff..new_end of new_text.
2856        if let Some(hunk_offset) = cursor_offset {
2857            let hunk_start = first_hunk_pos.unwrap_or(0);
2858            let absolute_pos = hunk_start + hunk_offset;
2859
2860            // Byte offset where first_diff starts in new_text.
2861            let merged_start: usize = new_lines[..first_changed_row]
2862                .iter()
2863                .map(|line| line.len() + 1)
2864                .sum();
2865
2866            if absolute_pos >= merged_start {
2867                let relative_offset = absolute_pos - merged_start;
2868                if relative_offset <= merged_new_text.len() {
2869                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
2870                }
2871            }
2872        }
2873
2874        // Build output with 2 lines of context above and below.
2875        let context_lines_count = 2;
2876        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
2877        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
2878
2879        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
2880            let pattern = &lines[line_range];
2881            let pattern_len = pattern.len();
2882
2883            let mut count = 0;
2884            for offset in 0..=lines.len() - pattern_len {
2885                if &lines[offset..offset + pattern_len] == pattern {
2886                    count += 1;
2887                }
2888            }
2889            count
2890        }
2891
2892        // Expand prefix and suffix until they are unique
2893        while prefix_start > 0 {
2894            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
2895                prefix_start -= 1;
2896            } else {
2897                break;
2898            }
2899        }
2900        while suffix_end < old_lines.len() {
2901            if count_matches(old_end..suffix_end, &old_lines) > 1 {
2902                suffix_end += 1;
2903            } else {
2904                break;
2905            }
2906        }
2907
2908        let mut output = String::new();
2909        for line in &old_lines[prefix_start..first_changed_row] {
2910            output.push_str(line);
2911            output.push('\n');
2912        }
2913        output.push_str("<|fim_middle|>\n");
2914        output.push_str(&merged_new_text);
2915        output.push_str("<|fim_suffix|>\n");
2916        for line in &old_lines[old_end..suffix_end] {
2917            output.push_str(line);
2918            output.push('\n');
2919        }
2920
2921        Ok(output)
2922    }
2923
    /// One `@@` hunk of a unified diff, described by content rather than by
    /// line numbers.
    struct ParsedHunk {
        /// Context and removed lines, in patch order, joined into the text
        /// that must be found in the original.
        old_context: String,
        /// Replacements to apply inside `old_context`, in ascending position.
        edits: Vec<ParsedEdit>,
    }
2928
    /// A single replacement inside a hunk's `old_context`.
    struct ParsedEdit {
        /// Byte range within `old_context` to replace; empty for a pure
        /// insertion.
        range: Range<usize>,
        /// Text that replaces `range`; empty for a pure deletion.
        text: String,
    }
2933
2934    /// Parse a unified diff into content-based hunks. Each hunk contains an
2935    /// `old_context` string (context lines + deleted lines, which together
2936    /// form the text that should be found in the original) and a list of edits
2937    /// expressed as byte ranges within that context.
2938    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
2939        let mut hunks = Vec::new();
2940        let mut current: Option<ParsedHunk> = None;
2941
2942        for line in patch.lines() {
2943            if line.starts_with("@@") {
2944                if let Some(hunk) = current.take() {
2945                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2946                        hunks.push(hunk);
2947                    }
2948                }
2949                current = Some(ParsedHunk {
2950                    old_context: String::new(),
2951                    edits: Vec::new(),
2952                });
2953            } else if line.starts_with("---") || line.starts_with("+++") {
2954                continue;
2955            } else if let Some(hunk) = &mut current {
2956                if let Some(added) = line.strip_prefix('+') {
2957                    let pos = hunk.old_context.len();
2958                    if let Some(last_edit) = hunk.edits.last_mut() {
2959                        if last_edit.range.end == pos {
2960                            writeln!(&mut last_edit.text, "{added}").ok();
2961                            continue;
2962                        }
2963                    }
2964                    hunk.edits.push(ParsedEdit {
2965                        range: pos..pos,
2966                        text: format!("{added}\n"),
2967                    });
2968                } else if let Some(removed) = line.strip_prefix('-') {
2969                    let start = hunk.old_context.len();
2970                    writeln!(&mut hunk.old_context, "{removed}").ok();
2971                    let end = hunk.old_context.len();
2972                    if let Some(last_edit) = hunk.edits.last_mut() {
2973                        if last_edit.range.end == start {
2974                            last_edit.range.end = end;
2975                            continue;
2976                        }
2977                    }
2978                    hunk.edits.push(ParsedEdit {
2979                        range: start..end,
2980                        text: String::new(),
2981                    });
2982                } else {
2983                    let ctx = line.strip_prefix(' ').unwrap_or(line);
2984                    writeln!(&mut hunk.old_context, "{ctx}").ok();
2985                }
2986            }
2987        }
2988
2989        if let Some(hunk) = current {
2990            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2991                hunks.push(hunk);
2992            }
2993        }
2994
2995        hunks
2996    }
2997
2998    #[cfg(test)]
2999    mod tests {
3000        use super::*;
3001        use indoc::indoc;
3002
        /// Table-driven test for `apply_variable_edit`: each case applies a
        /// model output to `original` and compares the edited text with
        /// `expected`.
        #[test]
        fn test_apply_variable_edit() {
            struct Case {
                name: &'static str,
                original: &'static str,
                model_output: &'static str,
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                Case {
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            // Apply the returned range and replacement to a copy of the
            // original; the case name identifies a failing entry.
            for case in cases {
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3219
3220        #[test]
3221        fn test_patch_to_variable_edit() {
3222            struct Case {
3223                name: &'static str,
3224                old: &'static str,
3225                patch: &'static str,
3226                cursor_offset: Option<usize>,
3227                expected_variable_edit: &'static str,
3228                expected_after_apply: &'static str,
3229            }
3230
3231            let cases = [
3232                Case {
3233                    name: "simple_replacement",
3234                    old: indoc! {"
3235                        zero
3236                        one
3237                        two
3238                        three
3239                        four
3240                        five
3241                    "},
3242                    patch: indoc! {"
3243                        @@ -3,3 +3,3 @@
3244                         two
3245                        -three
3246                        +THREE
3247                         four
3248                    "},
3249                    cursor_offset: None,
3250                    expected_variable_edit: indoc! {"
3251                        one
3252                        two
3253                        <|fim_middle|>
3254                        THREE
3255                        <|fim_suffix|>
3256                        four
3257                        five
3258                    "},
3259                    expected_after_apply: indoc! {"
3260                        zero
3261                        one
3262                        two
3263                        THREE
3264                        four
3265                        five
3266                    "},
3267                },
3268                Case {
3269                    name: "insertion",
3270                    old: indoc! {"
3271                        a
3272                        b
3273                        c
3274                        d
3275                        e
3276                    "},
3277                    patch: indoc! {"
3278                        @@ -2,0 +3,1 @@
3279                         b
3280                        +X
3281                         c
3282                    "},
3283                    cursor_offset: None,
3284                    expected_variable_edit: indoc! {"
3285                        a
3286                        b
3287                        <|fim_middle|>
3288                        X
3289                        <|fim_suffix|>
3290                        c
3291                        d
3292                    "},
3293                    expected_after_apply: indoc! {"
3294                        a
3295                        b
3296                        X
3297                        c
3298                        d
3299                        e
3300                    "},
3301                },
3302                Case {
3303                    name: "deletion",
3304                    old: indoc! {"
3305                        a
3306                        b
3307                        c
3308                        d
3309                        e
3310                    "},
3311                    patch: indoc! {"
3312                        @@ -2,3 +2,2 @@
3313                         b
3314                        -c
3315                         d
3316                    "},
3317                    cursor_offset: None,
3318                    expected_variable_edit: indoc! {"
3319                        a
3320                        b
3321                        <|fim_middle|>
3322                        <|fim_suffix|>
3323                        d
3324                        e
3325                    "},
3326                    expected_after_apply: indoc! {"
3327                        a
3328                        b
3329                        d
3330                        e
3331                    "},
3332                },
3333                Case {
3334                    name: "edit_near_start",
3335                    old: indoc! {"
3336                        first
3337                        second
3338                        third
3339                        fourth
3340                    "},
3341                    patch: indoc! {"
3342                        @@ -1,1 +1,1 @@
3343                        -first
3344                        +FIRST
3345                    "},
3346                    cursor_offset: None,
3347                    expected_variable_edit: indoc! {"
3348                        <|fim_middle|>
3349                        FIRST
3350                        <|fim_suffix|>
3351                        second
3352                        third
3353                    "},
3354                    expected_after_apply: indoc! {"
3355                        FIRST
3356                        second
3357                        third
3358                        fourth
3359                    "},
3360                },
3361                Case {
3362                    name: "edit_near_end",
3363                    old: indoc! {"
3364                        first
3365                        second
3366                        third
3367                        fourth
3368                    "},
3369                    patch: indoc! {"
3370                        @@ -4,1 +4,1 @@
3371                        -fourth
3372                        +FOURTH
3373                    "},
3374                    cursor_offset: None,
3375                    expected_variable_edit: indoc! {"
3376                        second
3377                        third
3378                        <|fim_middle|>
3379                        FOURTH
3380                        <|fim_suffix|>
3381                    "},
3382                    expected_after_apply: indoc! {"
3383                        first
3384                        second
3385                        third
3386                        FOURTH
3387                    "},
3388                },
3389                Case {
3390                    name: "cursor_at_start_of_replacement",
3391                    old: indoc! {"
3392                        zero
3393                        one
3394                        two
3395                        three
3396                        four
3397                        five
3398                    "},
3399                    patch: indoc! {"
3400                        @@ -3,3 +3,3 @@
3401                         two
3402                        -three
3403                        +THREE
3404                         four
3405                    "},
3406                    cursor_offset: Some(4),
3407                    expected_variable_edit: indoc! {"
3408                        one
3409                        two
3410                        <|fim_middle|>
3411                        <|user_cursor|>THREE
3412                        <|fim_suffix|>
3413                        four
3414                        five
3415                    "},
3416                    expected_after_apply: indoc! {"
3417                        zero
3418                        one
3419                        two
3420                        <|user_cursor|>THREE
3421                        four
3422                        five
3423                    "},
3424                },
3425                Case {
3426                    name: "cursor_in_middle_of_replacement",
3427                    old: indoc! {"
3428                        zero
3429                        one
3430                        two
3431                        three
3432                        four
3433                        five
3434                    "},
3435                    patch: indoc! {"
3436                        @@ -3,3 +3,3 @@
3437                         two
3438                        -three
3439                        +THREE
3440                         four
3441                    "},
3442                    cursor_offset: Some(6),
3443                    expected_variable_edit: indoc! {"
3444                        one
3445                        two
3446                        <|fim_middle|>
3447                        TH<|user_cursor|>REE
3448                        <|fim_suffix|>
3449                        four
3450                        five
3451                    "},
3452                    expected_after_apply: indoc! {"
3453                        zero
3454                        one
3455                        two
3456                        TH<|user_cursor|>REE
3457                        four
3458                        five
3459                    "},
3460                },
3461                Case {
3462                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3463                    old: indoc! {"
3464                        one
3465                        a
3466                        b
3467                        c
3468                        d
3469                        two
3470                        a
3471                        b
3472                        c
3473                        d
3474                        three
3475                        a
3476                        b
3477                        c
3478                        d
3479                        four
3480                    "},
3481                    patch: indoc! {"
3482                        @@ -4,5 +4,5 @@
3483                         two
3484                         a
3485                         b
3486                        -c
3487                        +C
3488                         d
3489                         three
3490                    "},
3491                    cursor_offset: None,
3492                    expected_variable_edit: indoc! {"
3493                        two
3494                        a
3495                        b
3496                        <|fim_middle|>
3497                        C
3498                        <|fim_suffix|>
3499                        d
3500                        three
3501                    "},
3502                    expected_after_apply: indoc! {"
3503                        one
3504                        a
3505                        b
3506                        c
3507                        d
3508                        two
3509                        a
3510                        b
3511                        C
3512                        d
3513                        three
3514                        a
3515                        b
3516                        c
3517                        d
3518                        four
3519                    "},
3520                },
3521                Case {
3522                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3523                    old: indoc! {"
3524                        {
3525                            {
3526                                one();
3527                            }
3528                        }
3529                        {
3530                            {
3531                                two();
3532                            }
3533                        }
3534                        {
3535                            {
3536                                three();
3537                            }
3538                        }
3539                        {
3540                            {
3541                                four();
3542                            }
3543                        }
3544                    "},
3545                    patch: indoc! {"
3546                        @@ -4,5 +4,5 @@
3547                             {
3548                        -        two();
3549                        +        TWO();
3550                             }
3551                    "},
3552                    cursor_offset: None,
3553                    expected_variable_edit: indoc! {"
3554                                one();
3555                            }
3556                        }
3557                        {
3558                            {
3559                        <|fim_middle|>
3560                                TWO();
3561                        <|fim_suffix|>
3562                            }
3563                        }
3564                        {
3565                            {
3566                                three();
3567                    "},
3568                    expected_after_apply: indoc! {"
3569                        {
3570                            {
3571                                one();
3572                            }
3573                        }
3574                        {
3575                            {
3576                                TWO();
3577                            }
3578                        }
3579                        {
3580                            {
3581                                three();
3582                            }
3583                        }
3584                        {
3585                            {
3586                                four();
3587                            }
3588                        }
3589                    "},
3590                },
3591            ];
3592
3593            for case in cases {
3594                let output =
3595                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3596                        .unwrap_or_else(|error| {
3597                            panic!("failed converting patch for {}: {error}", case.name)
3598                        });
3599                assert_eq!(
3600                    output, case.expected_variable_edit,
3601                    "patch->variable_edit mismatch for {}",
3602                    case.name
3603                );
3604
3605                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3606                    .unwrap_or_else(|error| {
3607                        panic!("failed applying variable_edit for {}: {error}", case.name)
3608                    });
3609                let mut edited_by_variable_edit = case.old.to_string();
3610                edited_by_variable_edit.replace_range(edit_range, &replacement);
3611                assert_eq!(
3612                    edited_by_variable_edit, case.expected_after_apply,
3613                    "variable_edit apply mismatch for {}",
3614                    case.name
3615                );
3616
3617                let (expected_edit_range, expected_replacement) =
3618                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3619                        |error| {
3620                            panic!(
3621                                "failed applying expected variable_edit for {}: {error}",
3622                                case.name
3623                            )
3624                        },
3625                    );
3626                let mut edited_by_expected_variable_edit = case.old.to_string();
3627                edited_by_expected_variable_edit
3628                    .replace_range(expected_edit_range, &expected_replacement);
3629                assert_eq!(
3630                    edited_by_expected_variable_edit, case.expected_after_apply,
3631                    "expected variable_edit apply mismatch for {}",
3632                    case.name
3633                );
3634            }
3635        }
3636
3637        #[test]
3638        fn test_write_cursor_excerpt_section() {
3639            let path = Path::new("test.rs");
3640            let context = "fn main() {\n    hello();\n}\n";
3641            let cursor_offset = 17;
3642            let mut prompt = String::new();
3643            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3644            assert_eq!(
3645                prompt,
3646                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3647            );
3648        }
3649    }
3650}
3651
3652/// The zeta1 prompt format
3653pub mod zeta1 {
3654    use super::*;
3655    use std::fmt::Write;
3656
3657    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3658    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3659    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3660    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3661
3662    const INSTRUCTION_HEADER: &str = concat!(
3663        "### Instruction:\n",
3664        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3665        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3666        "into account the cursor location.\n\n",
3667        "### User Edits:\n\n"
3668    );
3669    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3670    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3671
3672    /// Formats a complete zeta1 prompt from the input events and excerpt.
3673    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3674        let mut prompt = String::with_capacity(
3675            INSTRUCTION_HEADER.len()
3676                + input_events.len()
3677                + EXCERPT_HEADER.len()
3678                + input_excerpt.len()
3679                + RESPONSE_HEADER.len(),
3680        );
3681        prompt.push_str(INSTRUCTION_HEADER);
3682        prompt.push_str(input_events);
3683        prompt.push_str(EXCERPT_HEADER);
3684        prompt.push_str(input_excerpt);
3685        prompt.push_str(RESPONSE_HEADER);
3686        prompt
3687    }
3688
3689    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3690    /// editable and context byte-offset ranges within `cursor_excerpt`.
3691    pub fn format_zeta1_from_input(
3692        input: &ZetaPromptInput,
3693        editable_range: Range<usize>,
3694        context_range: Range<usize>,
3695    ) -> String {
3696        let events = format_zeta1_events(&input.events);
3697        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3698        format_zeta1_prompt(&events, &excerpt)
3699    }
3700
3701    /// Formats events in zeta1 style (oldest first).
3702    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3703        let mut result = String::new();
3704        for event in events {
3705            let event_string = format_zeta1_event(event);
3706            if event_string.is_empty() {
3707                continue;
3708            }
3709            if !result.is_empty() {
3710                result.push_str("\n\n");
3711            }
3712            result.push_str(&event_string);
3713        }
3714        result
3715    }
3716
3717    fn format_zeta1_event(event: &Event) -> String {
3718        match event {
3719            Event::BufferChange {
3720                path,
3721                old_path,
3722                diff,
3723                ..
3724            } => {
3725                let mut prompt = String::new();
3726                if old_path != path {
3727                    writeln!(
3728                        prompt,
3729                        "User renamed {} to {}\n",
3730                        old_path.display(),
3731                        path.display()
3732                    )
3733                    .ok();
3734                }
3735                if !diff.is_empty() {
3736                    write!(
3737                        prompt,
3738                        "User edited {}:\n```diff\n{}\n```",
3739                        path.display(),
3740                        diff
3741                    )
3742                    .ok();
3743                }
3744                prompt
3745            }
3746        }
3747    }
3748
3749    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3750    /// within `cursor_excerpt`.
3751    fn format_zeta1_excerpt(
3752        input: &ZetaPromptInput,
3753        editable_range: Range<usize>,
3754        context_range: Range<usize>,
3755    ) -> String {
3756        let path_str = input.cursor_path.to_string_lossy();
3757        let excerpt = &*input.cursor_excerpt;
3758        let cursor_offset = input.cursor_offset_in_excerpt;
3759
3760        let mut prompt = String::new();
3761        writeln!(&mut prompt, "```{path_str}").ok();
3762
3763        let starts_at_file_beginning =
3764            input.excerpt_start_row == Some(0) && context_range.start == 0;
3765        if starts_at_file_beginning {
3766            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3767        }
3768
3769        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3770
3771        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3772        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3773        prompt.push_str(CURSOR_MARKER);
3774        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3775        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3776
3777        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3778        write!(prompt, "\n```").ok();
3779
3780        prompt
3781    }
3782
3783    /// Cleans zeta1 model output by extracting content between editable region
3784    /// markers and converting the zeta1 cursor marker to the universal one.
3785    /// Returns `None` if the output doesn't contain the expected markers.
3786    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3787        let content = output.replace(CURSOR_MARKER, "");
3788
3789        let content_start = content
3790            .find(EDITABLE_REGION_START_MARKER)
3791            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3792            .map(|pos| {
3793                if content.as_bytes().get(pos) == Some(&b'\n') {
3794                    pos + 1
3795                } else {
3796                    pos
3797                }
3798            })
3799            .unwrap_or(0);
3800
3801        let content_end = content
3802            .find(EDITABLE_REGION_END_MARKER)
3803            .map(|pos| {
3804                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3805                    pos - 1
3806                } else {
3807                    pos
3808                }
3809            })
3810            .unwrap_or(content.len());
3811
3812        if content_start > content_end {
3813            return Some(String::new());
3814        }
3815
3816        let extracted = &content[content_start..content_end];
3817
3818        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3819            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3820            let text_before_cursor = text_before_cursor
3821                .find(EDITABLE_REGION_START_MARKER)
3822                .map(|pos| {
3823                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3824                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3825                        after_marker + 1
3826                    } else {
3827                        after_marker
3828                    }
3829                })
3830                .unwrap_or(0);
3831            let offset_in_extracted = zeta1_cursor_pos
3832                .saturating_sub(text_before_cursor)
3833                .min(extracted.len());
3834            offset_in_extracted
3835        });
3836
3837        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3838        if let Some(offset) = cursor_offset {
3839            result.push_str(&extracted[..offset]);
3840            result.push_str(super::CURSOR_MARKER);
3841            result.push_str(&extracted[offset..]);
3842        } else {
3843            result.push_str(extracted);
3844        }
3845
3846        Some(result)
3847    }
3848}
3849
3850#[cfg(test)]
3851mod tests {
3852    use super::*;
3853    use indoc::indoc;
3854
3855    fn make_input(
3856        cursor_excerpt: &str,
3857        editable_range: Range<usize>,
3858        cursor_offset: usize,
3859        events: Vec<Event>,
3860        related_files: Vec<RelatedFile>,
3861    ) -> ZetaPromptInput {
3862        let context_range = 0..cursor_excerpt.len();
3863        ZetaPromptInput {
3864            cursor_path: Path::new("test.rs").into(),
3865            cursor_excerpt: cursor_excerpt.into(),
3866            cursor_offset_in_excerpt: cursor_offset,
3867            excerpt_start_row: None,
3868            events: events.into_iter().map(Arc::new).collect(),
3869            related_files: Some(related_files),
3870            excerpt_ranges: ExcerptRanges {
3871                editable_150: editable_range.clone(),
3872                editable_180: editable_range.clone(),
3873                editable_350: editable_range,
3874                editable_150_context_350: context_range.clone(),
3875                editable_180_context_350: context_range.clone(),
3876                editable_350_context_150: context_range,
3877                ..Default::default()
3878            },
3879            experiment: None,
3880            in_open_source_repo: false,
3881            can_collect_data: false,
3882            repo_url: None,
3883        }
3884    }
3885
3886    fn make_input_with_context_range(
3887        excerpt: &str,
3888        editable_range: Range<usize>,
3889        context_range: Range<usize>,
3890        cursor_offset: usize,
3891    ) -> ZetaPromptInput {
3892        ZetaPromptInput {
3893            cursor_path: Path::new("test.rs").into(),
3894            cursor_excerpt: excerpt.into(),
3895            cursor_offset_in_excerpt: cursor_offset,
3896            excerpt_start_row: None,
3897            events: vec![],
3898            related_files: Some(vec![]),
3899            excerpt_ranges: ExcerptRanges {
3900                editable_150: editable_range.clone(),
3901                editable_180: editable_range.clone(),
3902                editable_350: editable_range,
3903                editable_150_context_350: context_range.clone(),
3904                editable_180_context_350: context_range.clone(),
3905                editable_350_context_150: context_range,
3906                ..Default::default()
3907            },
3908            experiment: None,
3909            in_open_source_repo: false,
3910            can_collect_data: false,
3911            repo_url: None,
3912        }
3913    }
3914
3915    fn make_event(path: &str, diff: &str) -> Event {
3916        Event::BufferChange {
3917            path: Path::new(path).into(),
3918            old_path: Path::new(path).into(),
3919            diff: diff.to_string(),
3920            predicted: false,
3921            in_open_source_repo: false,
3922        }
3923    }
3924
3925    fn make_related_file(path: &str, content: &str) -> RelatedFile {
3926        RelatedFile {
3927            path: Path::new(path).into(),
3928            max_row: content.lines().count() as u32,
3929            excerpts: vec![RelatedExcerpt {
3930                row_range: 0..content.lines().count() as u32,
3931                text: content.into(),
3932                order: 0,
3933            }],
3934            in_open_source_repo: false,
3935        }
3936    }
3937
3938    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3939        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
3940    }
3941
3942    #[test]
3943    fn test_no_truncation_when_within_budget() {
3944        let input = make_input(
3945            "prefix\neditable\nsuffix",
3946            7..15,
3947            10,
3948            vec![make_event("a.rs", "-old\n+new\n")],
3949            vec![make_related_file("related.rs", "fn helper() {}\n")],
3950        );
3951
3952        assert_eq!(
3953            format_with_budget(&input, 10000),
3954            indoc! {r#"
3955                <|file_sep|>related.rs
3956                fn helper() {}
3957                <|file_sep|>edit history
3958                --- a/a.rs
3959                +++ b/a.rs
3960                -old
3961                +new
3962                <|file_sep|>test.rs
3963                <|fim_prefix|>
3964                prefix
3965                <|fim_middle|>current
3966                edi<|user_cursor|>table
3967                <|fim_suffix|>
3968
3969                suffix
3970                <|fim_middle|>updated
3971            "#}
3972        );
3973    }
3974
3975    #[test]
3976    fn test_truncation_drops_edit_history_when_budget_tight() {
3977        let input = make_input(
3978            "code",
3979            0..4,
3980            2,
3981            vec![make_event("a.rs", "-x\n+y\n")],
3982            vec![
3983                make_related_file("r1.rs", "a\n"),
3984                make_related_file("r2.rs", "b\n"),
3985            ],
3986        );
3987
3988        assert_eq!(
3989            format_with_budget(&input, 10000),
3990            indoc! {r#"
3991                <|file_sep|>r1.rs
3992                a
3993                <|file_sep|>r2.rs
3994                b
3995                <|file_sep|>edit history
3996                --- a/a.rs
3997                +++ b/a.rs
3998                -x
3999                +y
4000                <|file_sep|>test.rs
4001                <|fim_prefix|>
4002                <|fim_middle|>current
4003                co<|user_cursor|>de
4004                <|fim_suffix|>
4005                <|fim_middle|>updated
4006            "#}
4007        );
4008
4009        assert_eq!(
4010            format_with_budget(&input, 50),
4011            indoc! {r#"
4012                <|file_sep|>r1.rs
4013                a
4014                <|file_sep|>r2.rs
4015                b
4016                <|file_sep|>test.rs
4017                <|fim_prefix|>
4018                <|fim_middle|>current
4019                co<|user_cursor|>de
4020                <|fim_suffix|>
4021                <|fim_middle|>updated
4022            "#}
4023        );
4024    }
4025
4026    #[test]
4027    fn test_truncation_includes_partial_excerpts() {
4028        let input = make_input(
4029            "x",
4030            0..1,
4031            0,
4032            vec![],
4033            vec![RelatedFile {
4034                path: Path::new("big.rs").into(),
4035                max_row: 30,
4036                in_open_source_repo: false,
4037                excerpts: vec![
4038                    RelatedExcerpt {
4039                        row_range: 0..10,
4040                        text: "first excerpt\n".into(),
4041                        order: 0,
4042                    },
4043                    RelatedExcerpt {
4044                        row_range: 10..20,
4045                        text: "second excerpt\n".into(),
4046                        order: 0,
4047                    },
4048                    RelatedExcerpt {
4049                        row_range: 20..30,
4050                        text: "third excerpt\n".into(),
4051                        order: 0,
4052                    },
4053                ],
4054            }],
4055        );
4056
4057        assert_eq!(
4058            format_with_budget(&input, 10000),
4059            indoc! {r#"
4060                <|file_sep|>big.rs
4061                first excerpt
4062                ...
4063                second excerpt
4064                ...
4065                third excerpt
4066                <|file_sep|>test.rs
4067                <|fim_prefix|>
4068                <|fim_middle|>current
4069                <|user_cursor|>x
4070                <|fim_suffix|>
4071                <|fim_middle|>updated
4072            "#}
4073        );
4074
4075        assert_eq!(
4076            format_with_budget(&input, 50),
4077            indoc! {r#"
4078                <|file_sep|>big.rs
4079                first excerpt
4080                ...
4081                <|file_sep|>test.rs
4082                <|fim_prefix|>
4083                <|fim_middle|>current
4084                <|user_cursor|>x
4085                <|fim_suffix|>
4086                <|fim_middle|>updated
4087            "#}
4088        );
4089    }
4090
4091    #[test]
4092    fn test_truncation_prioritizes_lower_order_excerpts() {
4093        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4094        // With tight budget, only the lower-order excerpt from file_b should be included.
4095        let input = make_input(
4096            "x",
4097            0..1,
4098            0,
4099            vec![],
4100            vec![
4101                RelatedFile {
4102                    path: Path::new("file_a.rs").into(),
4103                    max_row: 10,
4104                    in_open_source_repo: false,
4105                    excerpts: vec![RelatedExcerpt {
4106                        row_range: 0..10,
4107                        text: "low priority content\n".into(),
4108                        order: 5,
4109                    }],
4110                },
4111                RelatedFile {
4112                    path: Path::new("file_b.rs").into(),
4113                    max_row: 10,
4114                    in_open_source_repo: false,
4115                    excerpts: vec![RelatedExcerpt {
4116                        row_range: 0..10,
4117                        text: "high priority content\n".into(),
4118                        order: 1,
4119                    }],
4120                },
4121            ],
4122        );
4123
4124        // With large budget, both files included; rendered in stable lexicographic order.
4125        assert_eq!(
4126            format_with_budget(&input, 10000),
4127            indoc! {r#"
4128                <|file_sep|>file_a.rs
4129                low priority content
4130                <|file_sep|>file_b.rs
4131                high priority content
4132                <|file_sep|>test.rs
4133                <|fim_prefix|>
4134                <|fim_middle|>current
4135                <|user_cursor|>x
4136                <|fim_suffix|>
4137                <|fim_middle|>updated
4138            "#}
4139        );
4140
4141        // With tight budget, only file_b (lower order) fits.
4142        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4143        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4144        // file_a would need another 14 tokens, which doesn't fit.
4145        assert_eq!(
4146            format_with_budget(&input, 52),
4147            indoc! {r#"
4148                <|file_sep|>file_b.rs
4149                high priority content
4150                <|file_sep|>test.rs
4151                <|fim_prefix|>
4152                <|fim_middle|>current
4153                <|user_cursor|>x
4154                <|fim_suffix|>
4155                <|fim_middle|>updated
4156            "#}
4157        );
4158    }
4159
4160    #[test]
4161    fn test_truncation_drops_high_order_excerpts_within_file() {
4162        // A single file has excerpts at order 1 and order 3. With a tight budget,
4163        // only the order-1 excerpts are included while the order-3 excerpt is
4164        // dropped — even though they belong to the same file. This also preserves
4165        // the parent invariant: parent outline items have order ≤ their best
4166        // child, so they're always included when any child is.
4167        let input = make_input(
4168            "x",
4169            0..1,
4170            0,
4171            vec![],
4172            vec![RelatedFile {
4173                path: Path::new("mod.rs").into(),
4174                max_row: 30,
4175                in_open_source_repo: false,
4176                excerpts: vec![
4177                    RelatedExcerpt {
4178                        row_range: 0..5,
4179                        text: "mod header\n".into(),
4180                        order: 1,
4181                    },
4182                    RelatedExcerpt {
4183                        row_range: 5..15,
4184                        text: "important fn\n".into(),
4185                        order: 1,
4186                    },
4187                    RelatedExcerpt {
4188                        row_range: 15..30,
4189                        text: "less important fn\n".into(),
4190                        order: 3,
4191                    },
4192                ],
4193            }],
4194        );
4195
4196        // With large budget, all three excerpts included.
4197        assert_eq!(
4198            format_with_budget(&input, 10000),
4199            indoc! {r#"
4200                <|file_sep|>mod.rs
4201                mod header
4202                ...
4203                important fn
4204                ...
4205                less important fn
4206                <|file_sep|>test.rs
4207                <|fim_prefix|>
4208                <|fim_middle|>current
4209                <|user_cursor|>x
4210                <|fim_suffix|>
4211                <|fim_middle|>updated
4212            "#}
4213        );
4214
4215        // With tight budget, only order<=1 excerpts included (header + important fn).
4216        assert_eq!(
4217            format_with_budget(&input, 55),
4218            indoc! {r#"
4219                <|file_sep|>mod.rs
4220                mod header
4221                ...
4222                important fn
4223                ...
4224                <|file_sep|>test.rs
4225                <|fim_prefix|>
4226                <|fim_middle|>current
4227                <|user_cursor|>x
4228                <|fim_suffix|>
4229                <|fim_middle|>updated
4230            "#}
4231        );
4232    }
4233
4234    #[test]
4235    fn test_truncation_drops_older_events_first() {
4236        let input = make_input(
4237            "x",
4238            0..1,
4239            0,
4240            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4241            vec![],
4242        );
4243
4244        assert_eq!(
4245            format_with_budget(&input, 10000),
4246            indoc! {r#"
4247                <|file_sep|>edit history
4248                --- a/old.rs
4249                +++ b/old.rs
4250                -1
4251                --- a/new.rs
4252                +++ b/new.rs
4253                -2
4254                <|file_sep|>test.rs
4255                <|fim_prefix|>
4256                <|fim_middle|>current
4257                <|user_cursor|>x
4258                <|fim_suffix|>
4259                <|fim_middle|>updated
4260            "#}
4261        );
4262
4263        assert_eq!(
4264            format_with_budget(&input, 55),
4265            indoc! {r#"
4266                <|file_sep|>edit history
4267                --- a/new.rs
4268                +++ b/new.rs
4269                -2
4270                <|file_sep|>test.rs
4271                <|fim_prefix|>
4272                <|fim_middle|>current
4273                <|user_cursor|>x
4274                <|fim_suffix|>
4275                <|fim_middle|>updated
4276            "#}
4277        );
4278    }
4279
4280    #[test]
4281    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4282        let input = make_input(
4283            "fn main() {}",
4284            0..12,
4285            3,
4286            vec![make_event("a.rs", "-old\n+new\n")],
4287            vec![make_related_file("related.rs", "helper\n")],
4288        );
4289
4290        assert_eq!(
4291            format_with_budget(&input, 30),
4292            indoc! {r#"
4293                <|file_sep|>test.rs
4294                <|fim_prefix|>
4295                <|fim_middle|>current
4296                fn <|user_cursor|>main() {}
4297                <|fim_suffix|>
4298                <|fim_middle|>updated
4299            "#}
4300        );
4301    }
4302
4303    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4304        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4305    }
4306
4307    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4308        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4309    }
4310
4311    #[test]
4312    fn test_seed_coder_basic_format() {
4313        let input = make_input(
4314            "prefix\neditable\nsuffix",
4315            7..15,
4316            10,
4317            vec![make_event("a.rs", "-old\n+new\n")],
4318            vec![make_related_file("related.rs", "fn helper() {}\n")],
4319        );
4320
4321        assert_eq!(
4322            format_seed_coder(&input),
4323            indoc! {r#"
4324                <[fim-suffix]>
4325                suffix
4326                <[fim-prefix]><filename>related.rs
4327                fn helper() {}
4328
4329                <filename>edit_history
4330                --- a/a.rs
4331                +++ b/a.rs
4332                -old
4333                +new
4334
4335                <filename>test.rs
4336                prefix
4337                <<<<<<< CURRENT
4338                edi<|user_cursor|>table
4339                =======
4340                <[fim-middle]>"#}
4341        );
4342    }
4343
4344    #[test]
4345    fn test_seed_coder_no_context() {
4346        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4347
4348        assert_eq!(
4349            format_seed_coder(&input),
4350            indoc! {r#"
4351                <[fim-suffix]>
4352                after
4353                <[fim-prefix]><filename>test.rs
4354                before
4355                <<<<<<< CURRENT
4356                mid<|user_cursor|>dle
4357                =======
4358                <[fim-middle]>"#}
4359        );
4360    }
4361
4362    #[test]
4363    fn test_seed_coder_truncation_drops_context() {
4364        let input = make_input(
4365            "code",
4366            0..4,
4367            2,
4368            vec![make_event("a.rs", "-x\n+y\n")],
4369            vec![make_related_file("r1.rs", "content\n")],
4370        );
4371
4372        // With large budget, everything is included
4373        assert_eq!(
4374            format_seed_coder(&input),
4375            indoc! {r#"
4376                <[fim-suffix]>
4377                <[fim-prefix]><filename>r1.rs
4378                content
4379
4380                <filename>edit_history
4381                --- a/a.rs
4382                +++ b/a.rs
4383                -x
4384                +y
4385
4386                <filename>test.rs
4387                <<<<<<< CURRENT
4388                co<|user_cursor|>de
4389                =======
4390                <[fim-middle]>"#}
4391        );
4392
4393        // With tight budget, context is dropped but cursor section remains
4394        assert_eq!(
4395            format_seed_coder_with_budget(&input, 30),
4396            indoc! {r#"
4397                <[fim-suffix]>
4398                <[fim-prefix]><filename>test.rs
4399                <<<<<<< CURRENT
4400                co<|user_cursor|>de
4401                =======
4402                <[fim-middle]>"#}
4403        );
4404    }
4405
4406    #[test]
4407    fn test_seed_coder_truncation_prioritizes_lower_order() {
4408        let input = make_input(
4409            "code",
4410            0..4,
4411            2,
4412            vec![],
4413            vec![
4414                RelatedFile {
4415                    path: Path::new("low_prio.rs").into(),
4416                    max_row: 5,
4417                    in_open_source_repo: false,
4418                    excerpts: vec![RelatedExcerpt {
4419                        row_range: 0..5,
4420                        text: "low prio\n".into(),
4421                        order: 10,
4422                    }],
4423                },
4424                RelatedFile {
4425                    path: Path::new("high_prio.rs").into(),
4426                    max_row: 5,
4427                    in_open_source_repo: false,
4428                    excerpts: vec![RelatedExcerpt {
4429                        row_range: 0..5,
4430                        text: "high prio\n".into(),
4431                        order: 1,
4432                    }],
4433                },
4434            ],
4435        );
4436
4437        // With large budget, both included; rendered in stable lexicographic order.
4438        assert_eq!(
4439            format_seed_coder(&input),
4440            indoc! {r#"
4441                <[fim-suffix]>
4442                <[fim-prefix]><filename>low_prio.rs
4443                low prio
4444                <filename>high_prio.rs
4445                high prio
4446
4447                <filename>test.rs
4448                <<<<<<< CURRENT
4449                co<|user_cursor|>de
4450                =======
4451                <[fim-middle]>"#}
4452        );
4453
4454        // With tight budget, only high_prio included.
4455        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
4456        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
4457        assert_eq!(
4458            format_seed_coder_with_budget(&input, 44),
4459            indoc! {r#"
4460                <[fim-suffix]>
4461                <[fim-prefix]><filename>high_prio.rs
4462                high prio
4463
4464                <filename>test.rs
4465                <<<<<<< CURRENT
4466                co<|user_cursor|>de
4467                =======
4468                <[fim-middle]>"#}
4469        );
4470    }
4471
4472    #[test]
4473    fn test_format_zeta1_from_input_basic() {
4474        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4475        let input = ZetaPromptInput {
4476            cursor_path: Path::new("src/main.rs").into(),
4477            cursor_excerpt: excerpt.into(),
4478            cursor_offset_in_excerpt: 30,
4479            excerpt_start_row: Some(0),
4480            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4481            related_files: Some(vec![]),
4482            excerpt_ranges: ExcerptRanges {
4483                editable_150: 15..41,
4484                editable_180: 15..41,
4485                editable_350: 15..41,
4486                editable_150_context_350: 0..excerpt.len(),
4487                editable_180_context_350: 0..excerpt.len(),
4488                editable_350_context_150: 0..excerpt.len(),
4489                ..Default::default()
4490            },
4491            experiment: None,
4492            in_open_source_repo: false,
4493            can_collect_data: false,
4494            repo_url: None,
4495        };
4496
4497        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4498
4499        assert_eq!(
4500            prompt,
4501            concat!(
4502                "### Instruction:\n",
4503                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4504                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4505                "into account the cursor location.\n",
4506                "\n",
4507                "### User Edits:\n",
4508                "\n",
4509                "User edited other.rs:\n",
4510                "```diff\n",
4511                "-old\n",
4512                "+new\n",
4513                "\n",
4514                "```\n",
4515                "\n",
4516                "### User Excerpt:\n",
4517                "\n",
4518                "```src/main.rs\n",
4519                "<|start_of_file|>\n",
4520                "fn before() {}\n",
4521                "<|editable_region_start|>\n",
4522                "fn foo() {\n",
4523                "    <|user_cursor_is_here|>let x = 1;\n",
4524                "\n",
4525                "<|editable_region_end|>}\n",
4526                "fn after() {}\n",
4527                "\n",
4528                "```\n",
4529                "\n",
4530                "### Response:\n",
4531            ),
4532        );
4533    }
4534
4535    #[test]
4536    fn test_format_zeta1_from_input_no_start_of_file() {
4537        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4538        let input = ZetaPromptInput {
4539            cursor_path: Path::new("src/main.rs").into(),
4540            cursor_excerpt: excerpt.into(),
4541            cursor_offset_in_excerpt: 15,
4542            excerpt_start_row: Some(10),
4543            events: vec![],
4544            related_files: Some(vec![]),
4545            excerpt_ranges: ExcerptRanges {
4546                editable_150: 0..28,
4547                editable_180: 0..28,
4548                editable_350: 0..28,
4549                editable_150_context_350: 0..28,
4550                editable_180_context_350: 0..28,
4551                editable_350_context_150: 0..28,
4552                ..Default::default()
4553            },
4554            experiment: None,
4555            in_open_source_repo: false,
4556            can_collect_data: false,
4557            repo_url: None,
4558        };
4559
4560        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4561
4562        assert_eq!(
4563            prompt,
4564            concat!(
4565                "### Instruction:\n",
4566                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4567                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4568                "into account the cursor location.\n",
4569                "\n",
4570                "### User Edits:\n",
4571                "\n",
4572                "\n",
4573                "\n",
4574                "### User Excerpt:\n",
4575                "\n",
4576                "```src/main.rs\n",
4577                "<|editable_region_start|>\n",
4578                "fn foo() {\n",
4579                "    <|user_cursor_is_here|>let x = 1;\n",
4580                "}\n",
4581                "\n",
4582                "<|editable_region_end|>\n",
4583                "```\n",
4584                "\n",
4585                "### Response:\n",
4586            ),
4587        );
4588    }
4589
4590    #[test]
4591    fn test_format_zeta1_from_input_with_sub_ranges() {
4592        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
4593        let editable_range = 10..37;
4594        let context_range = 0..excerpt.len();
4595
4596        let input = ZetaPromptInput {
4597            cursor_path: Path::new("test.rs").into(),
4598            cursor_excerpt: excerpt.into(),
4599            cursor_offset_in_excerpt: 25,
4600            excerpt_start_row: Some(0),
4601            events: vec![],
4602            related_files: Some(vec![]),
4603            excerpt_ranges: ExcerptRanges {
4604                editable_150: editable_range.clone(),
4605                editable_180: editable_range.clone(),
4606                editable_350: editable_range.clone(),
4607                editable_150_context_350: context_range.clone(),
4608                editable_180_context_350: context_range.clone(),
4609                editable_350_context_150: context_range.clone(),
4610                ..Default::default()
4611            },
4612            experiment: None,
4613            in_open_source_repo: false,
4614            can_collect_data: false,
4615            repo_url: None,
4616        };
4617
4618        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4619
4620        assert_eq!(
4621            prompt,
4622            concat!(
4623                "### Instruction:\n",
4624                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4625                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4626                "into account the cursor location.\n",
4627                "\n",
4628                "### User Edits:\n",
4629                "\n",
4630                "\n",
4631                "\n",
4632                "### User Excerpt:\n",
4633                "\n",
4634                "```test.rs\n",
4635                "<|start_of_file|>\n",
4636                "// prefix\n",
4637                "<|editable_region_start|>\n",
4638                "fn foo() {\n",
4639                "    <|user_cursor_is_here|>let x = 1;\n",
4640                "}\n",
4641                "<|editable_region_end|>\n",
4642                "// suffix\n",
4643                "\n",
4644                "```\n",
4645                "\n",
4646                "### Response:\n",
4647            ),
4648        );
4649    }
4650
4651    #[test]
4652    fn test_clean_zeta1_model_output_basic() {
4653        let output = indoc! {"
4654            <|editable_region_start|>
4655            fn main() {
4656                println!(\"hello\");
4657            }
4658            <|editable_region_end|>
4659        "};
4660
4661        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4662        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
4663    }
4664
4665    #[test]
4666    fn test_clean_zeta1_model_output_with_cursor() {
4667        let output = indoc! {"
4668            <|editable_region_start|>
4669            fn main() {
4670                <|user_cursor_is_here|>println!(\"hello\");
4671            }
4672            <|editable_region_end|>
4673        "};
4674
4675        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4676        assert_eq!(
4677            cleaned,
4678            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
4679        );
4680    }
4681
4682    #[test]
4683    fn test_clean_zeta1_model_output_no_markers() {
4684        let output = "fn main() {}\n";
4685        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4686        assert_eq!(cleaned, "fn main() {}\n");
4687    }
4688
4689    #[test]
4690    fn test_clean_zeta1_model_output_empty_region() {
4691        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4692        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4693        assert_eq!(cleaned, "");
4694    }
4695
4696    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4697        let mut result = excerpt.to_string();
4698        result.replace_range(
4699            parsed_output.range_in_excerpt.clone(),
4700            &parsed_output.new_editable_region,
4701        );
4702        result
4703    }
4704
4705    #[test]
4706    fn test_parse_zeta2_model_output() {
4707        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4708        let context_start = excerpt.find("ctx start").unwrap();
4709        let context_end = excerpt.find("after ctx").unwrap();
4710        let editable_start = excerpt.find("editable old").unwrap();
4711        let editable_end = editable_start + "editable old\n".len();
4712        let input = make_input_with_context_range(
4713            excerpt,
4714            editable_start..editable_end,
4715            context_start..context_end,
4716            editable_start,
4717        );
4718
4719        let output = parse_zeta2_model_output(
4720            "editable new\n>>>>>>> UPDATED\n",
4721            ZetaFormat::V0131GitMergeMarkersPrefix,
4722            &input,
4723        )
4724        .unwrap();
4725
4726        assert_eq!(
4727            apply_edit(excerpt, &output),
4728            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4729        );
4730    }
4731
4732    #[test]
4733    fn test_parse_zeta2_model_output_identity() {
4734        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4735        let editable_start = excerpt.find("bbb").unwrap();
4736        let editable_end = excerpt.find("ddd").unwrap();
4737        let input = make_input_with_context_range(
4738            excerpt,
4739            editable_start..editable_end,
4740            0..excerpt.len(),
4741            editable_start,
4742        );
4743
4744        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4745        let output =
4746            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
4747
4748        assert_eq!(apply_edit(excerpt, &output), excerpt);
4749    }
4750
4751    #[test]
4752    fn test_parse_zeta2_model_output_strips_end_marker() {
4753        let excerpt = "hello\nworld\n";
4754        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
4755
4756        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4757        let output1 =
4758            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
4759        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
4760
4761        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
4762        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
4763    }
4764}