zeta_prompt.rs

   1pub mod excerpt_ranges;
   2
   3use anyhow::{Result, anyhow};
   4use serde::{Deserialize, Serialize};
   5use std::fmt::Write;
   6use std::ops::Range;
   7use std::path::Path;
   8use std::sync::Arc;
   9use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  10
  11pub use crate::excerpt_ranges::{
  12    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  13};
  14
  15pub const CURSOR_MARKER: &str = "<|user_cursor|>";
  16pub const MAX_PROMPT_TOKENS: usize = 4096;
  17
  18/// Use up to this amount of the editable region for prefill.
  19/// Larger values may result in more robust generation, but
  20/// this region becomes non-editable.
  21pub const PREFILL_RATIO: f64 = 0.1; // 10%
  22
  23fn estimate_tokens(bytes: usize) -> usize {
  24    bytes / 3
  25}
  26
  27#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
  28pub struct ZetaPromptInput {
  29    pub cursor_path: Arc<Path>,
  30    pub cursor_excerpt: Arc<str>,
  31    pub cursor_offset_in_excerpt: usize,
  32    #[serde(default, skip_serializing_if = "Option::is_none")]
  33    pub excerpt_start_row: Option<u32>,
  34    pub events: Vec<Arc<Event>>,
  35    #[serde(default)]
  36    pub related_files: Option<Vec<RelatedFile>>,
  37    #[serde(default, skip_serializing_if = "Vec::is_empty")]
  38    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
  39    /// These ranges let the server select model-appropriate subsets.
  40    pub excerpt_ranges: ExcerptRanges,
  41    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
  42    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
  43    /// When present, the server uses these to compute editable/context ranges
  44    /// instead of `excerpt_ranges`.
  45    #[serde(default, skip_serializing_if = "Option::is_none")]
  46    pub syntax_ranges: Option<Vec<Range<usize>>>,
  47    /// The name of the edit prediction model experiment to use.
  48    #[serde(default, skip_serializing_if = "Option::is_none")]
  49    pub experiment: Option<String>,
  50    #[serde(default)]
  51    pub in_open_source_repo: bool,
  52    #[serde(default)]
  53    pub can_collect_data: bool,
  54    #[serde(default, skip_serializing_if = "Option::is_none")]
  55    pub repo_url: Option<String>,
  56}
  57
  58#[derive(
  59    Default,
  60    Clone,
  61    Copy,
  62    Debug,
  63    PartialEq,
  64    Eq,
  65    Hash,
  66    EnumIter,
  67    IntoStaticStr,
  68    Serialize,
  69    Deserialize,
  70)]
  71#[allow(non_camel_case_types)]
  72pub enum ZetaFormat {
  73    V0112MiddleAtEnd,
  74    V0113Ordered,
  75    V0114180EditableRegion,
  76    V0120GitMergeMarkers,
  77    #[default]
  78    V0131GitMergeMarkersPrefix,
  79    V0211Prefill,
  80    V0211SeedCoder,
  81    v0226Hashline,
  82    V0304VariableEdit,
  83    V0304SeedNoEdits,
  84}
  85
  86impl std::fmt::Display for ZetaFormat {
  87    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  88        write!(f, "{}", <&'static str>::from(self))
  89    }
  90}
  91
  92impl ZetaFormat {
  93    pub fn parse(format_name: &str) -> Result<Self> {
  94        let mut results = ZetaFormat::iter().filter(|version| {
  95            <&'static str>::from(version)
  96                .to_lowercase()
  97                .contains(&format_name.to_lowercase())
  98        });
  99        let Some(result) = results.next() else {
 100            anyhow::bail!(
 101                "`{format_name}` did not match any of:\n{}",
 102                Self::options_as_string()
 103            );
 104        };
 105        if results.next().is_some() {
 106            anyhow::bail!(
 107                "`{format_name}` matched more than one of:\n{}",
 108                Self::options_as_string()
 109            );
 110        }
 111        Ok(result)
 112    }
 113
 114    pub fn options_as_string() -> String {
 115        ZetaFormat::iter()
 116            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 117            .collect::<Vec<_>>()
 118            .concat()
 119    }
 120}
 121
 122#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 123#[serde(tag = "event")]
 124pub enum Event {
 125    BufferChange {
 126        path: Arc<Path>,
 127        old_path: Arc<Path>,
 128        diff: String,
 129        predicted: bool,
 130        in_open_source_repo: bool,
 131    },
 132}
 133
 134impl Event {
 135    pub fn in_open_source_repo(&self) -> bool {
 136        match self {
 137            Event::BufferChange {
 138                in_open_source_repo,
 139                ..
 140            } => *in_open_source_repo,
 141        }
 142    }
 143}
 144
 145pub fn write_event(prompt: &mut String, event: &Event) {
 146    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 147        for component in path.components() {
 148            prompt.push('/');
 149            write!(prompt, "{}", component.as_os_str().display()).ok();
 150        }
 151    }
 152    match event {
 153        Event::BufferChange {
 154            path,
 155            old_path,
 156            diff,
 157            predicted,
 158            in_open_source_repo: _,
 159        } => {
 160            if *predicted {
 161                prompt.push_str("// User accepted prediction:\n");
 162            }
 163            prompt.push_str("--- a");
 164            write_path_as_unix_str(prompt, old_path.as_ref());
 165            prompt.push_str("\n+++ b");
 166            write_path_as_unix_str(prompt, path.as_ref());
 167            prompt.push('\n');
 168            prompt.push_str(diff);
 169        }
 170    }
 171}
 172
 173#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 174pub struct ActiveBufferDiagnostic {
 175    pub severity: Option<i32>,
 176    pub message: String,
 177    pub snippet: String,
 178    pub snippet_buffer_row_range: Range<u32>,
 179    pub diagnostic_range_in_snippet: Range<usize>,
 180}
 181
 182#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 183pub struct RelatedFile {
 184    pub path: Arc<Path>,
 185    pub max_row: u32,
 186    pub excerpts: Vec<RelatedExcerpt>,
 187    #[serde(default)]
 188    pub in_open_source_repo: bool,
 189}
 190
 191#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 192pub struct RelatedExcerpt {
 193    pub row_range: Range<u32>,
 194    pub text: Arc<str>,
 195    #[serde(default)]
 196    pub order: usize,
 197}
 198
 199pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 200    special_tokens_for_format(format)
 201        .iter()
 202        .any(|token| input.cursor_excerpt.contains(token))
 203}
 204
 205pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 206    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
 207}
 208
 209pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 210    match format {
 211        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 212        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 213        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 214        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 215        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 216        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 217        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 218        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 219        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 220        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 221    }
 222}
 223
 224/// Returns the (editable_token_limit, context_token_limit) for a given format.
 225pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 226    match format {
 227        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 228        ZetaFormat::V0114180EditableRegion => (180, 350),
 229        ZetaFormat::V0120GitMergeMarkers
 230        | ZetaFormat::V0131GitMergeMarkersPrefix
 231        | ZetaFormat::V0211Prefill
 232        | ZetaFormat::V0211SeedCoder
 233        | ZetaFormat::v0226Hashline
 234        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 235        ZetaFormat::V0304VariableEdit => (1024, 0),
 236    }
 237}
 238
 239pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 240    match format {
 241        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 242        ZetaFormat::V0112MiddleAtEnd
 243        | ZetaFormat::V0113Ordered
 244        | ZetaFormat::V0114180EditableRegion
 245        | ZetaFormat::V0120GitMergeMarkers
 246        | ZetaFormat::V0131GitMergeMarkersPrefix
 247        | ZetaFormat::V0211Prefill
 248        | ZetaFormat::V0211SeedCoder
 249        | ZetaFormat::V0304VariableEdit
 250        | ZetaFormat::V0304SeedNoEdits => &[],
 251    }
 252}
 253
 254pub fn excerpt_ranges_for_format(
 255    format: ZetaFormat,
 256    ranges: &ExcerptRanges,
 257) -> (Range<usize>, Range<usize>) {
 258    match format {
 259        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 260            ranges.editable_150.clone(),
 261            ranges.editable_150_context_350.clone(),
 262        ),
 263        ZetaFormat::V0114180EditableRegion => (
 264            ranges.editable_180.clone(),
 265            ranges.editable_180_context_350.clone(),
 266        ),
 267        ZetaFormat::V0120GitMergeMarkers
 268        | ZetaFormat::V0131GitMergeMarkersPrefix
 269        | ZetaFormat::V0211Prefill
 270        | ZetaFormat::V0211SeedCoder
 271        | ZetaFormat::v0226Hashline
 272        | ZetaFormat::V0304SeedNoEdits => (
 273            ranges.editable_350.clone(),
 274            ranges.editable_350_context_150.clone(),
 275        ),
 276        ZetaFormat::V0304VariableEdit => {
 277            let context = ranges
 278                .editable_350_context_1024
 279                .clone()
 280                .or(ranges.editable_350_context_512.clone())
 281                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 282            (context.clone(), context)
 283        }
 284    }
 285}
 286
 287pub fn write_cursor_excerpt_section_for_format(
 288    format: ZetaFormat,
 289    prompt: &mut String,
 290    path: &Path,
 291    context: &str,
 292    editable_range: &Range<usize>,
 293    cursor_offset: usize,
 294) {
 295    match format {
 296        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 297            prompt,
 298            path,
 299            context,
 300            editable_range,
 301            cursor_offset,
 302        ),
 303        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 304            v0113_ordered::write_cursor_excerpt_section(
 305                prompt,
 306                path,
 307                context,
 308                editable_range,
 309                cursor_offset,
 310            )
 311        }
 312        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 313            prompt,
 314            path,
 315            context,
 316            editable_range,
 317            cursor_offset,
 318        ),
 319        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 320            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 321                prompt,
 322                path,
 323                context,
 324                editable_range,
 325                cursor_offset,
 326            )
 327        }
 328        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 329            seed_coder::write_cursor_excerpt_section(
 330                prompt,
 331                path,
 332                context,
 333                editable_range,
 334                cursor_offset,
 335            )
 336        }
 337        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 338            prompt,
 339            path,
 340            context,
 341            editable_range,
 342            cursor_offset,
 343        ),
 344        ZetaFormat::V0304VariableEdit => {
 345            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 346        }
 347    }
 348}
 349
 350fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 351    let start_row = text[0..range.start].matches('\n').count() as u32;
 352    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 353    if !text[..range.end].ends_with('\n') {
 354        end_row += 1;
 355    }
 356    return start_row..end_row;
 357}
 358
 359pub fn format_prompt_with_budget_for_format(
 360    input: &ZetaPromptInput,
 361    format: ZetaFormat,
 362    max_tokens: usize,
 363) -> String {
 364    let (context, editable_range, context_range, cursor_offset) =
 365        resolve_cursor_region(input, format);
 366    let path = &*input.cursor_path;
 367
 368    let empty_files = Vec::new();
 369    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 370    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 371        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 372        let row_range = relative_row_range.start + cursor_excerpt_start_row
 373            ..relative_row_range.end + cursor_excerpt_start_row;
 374        &filter_redundant_excerpts(
 375            input_related_files.to_vec(),
 376            input.cursor_path.as_ref(),
 377            row_range,
 378        )
 379    } else {
 380        input_related_files
 381    };
 382
 383    match format {
 384        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 385            seed_coder::format_prompt_with_budget(
 386                path,
 387                context,
 388                &editable_range,
 389                cursor_offset,
 390                &input.events,
 391                related_files,
 392                max_tokens,
 393            )
 394        }
 395        _ => {
 396            let mut cursor_section = String::new();
 397            write_cursor_excerpt_section_for_format(
 398                format,
 399                &mut cursor_section,
 400                path,
 401                context,
 402                &editable_range,
 403                cursor_offset,
 404            );
 405
 406            let cursor_tokens = estimate_tokens(cursor_section.len());
 407            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 408
 409            let edit_history_section = format_edit_history_within_budget(
 410                &input.events,
 411                "<|file_sep|>",
 412                "edit history",
 413                budget_after_cursor,
 414            );
 415            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 416            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 417
 418            let related_files_section = format_related_files_within_budget(
 419                &related_files,
 420                "<|file_sep|>",
 421                "",
 422                budget_after_edit_history,
 423            );
 424
 425            let mut prompt = String::new();
 426            prompt.push_str(&related_files_section);
 427            prompt.push_str(&edit_history_section);
 428            prompt.push_str(&cursor_section);
 429            prompt
 430        }
 431    }
 432}
 433
 434pub fn filter_redundant_excerpts(
 435    mut related_files: Vec<RelatedFile>,
 436    cursor_path: &Path,
 437    cursor_row_range: Range<u32>,
 438) -> Vec<RelatedFile> {
 439    for file in &mut related_files {
 440        if file.path.as_ref() == cursor_path {
 441            file.excerpts.retain(|excerpt| {
 442                excerpt.row_range.start < cursor_row_range.start
 443                    || excerpt.row_range.end > cursor_row_range.end
 444            });
 445        }
 446    }
 447    related_files.retain(|file| !file.excerpts.is_empty());
 448    related_files
 449}
 450
 451pub fn get_prefill_for_format(
 452    format: ZetaFormat,
 453    context: &str,
 454    editable_range: &Range<usize>,
 455) -> String {
 456    match format {
 457        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 458        ZetaFormat::V0112MiddleAtEnd
 459        | ZetaFormat::V0113Ordered
 460        | ZetaFormat::V0114180EditableRegion
 461        | ZetaFormat::V0120GitMergeMarkers
 462        | ZetaFormat::V0131GitMergeMarkersPrefix
 463        | ZetaFormat::V0211SeedCoder
 464        | ZetaFormat::v0226Hashline
 465        | ZetaFormat::V0304VariableEdit => String::new(),
 466        ZetaFormat::V0304SeedNoEdits => String::new(),
 467    }
 468}
 469
 470pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 471    match format {
 472        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 473        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 474        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 475        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
 476        ZetaFormat::V0112MiddleAtEnd
 477        | ZetaFormat::V0113Ordered
 478        | ZetaFormat::V0114180EditableRegion
 479        | ZetaFormat::v0226Hashline
 480        | ZetaFormat::V0304VariableEdit => None,
 481    }
 482}
 483
 484pub fn encode_patch_as_output_for_format(
 485    format: ZetaFormat,
 486    old_editable_region: &str,
 487    patch: &str,
 488    cursor_offset: Option<usize>,
 489) -> Result<Option<String>> {
 490    match format {
 491        ZetaFormat::v0226Hashline => {
 492            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 493        }
 494        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 495            old_editable_region,
 496            patch,
 497            cursor_offset,
 498        )
 499        .map(Some),
 500        ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
 501        _ => Ok(None),
 502    }
 503}
 504
 505pub struct ParsedOutput {
 506    /// Text that should replace the editable region
 507    pub new_editable_region: String,
 508    /// The byte range within `cursor_excerpt` that this replacement applies to
 509    pub range_in_excerpt: Range<usize>,
 510}
 511
 512/// Parse model output for the given zeta format
 513pub fn parse_zeta2_model_output(
 514    output: &str,
 515    format: ZetaFormat,
 516    prompt_inputs: &ZetaPromptInput,
 517) -> Result<ParsedOutput> {
 518    let output = match output_end_marker_for_format(format) {
 519        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 520        None => output,
 521    };
 522
 523    let (context, editable_range_in_context, context_range, _) =
 524        resolve_cursor_region(prompt_inputs, format);
 525    let context_start = context_range.start;
 526    let old_editable_region = &context[editable_range_in_context.clone()];
 527
 528    let (range_in_context, output) = match format {
 529        ZetaFormat::v0226Hashline => (
 530            editable_range_in_context,
 531            if hashline::output_has_edit_commands(output) {
 532                hashline::apply_edit_commands(old_editable_region, output)
 533            } else {
 534                output.to_string()
 535            },
 536        ),
 537        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 538        ZetaFormat::V0304SeedNoEdits => (
 539            editable_range_in_context,
 540            if output.starts_with(seed_coder::NO_EDITS) {
 541                old_editable_region.to_string()
 542            } else {
 543                output.to_string()
 544            },
 545        ),
 546        _ => (editable_range_in_context, output.to_string()),
 547    };
 548
 549    let range_in_excerpt =
 550        range_in_context.start + context_start..range_in_context.end + context_start;
 551
 552    Ok(ParsedOutput {
 553        new_editable_region: output,
 554        range_in_excerpt,
 555    })
 556}
 557
 558pub fn excerpt_range_for_format(
 559    format: ZetaFormat,
 560    ranges: &ExcerptRanges,
 561) -> (Range<usize>, Range<usize>) {
 562    excerpt_ranges_for_format(format, ranges)
 563}
 564
 565pub fn resolve_cursor_region(
 566    input: &ZetaPromptInput,
 567    format: ZetaFormat,
 568) -> (&str, Range<usize>, Range<usize>, usize) {
 569    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
 570        let (editable_tokens, context_tokens) = token_limits_for_format(format);
 571        compute_editable_and_context_ranges(
 572            &input.cursor_excerpt,
 573            input.cursor_offset_in_excerpt,
 574            syntax_ranges,
 575            editable_tokens,
 576            context_tokens,
 577        )
 578    } else {
 579        excerpt_range_for_format(format, &input.excerpt_ranges)
 580    };
 581    let context_start = context_range.start;
 582    let context_text = &input.cursor_excerpt[context_range.clone()];
 583    let adjusted_editable =
 584        (editable_range.start - context_start)..(editable_range.end - context_start);
 585    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 586
 587    (
 588        context_text,
 589        adjusted_editable,
 590        context_range,
 591        adjusted_cursor,
 592    )
 593}
 594
 595pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 596    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 597    get_prefill_for_format(format, context, &editable_range)
 598}
 599
 600fn format_edit_history_within_budget(
 601    events: &[Arc<Event>],
 602    file_marker: &str,
 603    edit_history_name: &str,
 604    max_tokens: usize,
 605) -> String {
 606    let header = format!("{}{}\n", file_marker, edit_history_name);
 607    let header_tokens = estimate_tokens(header.len());
 608    if header_tokens >= max_tokens {
 609        return String::new();
 610    }
 611
 612    let mut event_strings: Vec<String> = Vec::new();
 613    let mut total_tokens = header_tokens;
 614
 615    for event in events.iter().rev() {
 616        let mut event_str = String::new();
 617        write_event(&mut event_str, event);
 618        let event_tokens = estimate_tokens(event_str.len());
 619
 620        if total_tokens + event_tokens > max_tokens {
 621            break;
 622        }
 623        total_tokens += event_tokens;
 624        event_strings.push(event_str);
 625    }
 626
 627    if event_strings.is_empty() {
 628        return String::new();
 629    }
 630
 631    let mut result = header;
 632    for event_str in event_strings.iter().rev() {
 633        result.push_str(event_str);
 634    }
 635    result
 636}
 637
 638fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 639    let needs_newline = !excerpt.text.ends_with('\n');
 640    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 641    let len = excerpt.text.len()
 642        + if needs_newline { "\n".len() } else { 0 }
 643        + if needs_ellipsis { "...\n".len() } else { 0 };
 644    estimate_tokens(len)
 645}
 646
 647pub fn format_related_files_within_budget(
 648    related_files: &[RelatedFile],
 649    file_prefix: &str,
 650    file_suffix: &str,
 651    max_tokens: usize,
 652) -> String {
 653    struct ExcerptCandidate {
 654        file_ix: usize,
 655        excerpt_ix: usize,
 656        order: usize,
 657    }
 658
 659    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 660        .iter()
 661        .enumerate()
 662        .flat_map(|(file_ix, file)| {
 663            file.excerpts
 664                .iter()
 665                .enumerate()
 666                .map(move |(excerpt_ix, e)| ExcerptCandidate {
 667                    file_ix,
 668                    excerpt_ix,
 669                    order: e.order,
 670                })
 671        })
 672        .collect();
 673
 674    // Pre-compute file header strings and their token costs.
 675    let file_headers: Vec<String> = related_files
 676        .iter()
 677        .map(|file| {
 678            let path_str = file.path.to_string_lossy();
 679            format!("{}{}\n", file_prefix, path_str)
 680        })
 681        .collect();
 682
 683    // Sort the excerpts by their order and determine how many fit within the budget.
 684    let mut total_tokens = 0;
 685    let mut included_excerpt_count = 0_usize;
 686    let mut included_file_indices = vec![false; related_files.len()];
 687    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
 688    for candidate in &excerpt_candidates {
 689        let file = &related_files[candidate.file_ix];
 690        let excerpt = &file.excerpts[candidate.excerpt_ix];
 691        let file_already_included = included_file_indices[candidate.file_ix];
 692        let header_cost = if file_already_included {
 693            0
 694        } else {
 695            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
 696        };
 697        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
 698        if total_tokens + header_cost + excerpt_cost > max_tokens {
 699            break;
 700        }
 701        total_tokens += header_cost + excerpt_cost;
 702        if !file_already_included {
 703            included_file_indices[candidate.file_ix] = true;
 704        }
 705        included_excerpt_count += 1;
 706    }
 707
 708    excerpt_candidates.truncate(included_excerpt_count);
 709    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
 710
 711    // Render all of the files that fit within the token budget, in the original order.
 712    let mut result = String::new();
 713    let mut last_file_ix = None;
 714    for candidate in &excerpt_candidates {
 715        if last_file_ix != Some(candidate.file_ix) {
 716            if last_file_ix.is_some() {
 717                result.push_str(file_suffix);
 718            }
 719            result.push_str(&file_headers[candidate.file_ix]);
 720            last_file_ix = Some(candidate.file_ix);
 721        }
 722        let file = &related_files[candidate.file_ix];
 723        let excerpt = &file.excerpts[candidate.excerpt_ix];
 724        result.push_str(&excerpt.text);
 725        if !result.ends_with('\n') {
 726            result.push('\n');
 727        }
 728        if excerpt.row_range.end < file.max_row {
 729            result.push_str("...\n");
 730        }
 731    }
 732
 733    result
 734}
 735
 736pub fn write_related_files(
 737    prompt: &mut String,
 738    related_files: &[RelatedFile],
 739) -> Vec<Range<usize>> {
 740    let mut ranges = Vec::new();
 741    for file in related_files {
 742        let start = prompt.len();
 743        let path_str = file.path.to_string_lossy();
 744        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 745        for excerpt in &file.excerpts {
 746            prompt.push_str(&excerpt.text);
 747            if !prompt.ends_with('\n') {
 748                prompt.push('\n');
 749            }
 750            if excerpt.row_range.end < file.max_row {
 751                prompt.push_str("...\n");
 752            }
 753        }
 754        let end = prompt.len();
 755        ranges.push(start..end);
 756    }
 757    ranges
 758}
 759
 760mod v0112_middle_at_end {
 761    use super::*;
 762
 763    pub fn special_tokens() -> &'static [&'static str] {
 764        &[
 765            "<|fim_prefix|>",
 766            "<|fim_suffix|>",
 767            "<|fim_middle|>",
 768            "<|file_sep|>",
 769            CURSOR_MARKER,
 770        ]
 771    }
 772
 773    pub fn write_cursor_excerpt_section(
 774        prompt: &mut String,
 775        path: &Path,
 776        context: &str,
 777        editable_range: &Range<usize>,
 778        cursor_offset: usize,
 779    ) {
 780        let path_str = path.to_string_lossy();
 781        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 782
 783        prompt.push_str("<|fim_prefix|>\n");
 784        prompt.push_str(&context[..editable_range.start]);
 785
 786        prompt.push_str("<|fim_suffix|>\n");
 787        prompt.push_str(&context[editable_range.end..]);
 788        if !prompt.ends_with('\n') {
 789            prompt.push('\n');
 790        }
 791
 792        prompt.push_str("<|fim_middle|>current\n");
 793        prompt.push_str(&context[editable_range.start..cursor_offset]);
 794        prompt.push_str(CURSOR_MARKER);
 795        prompt.push_str(&context[cursor_offset..editable_range.end]);
 796        if !prompt.ends_with('\n') {
 797            prompt.push('\n');
 798        }
 799
 800        prompt.push_str("<|fim_middle|>updated\n");
 801    }
 802}
 803
 804mod v0113_ordered {
 805    use super::*;
 806
 807    pub fn special_tokens() -> &'static [&'static str] {
 808        &[
 809            "<|fim_prefix|>",
 810            "<|fim_suffix|>",
 811            "<|fim_middle|>",
 812            "<|file_sep|>",
 813            CURSOR_MARKER,
 814        ]
 815    }
 816
 817    pub fn write_cursor_excerpt_section(
 818        prompt: &mut String,
 819        path: &Path,
 820        context: &str,
 821        editable_range: &Range<usize>,
 822        cursor_offset: usize,
 823    ) {
 824        let path_str = path.to_string_lossy();
 825        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 826
 827        prompt.push_str("<|fim_prefix|>\n");
 828        prompt.push_str(&context[..editable_range.start]);
 829        if !prompt.ends_with('\n') {
 830            prompt.push('\n');
 831        }
 832
 833        prompt.push_str("<|fim_middle|>current\n");
 834        prompt.push_str(&context[editable_range.start..cursor_offset]);
 835        prompt.push_str(CURSOR_MARKER);
 836        prompt.push_str(&context[cursor_offset..editable_range.end]);
 837        if !prompt.ends_with('\n') {
 838            prompt.push('\n');
 839        }
 840
 841        prompt.push_str("<|fim_suffix|>\n");
 842        prompt.push_str(&context[editable_range.end..]);
 843        if !prompt.ends_with('\n') {
 844            prompt.push('\n');
 845        }
 846
 847        prompt.push_str("<|fim_middle|>updated\n");
 848    }
 849}
 850
 851mod v0114180_editable_region {
 852    use super::*;
 853
 854    pub fn special_tokens() -> &'static [&'static str] {
 855        v0113_ordered::special_tokens()
 856    }
 857}
 858
 859pub mod v0120_git_merge_markers {
 860    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 861    //!
 862    //! Example prompt:
 863    //!
 864    //! <|file_sep|>path/to/target_file.py
 865    //! <|fim_prefix|>
 866    //! code before editable region
 867    //! <|fim_suffix|>
 868    //! code after editable region
 869    //! <|fim_middle|>
 870    //! <<<<<<< CURRENT
 871    //! code that
 872    //! needs to<|user_cursor|>
 873    //! be rewritten
 874    //! =======
 875    //!
 876    //! Expected output (should be generated by the model):
 877    //!
 878    //! updated
 879    //! code with
 880    //! changes applied
 881    //! >>>>>>> UPDATED
 882
 883    use super::*;
 884
 885    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 886    pub const SEPARATOR: &str = "=======\n";
 887    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 888
 889    pub fn special_tokens() -> &'static [&'static str] {
 890        &[
 891            "<|fim_prefix|>",
 892            "<|fim_suffix|>",
 893            "<|fim_middle|>",
 894            "<|file_sep|>",
 895            START_MARKER,
 896            SEPARATOR,
 897            END_MARKER,
 898            CURSOR_MARKER,
 899        ]
 900    }
 901
 902    pub fn write_cursor_excerpt_section(
 903        prompt: &mut String,
 904        path: &Path,
 905        context: &str,
 906        editable_range: &Range<usize>,
 907        cursor_offset: usize,
 908    ) {
 909        let path_str = path.to_string_lossy();
 910        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 911
 912        prompt.push_str("<|fim_prefix|>");
 913        prompt.push_str(&context[..editable_range.start]);
 914
 915        prompt.push_str("<|fim_suffix|>");
 916        prompt.push_str(&context[editable_range.end..]);
 917        if !prompt.ends_with('\n') {
 918            prompt.push('\n');
 919        }
 920
 921        prompt.push_str("<|fim_middle|>");
 922        prompt.push_str(START_MARKER);
 923        prompt.push_str(&context[editable_range.start..cursor_offset]);
 924        prompt.push_str(CURSOR_MARKER);
 925        prompt.push_str(&context[cursor_offset..editable_range.end]);
 926        if !prompt.ends_with('\n') {
 927            prompt.push('\n');
 928        }
 929        prompt.push_str(SEPARATOR);
 930    }
 931}
 932
 933pub mod v0131_git_merge_markers_prefix {
 934    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 935    //!
 936    //! Example prompt:
 937    //!
 938    //! <|file_sep|>path/to/target_file.py
 939    //! <|fim_prefix|>
 940    //! code before editable region
 941    //! <<<<<<< CURRENT
 942    //! code that
 943    //! needs to<|user_cursor|>
 944    //! be rewritten
 945    //! =======
 946    //! <|fim_suffix|>
 947    //! code after editable region
 948    //! <|fim_middle|>
 949    //!
 950    //! Expected output (should be generated by the model):
 951    //!
 952    //! updated
 953    //! code with
 954    //! changes applied
 955    //! >>>>>>> UPDATED
 956
 957    use super::*;
 958
 959    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 960    pub const SEPARATOR: &str = "=======\n";
 961    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 962
 963    pub fn special_tokens() -> &'static [&'static str] {
 964        &[
 965            "<|fim_prefix|>",
 966            "<|fim_suffix|>",
 967            "<|fim_middle|>",
 968            "<|file_sep|>",
 969            START_MARKER,
 970            SEPARATOR,
 971            END_MARKER,
 972            CURSOR_MARKER,
 973        ]
 974    }
 975
 976    pub fn write_cursor_excerpt_section(
 977        prompt: &mut String,
 978        path: &Path,
 979        context: &str,
 980        editable_range: &Range<usize>,
 981        cursor_offset: usize,
 982    ) {
 983        let path_str = path.to_string_lossy();
 984        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 985
 986        prompt.push_str("<|fim_prefix|>");
 987        prompt.push_str(&context[..editable_range.start]);
 988        prompt.push_str(START_MARKER);
 989        prompt.push_str(&context[editable_range.start..cursor_offset]);
 990        prompt.push_str(CURSOR_MARKER);
 991        prompt.push_str(&context[cursor_offset..editable_range.end]);
 992        if !prompt.ends_with('\n') {
 993            prompt.push('\n');
 994        }
 995        prompt.push_str(SEPARATOR);
 996
 997        prompt.push_str("<|fim_suffix|>");
 998        prompt.push_str(&context[editable_range.end..]);
 999        if !prompt.ends_with('\n') {
1000            prompt.push('\n');
1001        }
1002
1003        prompt.push_str("<|fim_middle|>");
1004    }
1005}
1006
1007pub mod v0211_prefill {
1008    use super::*;
1009
1010    pub fn special_tokens() -> &'static [&'static str] {
1011        v0131_git_merge_markers_prefix::special_tokens()
1012    }
1013
1014    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1015        let editable_region = &context[editable_range.start..editable_range.end];
1016
1017        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1018        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1019
1020        // Find a token boundary to avoid splitting tokens in the prefill.
1021        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1022        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1023        // the \n and consume any consecutive \n characters after it.
1024        let prefill = &editable_region[..prefill_len];
1025        match prefill.rfind('\n') {
1026            Some(pos) => {
1027                let mut end = pos + 1;
1028                while end < editable_region.len()
1029                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1030                {
1031                    end += 1;
1032                }
1033                editable_region[..end].to_string()
1034            }
1035            // No newline found. Fall back to splitting before the last space
1036            // (word-level boundary)
1037            None => match prefill.rfind(' ') {
1038                Some(pos) => prefill[..pos].to_string(),
1039                None => prefill.to_string(),
1040            },
1041        }
1042    }
1043}
1044
1045pub mod hashline {
1046
    use std::fmt::Display;

    /// Marker that closes the cursor excerpt section: `write_cursor_excerpt_section`
    /// writes it, followed by a newline, as the last thing in the section.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker that `write_cursor_excerpt_section` writes directly before the
    /// hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output line that starts a "set" (replace lines) command.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that starts an "insert" command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Marker meaning "leave the editable region unchanged": `apply_edit_commands`
    /// returns the region as is when the output starts with it, and
    /// `patch_to_edit_commands` emits it for a patch that yields no commands.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1057
1058    pub fn special_tokens() -> &'static [&'static str] {
1059        return &[
1060            SET_COMMAND_MARKER,
1061            "<|set_range|>",
1062            INSERT_COMMAND_MARKER,
1063            NO_EDITS_COMMAND_MARKER,
1064            CURSOR_MARKER,
1065            "<|file_sep|>",
1066            "<|fim_prefix|>",
1067            "<|fim_suffix|>",
1068            "<|fim_middle|>",
1069        ];
1070    }
1071
1072    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1073    #[derive(Debug, Clone, PartialEq, Eq)]
1074    struct LineRef {
1075        index: usize,
1076        hash: u8,
1077    }
1078
1079    impl Display for LineRef {
1080        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1081            write!(f, "{}:{:02x}", self.index, self.hash)
1082        }
1083    }
1084
1085    pub fn hash_line(line: &[u8]) -> u8 {
1086        let mut h: u8 = 0;
1087        for &byte in line {
1088            h = h.wrapping_add(byte);
1089        }
1090        return h;
1091    }
1092
1093    /// Write the hashline-encoded editable region into `out`. Each line of
1094    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1095    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1096    /// to the start of `editable_text`).
1097    pub fn write_hashline_editable_region(
1098        out: &mut String,
1099        editable_text: &str,
1100        cursor_offset_in_editable: usize,
1101    ) {
1102        let mut offset = 0;
1103        for (i, line) in editable_text.lines().enumerate() {
1104            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1105                && cursor_offset_in_editable < offset + line.len()
1106            {
1107                (
1108                    &line[..cursor_offset_in_editable - offset],
1109                    CURSOR_MARKER,
1110                    &line[cursor_offset_in_editable - offset..],
1111                )
1112            } else {
1113                (line, "", "")
1114            };
1115            write!(
1116                out,
1117                "\n{}|{head}{cursor}{tail}",
1118                LineRef {
1119                    index: i,
1120                    hash: hash_line(line.as_bytes())
1121                }
1122            )
1123            .unwrap();
1124            offset += line.len() + 1;
1125        }
1126    }
1127
1128    pub fn write_cursor_excerpt_section(
1129        prompt: &mut String,
1130        path: &Path,
1131        context: &str,
1132        editable_range: &Range<usize>,
1133        cursor_offset: usize,
1134    ) {
1135        let path_str = path.to_string_lossy();
1136        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1137
1138        prompt.push_str("<|fim_prefix|>\n");
1139        prompt.push_str(&context[..editable_range.start]);
1140        prompt.push_str(START_MARKER);
1141
1142        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1143        let editable_region = &context[editable_range.clone()];
1144        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1145
1146        if !prompt.ends_with('\n') {
1147            prompt.push('\n');
1148        }
1149
1150        prompt.push_str("<|fim_suffix|>\n");
1151        prompt.push_str(&context[editable_range.end..]);
1152        if !prompt.ends_with('\n') {
1153            prompt.push('\n');
1154        }
1155
1156        prompt.push_str(END_MARKER);
1157        prompt.push('\n');
1158    }
1159
    /// A single edit command parsed from the model output.
    ///
    /// `content` is the command's body: the text between the command's header
    /// line and the next command header (or the end of the output), borrowed
    /// from the model output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` means "before the first line".
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1177
1178    /// Parse a line reference like `3:c3` into a `LineRef`.
1179    fn parse_line_ref(s: &str) -> Option<LineRef> {
1180        let (idx_str, hash_str) = s.split_once(':')?;
1181        let index = idx_str.parse::<usize>().ok()?;
1182        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1183        Some(LineRef { index, hash })
1184    }
1185
1186    /// Parse the model output into a list of `EditCommand`s.
1187    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1188        let mut commands = Vec::new();
1189        let mut offset = 0usize;
1190
1191        while offset < model_output.len() {
1192            let next_nl = model_output[offset..]
1193                .find('\n')
1194                .map(|i| offset + i)
1195                .unwrap_or(model_output.len());
1196            let line = &model_output[offset..next_nl];
1197            let line_end = if next_nl < model_output.len() {
1198                next_nl + 1
1199            } else {
1200                next_nl
1201            };
1202
1203            let trimmed = line.trim();
1204            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1205                (true, spec)
1206            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1207                (false, spec)
1208            } else {
1209                offset = line_end;
1210                continue;
1211            };
1212
1213            let mut content_end = line_end;
1214            let mut scan = line_end;
1215
1216            while scan < model_output.len() {
1217                let body_nl = model_output[scan..]
1218                    .find('\n')
1219                    .map(|i| scan + i)
1220                    .unwrap_or(model_output.len());
1221                let body_line = &model_output[scan..body_nl];
1222                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1223                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1224                {
1225                    break;
1226                }
1227                scan = if body_nl < model_output.len() {
1228                    body_nl + 1
1229                } else {
1230                    body_nl
1231                };
1232                content_end = scan;
1233            }
1234
1235            let content = &model_output[line_end..content_end];
1236
1237            if is_set {
1238                if let Some((start_str, end_str)) = specifier.split_once('-') {
1239                    if let (Some(start), Some(end)) =
1240                        (parse_line_ref(start_str), parse_line_ref(end_str))
1241                    {
1242                        commands.push(EditCommand::Set {
1243                            start,
1244                            end,
1245                            content,
1246                        });
1247                    }
1248                } else if let Some(target) = parse_line_ref(specifier) {
1249                    commands.push(EditCommand::Set {
1250                        start: target.clone(),
1251                        end: target,
1252                        content,
1253                    });
1254                }
1255            } else {
1256                let after = parse_line_ref(specifier);
1257                commands.push(EditCommand::Insert { after, content });
1258            }
1259
1260            offset = scan;
1261        }
1262
1263        commands
1264    }
1265
1266    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1267    /// (as opposed to being a plain full-replacement output).
1268    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1269    /// editable region, returning the plain text content.
1270    pub fn strip_hashline_prefixes(region: &str) -> String {
1271        let mut decoded: String = region
1272            .lines()
1273            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1274            .collect::<Vec<_>>()
1275            .join("\n");
1276        if region.ends_with('\n') {
1277            decoded.push('\n');
1278        }
1279        decoded
1280    }
1281
1282    pub fn output_has_edit_commands(model_output: &str) -> bool {
1283        model_output.contains(SET_COMMAND_MARKER)
1284            || model_output.contains(INSERT_COMMAND_MARKER)
1285            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1286    }
1287
1288    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1289    /// original editable region text.
1290    ///
1291    /// `editable_region` is the original text of the editable region (without hash
1292    /// prefixes). `model_output` is the raw model response containing edit commands.
1293    ///
1294    /// Returns the full replacement text for the editable region.
1295    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1296        if model_output
1297            .trim_start()
1298            .starts_with(NO_EDITS_COMMAND_MARKER)
1299        {
1300            return editable_region.to_string();
1301        }
1302
1303        let original_lines: Vec<&str> = editable_region.lines().collect();
1304        let old_hashes: Vec<u8> = original_lines
1305            .iter()
1306            .map(|line| hash_line(line.as_bytes()))
1307            .collect();
1308
1309        let commands = parse_edit_commands(model_output);
1310
1311        // For set operations: indexed by start line → Some((end line index, content))
1312        // For insert operations: indexed by line index → vec of content to insert after
1313        // Insert-before-first is tracked separately.
1314        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1315        let mut insert_before_first: Vec<&str> = Vec::new();
1316        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1317
1318        for command in &commands {
1319            match command {
1320                EditCommand::Set {
1321                    start,
1322                    end,
1323                    content,
1324                } => {
1325                    if start.index < old_hashes.len()
1326                        && end.index < old_hashes.len()
1327                        && start.index <= end.index
1328                        && old_hashes[start.index] == start.hash
1329                        && old_hashes[end.index] == end.hash
1330                    {
1331                        set_ops[start.index] = Some((end.index, *content));
1332                    }
1333                }
1334                EditCommand::Insert { after, content } => match after {
1335                    None => insert_before_first.push(*content),
1336                    Some(line_ref) => {
1337                        if line_ref.index < old_hashes.len()
1338                            && old_hashes[line_ref.index] == line_ref.hash
1339                        {
1340                            insert_after[line_ref.index].push(*content);
1341                        }
1342                    }
1343                },
1344            }
1345        }
1346
1347        let mut result = String::new();
1348
1349        // Emit any insertions before the first line
1350        for content in &insert_before_first {
1351            result.push_str(content);
1352            if !content.ends_with('\n') {
1353                result.push('\n');
1354            }
1355        }
1356
1357        let mut i = 0;
1358        while i < original_lines.len() {
1359            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1360                // Replace lines i..=end_index with the replacement content
1361                result.push_str(replacement);
1362                if !replacement.is_empty() && !replacement.ends_with('\n') {
1363                    result.push('\n');
1364                }
1365                // Emit any insertions after the end of this set range
1366                if *end_index < insert_after.len() {
1367                    for content in &insert_after[*end_index] {
1368                        result.push_str(content);
1369                        if !content.ends_with('\n') {
1370                            result.push('\n');
1371                        }
1372                    }
1373                }
1374                i = end_index + 1;
1375            } else {
1376                // Keep the original line
1377                result.push_str(original_lines[i]);
1378                result.push('\n');
1379                // Emit any insertions after this line
1380                for content in &insert_after[i] {
1381                    result.push_str(content);
1382                    if !content.ends_with('\n') {
1383                        result.push('\n');
1384                    }
1385                }
1386                i += 1;
1387            }
1388        }
1389
1390        // Preserve trailing newline behavior: if the original ended with a
1391        // newline the result already has one; if it didn't, trim the extra one
1392        // we added.
1393        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1394            result.pop();
1395        }
1396
1397        result
1398    }
1399
1400    /// Convert a unified diff patch into hashline edit commands.
1401    ///
1402    /// Parses the unified diff `patch` directly to determine which lines of
1403    /// `old_text` are deleted/replaced and what new lines are added, then emits
1404    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1405    /// `{index}:{hash}` identifiers.
1406    ///
1407    /// `cursor_offset` is an optional byte offset into the first hunk's new
1408    /// text (context + additions) where the cursor marker should be placed.
1409    pub fn patch_to_edit_commands(
1410        old_text: &str,
1411        patch: &str,
1412        cursor_offset: Option<usize>,
1413    ) -> Result<String> {
1414        let old_lines: Vec<&str> = old_text.lines().collect();
1415        let old_hashes: Vec<u8> = old_lines
1416            .iter()
1417            .map(|line| hash_line(line.as_bytes()))
1418            .collect();
1419
1420        let mut result = String::new();
1421        let mut first_hunk = true;
1422
1423        struct Hunk<'a> {
1424            line_range: Range<usize>,
1425            new_text_lines: Vec<&'a str>,
1426            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1427        }
1428
1429        // Parse the patch line by line. We only care about hunk headers,
1430        // context, deletions, and additions.
1431        let mut old_line_index: usize = 0;
1432        let mut current_hunk: Option<Hunk> = None;
1433        // Byte offset tracking within the hunk's new text for cursor placement.
1434        let mut new_text_byte_offset: usize = 0;
1435        // The line index of the last old line seen before/in the current hunk
1436        // (used for insert-after reference).
1437        let mut last_old_line_before_hunk: Option<usize> = None;
1438
1439        fn flush_hunk(
1440            hunk: Hunk,
1441            last_old_line: Option<usize>,
1442            result: &mut String,
1443            old_hashes: &[u8],
1444        ) {
1445            if hunk.line_range.is_empty() {
1446                // Pure insertion — reference the old line to insert after when in bounds.
1447                if let Some(after) = last_old_line
1448                    && let Some(&hash) = old_hashes.get(after)
1449                {
1450                    write!(
1451                        result,
1452                        "{INSERT_COMMAND_MARKER}{}\n",
1453                        LineRef { index: after, hash }
1454                    )
1455                    .unwrap();
1456                } else {
1457                    result.push_str(INSERT_COMMAND_MARKER);
1458                    result.push('\n');
1459                }
1460            } else {
1461                let start = hunk.line_range.start;
1462                let end_exclusive = hunk.line_range.end;
1463                let deleted_line_count = end_exclusive.saturating_sub(start);
1464
1465                if deleted_line_count == 1 {
1466                    if let Some(&hash) = old_hashes.get(start) {
1467                        write!(
1468                            result,
1469                            "{SET_COMMAND_MARKER}{}\n",
1470                            LineRef { index: start, hash }
1471                        )
1472                        .unwrap();
1473                    } else {
1474                        result.push_str(SET_COMMAND_MARKER);
1475                        result.push('\n');
1476                    }
1477                } else {
1478                    let end_inclusive = end_exclusive - 1;
1479                    match (
1480                        old_hashes.get(start).copied(),
1481                        old_hashes.get(end_inclusive).copied(),
1482                    ) {
1483                        (Some(start_hash), Some(end_hash)) => {
1484                            write!(
1485                                result,
1486                                "{SET_COMMAND_MARKER}{}-{}\n",
1487                                LineRef {
1488                                    index: start,
1489                                    hash: start_hash
1490                                },
1491                                LineRef {
1492                                    index: end_inclusive,
1493                                    hash: end_hash
1494                                }
1495                            )
1496                            .unwrap();
1497                        }
1498                        _ => {
1499                            result.push_str(SET_COMMAND_MARKER);
1500                            result.push('\n');
1501                        }
1502                    }
1503                }
1504            }
1505            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1506                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1507                    && line_offset == cursor_line_offset
1508                {
1509                    result.push_str(&line[..char_offset]);
1510                    result.push_str(CURSOR_MARKER);
1511                    result.push_str(&line[char_offset..]);
1512                    continue;
1513                }
1514
1515                result.push_str(line);
1516            }
1517        }
1518
1519        for raw_line in patch.split_inclusive('\n') {
1520            if raw_line.starts_with("@@") {
1521                // Flush any pending change hunk from a previous patch hunk.
1522                if let Some(hunk) = current_hunk.take() {
1523                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1524                }
1525
1526                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1527                // We intentionally do not trust old_start as a direct local index into `old_text`,
1528                // because some patches are produced against a larger file region and carry
1529                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1530                if first_hunk {
1531                    new_text_byte_offset = 0;
1532                    first_hunk = false;
1533                }
1534                continue;
1535            }
1536
1537            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1538                continue;
1539            }
1540            if raw_line.starts_with("\\ No newline") {
1541                continue;
1542            }
1543
1544            if raw_line.starts_with('-') {
1545                // Extend or start a change hunk with this deleted old line.
1546                match &mut current_hunk {
1547                    Some(Hunk {
1548                        line_range: range, ..
1549                    }) => range.end = old_line_index + 1,
1550                    None => {
1551                        current_hunk = Some(Hunk {
1552                            line_range: old_line_index..old_line_index + 1,
1553                            new_text_lines: Vec::new(),
1554                            cursor_line_offset_in_new_text: None,
1555                        });
1556                    }
1557                }
1558                old_line_index += 1;
1559            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1560                // Place cursor marker if cursor_offset falls within this line.
1561                let mut cursor_line_offset = None;
1562                if let Some(cursor_off) = cursor_offset
1563                    && (first_hunk
1564                        || cursor_off >= new_text_byte_offset
1565                            && cursor_off <= new_text_byte_offset + added_content.len())
1566                {
1567                    let line_offset = added_content.floor_char_boundary(
1568                        cursor_off
1569                            .saturating_sub(new_text_byte_offset)
1570                            .min(added_content.len()),
1571                    );
1572                    cursor_line_offset = Some(line_offset);
1573                }
1574
1575                new_text_byte_offset += added_content.len();
1576
1577                let hunk = current_hunk.get_or_insert(Hunk {
1578                    line_range: old_line_index..old_line_index,
1579                    new_text_lines: vec![],
1580                    cursor_line_offset_in_new_text: None,
1581                });
1582                hunk.new_text_lines.push(added_content);
1583                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1584                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1585            } else {
1586                // Context line (starts with ' ' or is empty).
1587                if let Some(hunk) = current_hunk.take() {
1588                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1589                }
1590                last_old_line_before_hunk = Some(old_line_index);
1591                old_line_index += 1;
1592                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1593                new_text_byte_offset += content.len();
1594            }
1595        }
1596
1597        // Flush final group.
1598        if let Some(hunk) = current_hunk.take() {
1599            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1600        }
1601
1602        // Trim a single trailing newline.
1603        if result.ends_with('\n') {
1604            result.pop();
1605        }
1606
1607        if result.is_empty() {
1608            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1609        }
1610
1611        Ok(result)
1612    }
1613
1614    #[cfg(test)]
1615    mod tests {
1616        use super::*;
1617        use indoc::indoc;
1618
1619        #[test]
1620        fn test_format_cursor_region() {
1621            struct Case {
1622                name: &'static str,
1623                context: &'static str,
1624                editable_range: Range<usize>,
1625                cursor_offset: usize,
1626                expected: &'static str,
1627            }
1628
1629            let cases = [
1630                Case {
1631                    name: "basic_cursor_placement",
1632                    context: "hello world\n",
1633                    editable_range: 0..12,
1634                    cursor_offset: 5,
1635                    expected: indoc! {"
1636                    <|file_sep|>test.rs
1637                    <|fim_prefix|>
1638                    <|fim_middle|>current
1639                    0:5c|hello<|user_cursor|> world
1640                    <|fim_suffix|>
1641                    <|fim_middle|>updated
1642                    "},
1643                },
1644                Case {
1645                    name: "multiline_cursor_on_second_line",
1646                    context: "aaa\nbbb\nccc\n",
1647                    editable_range: 0..12,
1648                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1649                    expected: indoc! {"
1650                    <|file_sep|>test.rs
1651                    <|fim_prefix|>
1652                    <|fim_middle|>current
1653                    0:23|aaa
1654                    1:26|b<|user_cursor|>bb
1655                    2:29|ccc
1656                    <|fim_suffix|>
1657                    <|fim_middle|>updated
1658                    "},
1659                },
1660                Case {
1661                    name: "no_trailing_newline_in_context",
1662                    context: "line1\nline2",
1663                    editable_range: 0..11,
1664                    cursor_offset: 3,
1665                    expected: indoc! {"
1666                    <|file_sep|>test.rs
1667                    <|fim_prefix|>
1668                    <|fim_middle|>current
1669                    0:d9|lin<|user_cursor|>e1
1670                    1:da|line2
1671                    <|fim_suffix|>
1672                    <|fim_middle|>updated
1673                    "},
1674                },
1675                Case {
1676                    name: "leading_newline_in_editable_region",
1677                    context: "\nabc\n",
1678                    editable_range: 0..5,
1679                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1680                    expected: indoc! {"
1681                    <|file_sep|>test.rs
1682                    <|fim_prefix|>
1683                    <|fim_middle|>current
1684                    0:00|
1685                    1:26|a<|user_cursor|>bc
1686                    <|fim_suffix|>
1687                    <|fim_middle|>updated
1688                    "},
1689                },
1690                Case {
1691                    name: "with_suffix",
1692                    context: "abc\ndef",
1693                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1694                    cursor_offset: 2,
1695                    expected: indoc! {"
1696                    <|file_sep|>test.rs
1697                    <|fim_prefix|>
1698                    <|fim_middle|>current
1699                    0:26|ab<|user_cursor|>c
1700                    <|fim_suffix|>
1701                    def
1702                    <|fim_middle|>updated
1703                    "},
1704                },
1705                Case {
1706                    name: "unicode_two_byte_chars",
1707                    context: "héllo\n",
1708                    editable_range: 0..7,
1709                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1710                    expected: indoc! {"
1711                    <|file_sep|>test.rs
1712                    <|fim_prefix|>
1713                    <|fim_middle|>current
1714                    0:1b|hé<|user_cursor|>llo
1715                    <|fim_suffix|>
1716                    <|fim_middle|>updated
1717                    "},
1718                },
1719                Case {
1720                    name: "unicode_three_byte_chars",
1721                    context: "日本語\n",
1722                    editable_range: 0..10,
1723                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1724                    expected: indoc! {"
1725                    <|file_sep|>test.rs
1726                    <|fim_prefix|>
1727                    <|fim_middle|>current
1728                    0:80|日本<|user_cursor|>語
1729                    <|fim_suffix|>
1730                    <|fim_middle|>updated
1731                    "},
1732                },
1733                Case {
1734                    name: "unicode_four_byte_chars",
1735                    context: "a🌍b\n",
1736                    editable_range: 0..7,
1737                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1738                    expected: indoc! {"
1739                    <|file_sep|>test.rs
1740                    <|fim_prefix|>
1741                    <|fim_middle|>current
1742                    0:6b|a🌍<|user_cursor|>b
1743                    <|fim_suffix|>
1744                    <|fim_middle|>updated
1745                    "},
1746                },
1747                Case {
1748                    name: "cursor_at_start_of_region_not_placed",
1749                    context: "abc\n",
1750                    editable_range: 0..4,
1751                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1752                    expected: indoc! {"
1753                    <|file_sep|>test.rs
1754                    <|fim_prefix|>
1755                    <|fim_middle|>current
1756                    0:26|abc
1757                    <|fim_suffix|>
1758                    <|fim_middle|>updated
1759                    "},
1760                },
1761                Case {
1762                    name: "cursor_at_end_of_line_not_placed",
1763                    context: "abc\ndef\n",
1764                    editable_range: 0..8,
1765                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1766                    expected: indoc! {"
1767                    <|file_sep|>test.rs
1768                    <|fim_prefix|>
1769                    <|fim_middle|>current
1770                    0:26|abc
1771                    1:2f|def
1772                    <|fim_suffix|>
1773                    <|fim_middle|>updated
1774                    "},
1775                },
1776                Case {
1777                    name: "cursor_offset_relative_to_context_not_editable_region",
1778                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
1779                    // write_cursor_excerpt_section must subtract it before comparing against
1780                    // per-line offsets within the editable region.
1781                    context: "pre\naaa\nbbb\nsuf\n",
1782                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
1783                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
1784                    expected: indoc! {"
1785                    <|file_sep|>test.rs
1786                    <|fim_prefix|>
1787                    pre
1788                    <|fim_middle|>current
1789                    0:23|aaa
1790                    1:26|b<|user_cursor|>bb
1791                    <|fim_suffix|>
1792                    suf
1793                    <|fim_middle|>updated
1794                    "},
1795                },
1796            ];
1797
1798            for case in &cases {
1799                let mut prompt = String::new();
1800                hashline::write_cursor_excerpt_section(
1801                    &mut prompt,
1802                    Path::new("test.rs"),
1803                    case.context,
1804                    &case.editable_range,
1805                    case.cursor_offset,
1806                );
1807                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1808            }
1809        }
1810
1811        #[test]
1812        fn test_apply_edit_commands() {
1813            struct Case {
1814                name: &'static str,
1815                original: &'static str,
1816                model_output: &'static str,
1817                expected: &'static str,
1818            }
1819
1820            let cases = vec![
1821                Case {
1822                    name: "set_single_line",
1823                    original: indoc! {"
1824                    let mut total = 0;
1825                    for product in products {
1826                        total += ;
1827                    }
1828                    total
1829                "},
1830                    model_output: indoc! {"
1831                    <|set|>2:87
1832                        total += product.price;
1833                "},
1834                    expected: indoc! {"
1835                    let mut total = 0;
1836                    for product in products {
1837                        total += product.price;
1838                    }
1839                    total
1840                "},
1841                },
1842                Case {
1843                    name: "set_range",
1844                    original: indoc! {"
1845                    fn foo() {
1846                        let x = 1;
1847                        let y = 2;
1848                        let z = 3;
1849                    }
1850                "},
1851                    model_output: indoc! {"
1852                    <|set|>1:46-3:4a
1853                        let sum = 6;
1854                "},
1855                    expected: indoc! {"
1856                    fn foo() {
1857                        let sum = 6;
1858                    }
1859                "},
1860                },
1861                Case {
1862                    name: "insert_after_line",
1863                    original: indoc! {"
1864                    fn main() {
1865                        let x = 1;
1866                    }
1867                "},
1868                    model_output: indoc! {"
1869                    <|insert|>1:46
1870                        let y = 2;
1871                "},
1872                    expected: indoc! {"
1873                    fn main() {
1874                        let x = 1;
1875                        let y = 2;
1876                    }
1877                "},
1878                },
1879                Case {
1880                    name: "insert_before_first",
1881                    original: indoc! {"
1882                    let x = 1;
1883                    let y = 2;
1884                "},
1885                    model_output: indoc! {"
1886                    <|insert|>
1887                    use std::io;
1888                "},
1889                    expected: indoc! {"
1890                    use std::io;
1891                    let x = 1;
1892                    let y = 2;
1893                "},
1894                },
1895                Case {
1896                    name: "set_with_cursor_marker",
1897                    original: indoc! {"
1898                    fn main() {
1899                        println!();
1900                    }
1901                "},
1902                    model_output: indoc! {"
1903                    <|set|>1:34
1904                        eprintln!(\"<|user_cursor|>\");
1905                "},
1906                    expected: indoc! {"
1907                    fn main() {
1908                        eprintln!(\"<|user_cursor|>\");
1909                    }
1910                "},
1911                },
1912                Case {
1913                    name: "multiple_set_commands",
1914                    original: indoc! {"
1915                    aaa
1916                    bbb
1917                    ccc
1918                    ddd
1919                "},
1920                    model_output: indoc! {"
1921                    <|set|>0:23
1922                    AAA
1923                    <|set|>2:29
1924                    CCC
1925                "},
1926                    expected: indoc! {"
1927                    AAA
1928                    bbb
1929                    CCC
1930                    ddd
1931                "},
1932                },
1933                Case {
1934                    name: "set_range_multiline_replacement",
1935                    original: indoc! {"
1936                    fn handle_submit() {
1937                    }
1938
1939                    fn handle_keystroke() {
1940                "},
1941                    model_output: indoc! {"
1942                    <|set|>0:3f-1:7d
1943                    fn handle_submit(modal_state: &mut ModalState) {
1944                        <|user_cursor|>
1945                    }
1946                "},
1947                    expected: indoc! {"
1948                    fn handle_submit(modal_state: &mut ModalState) {
1949                        <|user_cursor|>
1950                    }
1951
1952                    fn handle_keystroke() {
1953                "},
1954                },
1955                Case {
1956                    name: "no_edit_commands_returns_original",
1957                    original: indoc! {"
1958                    hello
1959                    world
1960                "},
1961                    model_output: "some random text with no commands",
1962                    expected: indoc! {"
1963                    hello
1964                    world
1965                "},
1966                },
1967                Case {
1968                    name: "no_edits_command_returns_original",
1969                    original: indoc! {"
1970                    hello
1971                    world
1972                "},
1973                    model_output: "<|no_edits|>",
1974                    expected: indoc! {"
1975                    hello
1976                    world
1977                "},
1978                },
1979                Case {
1980                    name: "wrong_hash_set_ignored",
1981                    original: indoc! {"
1982                    aaa
1983                    bbb
1984                "},
1985                    model_output: indoc! {"
1986                    <|set|>0:ff
1987                    ZZZ
1988                "},
1989                    expected: indoc! {"
1990                    aaa
1991                    bbb
1992                "},
1993                },
1994                Case {
1995                    name: "insert_and_set_combined",
1996                    original: indoc! {"
1997                    alpha
1998                    beta
1999                    gamma
2000                "},
2001                    model_output: indoc! {"
2002                    <|set|>0:06
2003                    ALPHA
2004                    <|insert|>1:9c
2005                    beta_extra
2006                "},
2007                    expected: indoc! {"
2008                    ALPHA
2009                    beta
2010                    beta_extra
2011                    gamma
2012                "},
2013                },
2014                Case {
2015                    name: "no_trailing_newline_preserved",
2016                    original: "hello\nworld",
2017                    model_output: indoc! {"
2018                    <|set|>0:14
2019                    HELLO
2020                "},
2021                    expected: "HELLO\nworld",
2022                },
2023                Case {
2024                    name: "set_range_hash_mismatch_in_end_bound",
2025                    original: indoc! {"
2026                    one
2027                    two
2028                    three
2029                "},
2030                    model_output: indoc! {"
2031                    <|set|>0:42-2:ff
2032                    ONE_TWO_THREE
2033                "},
2034                    expected: indoc! {"
2035                    one
2036                    two
2037                    three
2038                "},
2039                },
2040                Case {
2041                    name: "set_range_start_greater_than_end_ignored",
2042                    original: indoc! {"
2043                    a
2044                    b
2045                    c
2046                "},
2047                    model_output: indoc! {"
2048                    <|set|>2:63-1:62
2049                    X
2050                "},
2051                    expected: indoc! {"
2052                    a
2053                    b
2054                    c
2055                "},
2056                },
2057                Case {
2058                    name: "insert_out_of_bounds_ignored",
2059                    original: indoc! {"
2060                    x
2061                    y
2062                "},
2063                    model_output: indoc! {"
2064                    <|insert|>99:aa
2065                    z
2066                "},
2067                    expected: indoc! {"
2068                    x
2069                    y
2070                "},
2071                },
2072                Case {
2073                    name: "set_out_of_bounds_ignored",
2074                    original: indoc! {"
2075                    x
2076                    y
2077                "},
2078                    model_output: indoc! {"
2079                    <|set|>99:aa
2080                    z
2081                "},
2082                    expected: indoc! {"
2083                    x
2084                    y
2085                "},
2086                },
2087                Case {
2088                    name: "malformed_set_command_ignored",
2089                    original: indoc! {"
2090                    alpha
2091                    beta
2092                "},
2093                    model_output: indoc! {"
2094                    <|set|>not-a-line-ref
2095                    UPDATED
2096                "},
2097                    expected: indoc! {"
2098                    alpha
2099                    beta
2100                "},
2101                },
2102                Case {
2103                    name: "malformed_insert_hash_treated_as_before_first",
2104                    original: indoc! {"
2105                    alpha
2106                    beta
2107                "},
2108                    model_output: indoc! {"
2109                    <|insert|>1:nothex
2110                    preamble
2111                "},
2112                    expected: indoc! {"
2113                    preamble
2114                    alpha
2115                    beta
2116                "},
2117                },
2118                Case {
2119                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2120                    original: indoc! {"
2121                    cat
2122                    dog
2123                "},
2124                    model_output: indoc! {"
2125                    <|set|>0:38
2126                    CAT
2127                    <|insert|>0:38
2128                    TAIL
2129                "},
2130                    expected: indoc! {"
2131                    CAT
2132                    TAIL
2133                    dog
2134                "},
2135                },
2136                Case {
2137                    name: "overlapping_set_ranges_last_wins",
2138                    original: indoc! {"
2139                    a
2140                    b
2141                    c
2142                    d
2143                "},
2144                    model_output: indoc! {"
2145                    <|set|>0:61-2:63
2146                    FIRST
2147                    <|set|>1:62-3:64
2148                    SECOND
2149                "},
2150                    expected: indoc! {"
2151                    FIRST
2152                    d
2153                "},
2154                },
2155                Case {
2156                    name: "insert_before_first_and_after_line",
2157                    original: indoc! {"
2158                    a
2159                    b
2160                "},
2161                    model_output: indoc! {"
2162                    <|insert|>
2163                    HEAD
2164                    <|insert|>0:61
2165                    MID
2166                "},
2167                    expected: indoc! {"
2168                    HEAD
2169                    a
2170                    MID
2171                    b
2172                "},
2173                },
2174            ];
2175
2176            for case in &cases {
2177                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2178                assert_eq!(result, case.expected, "failed case: {}", case.name);
2179            }
2180        }
2181
2182        #[test]
2183        fn test_output_has_edit_commands() {
2184            assert!(hashline::output_has_edit_commands(&format!(
2185                "{}0:ab\nnew",
2186                SET_COMMAND_MARKER
2187            )));
2188            assert!(hashline::output_has_edit_commands(&format!(
2189                "{}0:ab\nnew",
2190                INSERT_COMMAND_MARKER
2191            )));
2192            assert!(hashline::output_has_edit_commands(&format!(
2193                "some text\n{}1:cd\nstuff",
2194                SET_COMMAND_MARKER
2195            )));
2196            assert!(!hashline::output_has_edit_commands("just plain text"));
2197            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2198            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2199        }
2200
2201        // ---- hashline::patch_to_edit_commands round-trip tests ----
2202
        // Round-trip test: for every case, a unified-diff `patch` against `old` is
        // converted to edit commands with `hashline::patch_to_edit_commands`, the
        // commands are applied back to `old` with `hashline::apply_edit_commands`,
        // and the result must equal `expected_new`.
        //
        // The string literals are byte-sensitive (diff prefixes and indentation are
        // part of the fixture); do not reformat them.
        #[test]
        fn test_patch_to_edit_commands() {
            struct Case {
                // Label printed in panic/assertion messages when the case fails.
                name: &'static str,
                // Text the patch is applied to.
                old: &'static str,
                // Unified-diff hunk describing the change.
                patch: &'static str,
                // Text expected after applying the generated edit commands to `old`.
                // May contain `CURSOR_MARKER`, which also determines `cursor_offset` below.
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += ;
                    }
                    total
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,5 @@
                     let mut total = 0;
                     for product in products {
                    -    total += ;
                    +    total += product.price;
                     }
                     total
                "},
                    expected_new: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += product.price;
                    }
                    total
                "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                    fn foo() {
                        let x = 1;
                        let y = 2;
                        let z = 3;
                    }
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,3 @@
                     fn foo() {
                    -    let x = 1;
                    -    let y = 2;
                    -    let z = 3;
                    +    let sum = 1 + 2 + 3;
                     }
                "},
                    expected_new: indoc! {"
                    fn foo() {
                        let sum = 1 + 2 + 3;
                    }
                "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                    fn main() {
                        let x = 1;
                    }
                "},
                    patch: indoc! {"
                    @@ -1,3 +1,4 @@
                     fn main() {
                         let x = 1;
                    +    let y = 2;
                     }
                "},
                    expected_new: indoc! {"
                    fn main() {
                        let x = 1;
                        let y = 2;
                    }
                "},
                },
                // The added line precedes all context lines in the hunk.
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                    let x = 1;
                    let y = 2;
                "},
                    patch: indoc! {"
                    @@ -1,2 +1,3 @@
                    +use std::io;
                     let x = 1;
                     let y = 2;
                "},
                    expected_new: indoc! {"
                    use std::io;
                    let x = 1;
                    let y = 2;
                "},
                },
                // Pure deletion: the hunk removes lines and adds none.
                Case {
                    name: "deletion",
                    old: indoc! {"
                    aaa
                    bbb
                    ccc
                    ddd
                "},
                    patch: indoc! {"
                    @@ -1,4 +1,2 @@
                     aaa
                    -bbb
                    -ccc
                     ddd
                "},
                    expected_new: indoc! {"
                    aaa
                    ddd
                "},
                },
                // Two separate replacements inside a single hunk.
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                    alpha
                    beta
                    gamma
                    delta
                    epsilon
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,5 @@
                    -alpha
                    +ALPHA
                     beta
                     gamma
                    -delta
                    +DELTA
                     epsilon
                "},
                    expected_new: indoc! {"
                    ALPHA
                    beta
                    gamma
                    DELTA
                    epsilon
                "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                    fn handle() {
                        modal_state.close();
                        modal_state.dismiss();
                "#},
                    patch: indoc! {r#"
                    @@ -1,3 +1,4 @@
                     fn handle() {
                         modal_state.close();
                    +    eprintln!("");
                         modal_state.dismiss();
                "#},
                    expected_new: indoc! {r#"
                    fn handle() {
                        modal_state.close();
                        eprintln!("");
                        modal_state.dismiss();
                "#},
                },
                // Every line of `old` is removed and replaced; the hunk has no context lines.
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                    aaa
                    bbb
                    ccc
                "},
                    patch: indoc! {"
                    @@ -1,3 +1,3 @@
                    -aaa
                    -bbb
                    -ccc
                    +xxx
                    +yyy
                    +zzz
                "},
                    expected_new: indoc! {"
                    xxx
                    yyy
                    zzz
                "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                    fn foo() {
                        modal_state.dismiss();
                    }

                    fn

                    fn handle_keystroke() {
                "},
                    patch: indoc! {"
                    @@ -1,6 +1,8 @@
                     fn foo() {
                         modal_state.dismiss();
                     }

                    -fn
                    +fn handle_submit() {
                    +    todo()
                    +}

                     fn handle_keystroke() {
                "},
                    expected_new: indoc! {"
                    fn foo() {
                        modal_state.dismiss();
                    }

                    fn handle_submit() {
                        todo()
                    }

                    fn handle_keystroke() {
                "},
                },
                // The patch itself has no cursor marker; `expected_new` does. The loop
                // below derives `cursor_offset` from the marker's position in `expected_new`.
                // NOTE(review): context lines in this and the following hunks carry no
                // leading space, unlike the cases above — presumably intentional; confirm.
                Case {
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                    fn main() {
                        println!();
                    }
                "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                        fn main() {
                        -    println!();
                        +    eprintln!("");
                        }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                // The hunk header points at line 20 although `old` has only two lines.
                Case {
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                        aaa
                        +xxx
                        bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
                // Header-only patch: the text must come back unchanged, and the generated
                // output must still satisfy `output_has_edit_commands` (asserted below).
                Case {
                    name: "empty_patch_produces_no_edits_marker",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: "@@ -20,2 +20,3 @@\n",
                    expected_new: indoc! {"
                        aaa
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                // `find` yields `None` for cases without a marker.
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                // A conversion error fails the test immediately, naming the case.
                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                // Every case, including the empty patch, must yield output that is
                // recognized as containing edit commands.
                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                // Round trip: applying the generated commands to `old` must reproduce
                // `expected_new` exactly.
                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
2501    }
2502}
2503
2504pub mod seed_coder {
2505    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2506    //!
2507    //! Seed-Coder uses different FIM tokens and order than Qwen:
2508    //! - SPM order: suffix comes FIRST, then prefix, then middle
2509    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2510    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2511    //!
2512    //! All context (related files, edit history) goes in the PREFIX section.
2513    //! The suffix contains only code after the editable region.
2514    //!
2515    //! Example prompt:
2516    //!
2517    //! <[fim-suffix]>
2518    //! code after editable region
2519    //! <[fim-prefix]><filename>related/file.py
2520    //! related file content
2521    //!
2522    //! <filename>edit_history
2523    //! --- a/some_file.py
2524    //! +++ b/some_file.py
2525    //! -old
2526    //! +new
2527    //!
2528    //! <filename>path/to/target_file.py
2529    //! code before editable region
2530    //! <<<<<<< CURRENT
2531    //! code that
2532    //! needs to<|user_cursor|>
2533    //! be rewritten
2534    //! =======
2535    //! <[fim-middle]>
2536    //!
2537    //! Expected output (model generates):
2538    //!
2539    //! updated
2540    //! code with
2541    //! changes applied
2542    //! >>>>>>> UPDATED
2543
2544    use super::*;
2545
    /// Token that opens the suffix section; in SPM order it comes first in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section (related files, edit history, target file).
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token that ends the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// StarCoder-style file marker, written immediately before a path.
    pub const FILE_MARKER: &str = "<filename>";

    /// Line that opens the current (to-be-rewritten) region in the prompt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line that closes the current region, just before `FIM_MIDDLE`.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that terminates the model's expected output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// `NO_EDITS` followed by a newline.
    /// NOTE(review): presumably the output that signals "no change needed" — its
    /// use is not visible here; confirm against the output parser.
    pub const NO_EDITS: &str = "NO_EDITS\n";
2556
2557    pub fn special_tokens() -> &'static [&'static str] {
2558        &[
2559            FIM_SUFFIX,
2560            FIM_PREFIX,
2561            FIM_MIDDLE,
2562            FILE_MARKER,
2563            START_MARKER,
2564            SEPARATOR,
2565            END_MARKER,
2566            CURSOR_MARKER,
2567        ]
2568    }
2569
2570    pub fn write_cursor_excerpt_section(
2571        prompt: &mut String,
2572        path: &Path,
2573        context: &str,
2574        editable_range: &Range<usize>,
2575        cursor_offset: usize,
2576    ) {
2577        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2578        prompt.push_str(&section);
2579    }
2580
2581    pub fn format_prompt_with_budget(
2582        path: &Path,
2583        context: &str,
2584        editable_range: &Range<usize>,
2585        cursor_offset: usize,
2586        events: &[Arc<Event>],
2587        related_files: &[RelatedFile],
2588        max_tokens: usize,
2589    ) -> String {
2590        let suffix_section = build_suffix_section(context, editable_range);
2591        let cursor_prefix_section =
2592            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2593
2594        let suffix_tokens = estimate_tokens(suffix_section.len());
2595        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2596        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2597
2598        let edit_history_section = super::format_edit_history_within_budget(
2599            events,
2600            FILE_MARKER,
2601            "edit_history",
2602            budget_after_cursor,
2603        );
2604        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2605        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2606
2607        let related_files_section = super::format_related_files_within_budget(
2608            related_files,
2609            FILE_MARKER,
2610            "",
2611            budget_after_edit_history,
2612        );
2613
2614        let mut prompt = String::new();
2615        prompt.push_str(&suffix_section);
2616        prompt.push_str(FIM_PREFIX);
2617        prompt.push_str(&related_files_section);
2618        if !related_files_section.is_empty() {
2619            prompt.push('\n');
2620        }
2621        prompt.push_str(&edit_history_section);
2622        if !edit_history_section.is_empty() {
2623            prompt.push('\n');
2624        }
2625        prompt.push_str(&cursor_prefix_section);
2626        prompt.push_str(FIM_MIDDLE);
2627        prompt
2628    }
2629
2630    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2631        let mut section = String::new();
2632        section.push_str(FIM_SUFFIX);
2633        section.push_str(&context[editable_range.end..]);
2634        if !section.ends_with('\n') {
2635            section.push('\n');
2636        }
2637        section
2638    }
2639
2640    fn build_cursor_prefix_section(
2641        path: &Path,
2642        context: &str,
2643        editable_range: &Range<usize>,
2644        cursor_offset: usize,
2645    ) -> String {
2646        let mut section = String::new();
2647        let path_str = path.to_string_lossy();
2648        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2649
2650        section.push_str(&context[..editable_range.start]);
2651        section.push_str(START_MARKER);
2652        section.push_str(&context[editable_range.start..cursor_offset]);
2653        section.push_str(CURSOR_MARKER);
2654        section.push_str(&context[cursor_offset..editable_range.end]);
2655        if !section.ends_with('\n') {
2656            section.push('\n');
2657        }
2658        section.push_str(SEPARATOR);
2659        section
2660    }
2661
2662    /// Format patch as containing no changes if it's empty; otherwise return None.
2663    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2664        // Count lines in the patch
2665        let empty_patch = patch.lines().count() <= 3;
2666        if empty_patch {
2667            Some(format!("{NO_EDITS}{END_MARKER}"))
2668        } else {
2669            None
2670        }
2671    }
2672}
2673
2674pub mod v0304_variable_edit {
2675    //! A prompt format with no fixed editable region. The entire context is shown
2676    //! to the model, and it chooses which text to replace by outputting surrounding
2677    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2678    //! text.
2679    //!
2680    //! Example prompt:
2681    //!
2682    //! <|file_sep|>path/to/file.py
2683    //! zero
2684    //! one
2685    //! two
2686    //! three<|user_cursor|>
2687    //! four
2688    //! five
2689    //! <|fim_prefix|>
2690    //!
2691    //! Expected output (model generates):
2692    //!
2693    //! two
2694    //! <|fim_middle|>
2695    //! THREE
2696    //! <|fim_suffix|>
2697    //! four
2698    //!
2699    //! The output means: find "two\n...\nfour" in the context, and replace
2700    //! everything between "two\n" and "four" with "THREE\n".
2701
2702    use super::*;
2703
2704    pub fn special_tokens() -> &'static [&'static str] {
2705        &[
2706            "<|fim_prefix|>",
2707            "<|fim_suffix|>",
2708            "<|fim_middle|>",
2709            "<|file_sep|>",
2710            CURSOR_MARKER,
2711        ]
2712    }
2713
2714    pub fn write_cursor_excerpt_section(
2715        prompt: &mut String,
2716        path: &Path,
2717        context: &str,
2718        cursor_offset: usize,
2719    ) {
2720        let path_str = path.to_string_lossy();
2721        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2722
2723        prompt.push_str(&context[..cursor_offset]);
2724        prompt.push_str(CURSOR_MARKER);
2725        prompt.push_str(&context[cursor_offset..]);
2726        if !prompt.ends_with('\n') {
2727            prompt.push('\n');
2728        }
2729        prompt.push_str("<|fim_prefix|>\n")
2730    }
2731
2732    /// Apply a variable-edit model output to the original context text.
2733    ///
2734    /// The model output has the form:
2735    ///
2736    /// - prefix context lines
2737    /// - `<|fim_middle|>`
2738    /// - new text
2739    /// - `<|fim_suffix|>`
2740    /// - suffix context lines
2741    ///
2742    /// We locate the prefix/suffix context lines in the original text and replace
2743    /// everything between them with the new text.
2744    pub fn apply_variable_edit(
2745        context: &str,
2746        model_output: &str,
2747    ) -> Result<(Range<usize>, String)> {
2748        let (prefix_context, rest) = model_output
2749            .split_once("<|fim_middle|>\n")
2750            .or_else(|| model_output.split_once("<|fim_middle|>"))
2751            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2752
2753        let (new_text, suffix_context) = rest
2754            .split_once("<|fim_suffix|>\n")
2755            .or_else(|| rest.split_once("<|fim_suffix|>"))
2756            .unwrap_or((rest, ""));
2757
2758        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2759            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2760        } else {
2761            suffix_context
2762        };
2763
2764        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2765            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2766            + prefix_context.len();
2767        let suffix_offset = if suffix_context.is_empty() {
2768            context.len()
2769        } else {
2770            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2771                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2772                + prefix_offset
2773        };
2774
2775        let edit_range = prefix_offset..suffix_offset;
2776        return Ok((edit_range, new_text.to_string()));
2777    }
2778
2779    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2780        if needle.is_empty() {
2781            return Some(0);
2782        }
2783
2784        haystack.match_indices(needle).find_map(|(offset, _)| {
2785            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2786            matched_line_start.then_some(offset)
2787        })
2788    }
2789
2790    /// Convert a unified diff patch into the variable-edit output format.
2791    ///
2792    /// Parses `patch` as a unified diff against `old_text` and produces model
2793    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2794    /// delimiters. The diff is resolved by content matching rather than line
2795    /// numbers.
2796    pub fn patch_to_variable_edit_output(
2797        old_text: &str,
2798        patch: &str,
2799        cursor_offset: Option<usize>,
2800    ) -> Result<String> {
2801        // Parse the unified diff into hunks. Each hunk has an `old_context`
2802        // string (context + deleted lines interleaved in order) and a list of
2803        // edits expressed as byte ranges within that context plus replacement
2804        // text.
2805        let hunks = parse_hunks(patch);
2806        if hunks.is_empty() {
2807            return Ok(String::new());
2808        }
2809
2810        // Apply each hunk by finding its old_context in the text and
2811        // performing the edits. We search forward from where the previous
2812        // hunk ended so that hunks are applied in order.
2813        let mut new_text = old_text.to_string();
2814        let mut search_from: usize = 0;
2815        let mut first_hunk_pos: Option<usize> = None;
2816
2817        for hunk in &hunks {
2818            let context_pos = new_text[search_from..]
2819                .find(&hunk.old_context)
2820                .map(|pos| pos + search_from)
2821                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2822
2823            if first_hunk_pos.is_none() {
2824                first_hunk_pos = Some(context_pos);
2825            }
2826
2827            // Apply edits in reverse order so byte offsets remain valid.
2828            for edit in hunk.edits.iter().rev() {
2829                let abs_start = context_pos + edit.range.start;
2830                let abs_end = context_pos + edit.range.end;
2831                new_text.replace_range(abs_start..abs_end, &edit.text);
2832            }
2833
2834            // Advance past this hunk's region in the (now modified) text.
2835            let new_region_len: usize =
2836                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2837                    len + edit.text.len() - (edit.range.end - edit.range.start)
2838                });
2839            search_from = context_pos + new_region_len;
2840        }
2841
2842        // Now we have old_text and new_text. Find the changed line range by
2843        // comparing them.
2844        let old_lines: Vec<&str> = old_text.lines().collect();
2845        let new_lines: Vec<&str> = new_text.lines().collect();
2846
2847        // Find first differing line.
2848        let first_changed_row = old_lines
2849            .iter()
2850            .zip(new_lines.iter())
2851            .position(|(a, b)| a != b)
2852            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2853
2854        // Find last differing line (from the end).
2855        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2856        let common_suffix = old_lines
2857            .iter()
2858            .rev()
2859            .zip(new_lines.iter().rev())
2860            .take(max_suffix)
2861            .take_while(|(a, b)| a == b)
2862            .count();
2863
2864        let old_end = old_lines.len() - common_suffix;
2865        let new_end = new_lines.len() - common_suffix;
2866
2867        if first_changed_row == old_end && first_changed_row == new_end {
2868            return Ok(String::new());
2869        }
2870
2871        // Build the replacement text from new_lines[first_diff..new_end].
2872        let mut merged_new_text = String::new();
2873        for line in &new_lines[first_changed_row..new_end] {
2874            merged_new_text.push_str(line);
2875            merged_new_text.push('\n');
2876        }
2877
2878        // cursor_offset is relative to the first hunk's new content in
2879        // new_text. Translate it to an offset within merged_new_text, which
2880        // only contains lines first_diff..new_end of new_text.
2881        if let Some(hunk_offset) = cursor_offset {
2882            let hunk_start = first_hunk_pos.unwrap_or(0);
2883            let absolute_pos = hunk_start + hunk_offset;
2884
2885            // Byte offset where first_diff starts in new_text.
2886            let merged_start: usize = new_lines[..first_changed_row]
2887                .iter()
2888                .map(|line| line.len() + 1)
2889                .sum();
2890
2891            if absolute_pos >= merged_start {
2892                let relative_offset = absolute_pos - merged_start;
2893                if relative_offset <= merged_new_text.len() {
2894                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
2895                }
2896            }
2897        }
2898
2899        // Build output with 2 lines of context above and below.
2900        let context_lines_count = 2;
2901        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
2902        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
2903
2904        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
2905            let pattern = &lines[line_range];
2906            let pattern_len = pattern.len();
2907
2908            let mut count = 0;
2909            for offset in 0..=lines.len() - pattern_len {
2910                if &lines[offset..offset + pattern_len] == pattern {
2911                    count += 1;
2912                }
2913            }
2914            count
2915        }
2916
2917        // Expand prefix and suffix until they are unique
2918        while prefix_start > 0 {
2919            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
2920                prefix_start -= 1;
2921            } else {
2922                break;
2923            }
2924        }
2925        while suffix_end < old_lines.len() {
2926            if count_matches(old_end..suffix_end, &old_lines) > 1 {
2927                suffix_end += 1;
2928            } else {
2929                break;
2930            }
2931        }
2932
2933        let mut output = String::new();
2934        for line in &old_lines[prefix_start..first_changed_row] {
2935            output.push_str(line);
2936            output.push('\n');
2937        }
2938        output.push_str("<|fim_middle|>\n");
2939        output.push_str(&merged_new_text);
2940        output.push_str("<|fim_suffix|>\n");
2941        for line in &old_lines[old_end..suffix_end] {
2942            output.push_str(line);
2943            output.push('\n');
2944        }
2945
2946        Ok(output)
2947    }
2948
    /// One hunk of a unified diff, described by content rather than by line
    /// numbers.
    struct ParsedHunk {
        /// The hunk's context lines and deleted lines in order, each ending in
        /// `\n`; this is the text expected to be found in the original.
        old_context: String,
        /// Edits to apply, as byte ranges into `old_context`.
        edits: Vec<ParsedEdit>,
    }
2953
    /// A single replacement within a `ParsedHunk`.
    struct ParsedEdit {
        /// Byte range inside the hunk's `old_context` to replace; empty for a
        /// pure insertion.
        range: Range<usize>,
        /// Replacement text; empty for a pure deletion.
        text: String,
    }
2958
2959    /// Parse a unified diff into content-based hunks. Each hunk contains an
2960    /// `old_context` string (context lines + deleted lines, which together
2961    /// form the text that should be found in the original) and a list of edits
2962    /// expressed as byte ranges within that context.
2963    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
2964        let mut hunks = Vec::new();
2965        let mut current: Option<ParsedHunk> = None;
2966
2967        for line in patch.lines() {
2968            if line.starts_with("@@") {
2969                if let Some(hunk) = current.take() {
2970                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2971                        hunks.push(hunk);
2972                    }
2973                }
2974                current = Some(ParsedHunk {
2975                    old_context: String::new(),
2976                    edits: Vec::new(),
2977                });
2978            } else if line.starts_with("---") || line.starts_with("+++") {
2979                continue;
2980            } else if let Some(hunk) = &mut current {
2981                if let Some(added) = line.strip_prefix('+') {
2982                    let pos = hunk.old_context.len();
2983                    if let Some(last_edit) = hunk.edits.last_mut() {
2984                        if last_edit.range.end == pos {
2985                            writeln!(&mut last_edit.text, "{added}").ok();
2986                            continue;
2987                        }
2988                    }
2989                    hunk.edits.push(ParsedEdit {
2990                        range: pos..pos,
2991                        text: format!("{added}\n"),
2992                    });
2993                } else if let Some(removed) = line.strip_prefix('-') {
2994                    let start = hunk.old_context.len();
2995                    writeln!(&mut hunk.old_context, "{removed}").ok();
2996                    let end = hunk.old_context.len();
2997                    if let Some(last_edit) = hunk.edits.last_mut() {
2998                        if last_edit.range.end == start {
2999                            last_edit.range.end = end;
3000                            continue;
3001                        }
3002                    }
3003                    hunk.edits.push(ParsedEdit {
3004                        range: start..end,
3005                        text: String::new(),
3006                    });
3007                } else {
3008                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3009                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3010                }
3011            }
3012        }
3013
3014        if let Some(hunk) = current {
3015            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3016                hunks.push(hunk);
3017            }
3018        }
3019
3020        hunks
3021    }
3022
3023    #[cfg(test)]
3024    mod tests {
3025        use super::*;
3026        use indoc::indoc;
3027
3028        #[test]
3029        fn test_apply_variable_edit() {
3030            struct Case {
3031                name: &'static str,
3032                original: &'static str,
3033                model_output: &'static str,
3034                expected: &'static str,
3035            }
3036
3037            let cases = [
3038                Case {
3039                    name: "simple_single_line_replacement",
3040                    original: indoc! {"
3041                        zero
3042                        one
3043                        two
3044                        three
3045                        four
3046                        five
3047                    "},
3048                    model_output: indoc! {"
3049                        two
3050                        <|fim_middle|>
3051                        THREE
3052                        <|fim_suffix|>
3053                        four
3054                    "},
3055                    expected: indoc! {"
3056                        zero
3057                        one
3058                        two
3059                        THREE
3060                        four
3061                        five
3062                    "},
3063                },
3064                Case {
3065                    name: "multi_line_replacement",
3066                    original: indoc! {"
3067                        a
3068                        b
3069                        c
3070                        d
3071                        e
3072                    "},
3073                    model_output: indoc! {"
3074                        a
3075                        <|fim_middle|>
3076                        B
3077                        C
3078                        D
3079                        <|fim_suffix|>
3080                        e
3081                    "},
3082                    expected: indoc! {"
3083                        a
3084                        B
3085                        C
3086                        D
3087                        e
3088                    "},
3089                },
3090                Case {
3091                    name: "insertion_between_existing_lines",
3092                    original: indoc! {"
3093                        a
3094                        b
3095                        c
3096                    "},
3097                    model_output: indoc! {"
3098                        a
3099                        <|fim_middle|>
3100                        X
3101                        <|fim_suffix|>
3102                        b
3103                    "},
3104                    expected: indoc! {"
3105                        a
3106                        X
3107                        b
3108                        c
3109                    "},
3110                },
3111                Case {
3112                    name: "deletion",
3113                    original: indoc! {"
3114                        a
3115                        b
3116                        c
3117                        d
3118                    "},
3119                    model_output: indoc! {"
3120                        a
3121                        <|fim_middle|>
3122                        <|fim_suffix|>
3123                        c
3124                    "},
3125                    expected: indoc! {"
3126                        a
3127                        c
3128                        d
3129                    "},
3130                },
3131                Case {
3132                    name: "replacement_at_start_no_prefix_context",
3133                    original: indoc! {"
3134                        a
3135                        b
3136                        c
3137                    "},
3138                    model_output: indoc! {"
3139                        <|fim_middle|>
3140                        X
3141                        <|fim_suffix|>
3142                        b
3143                    "},
3144                    expected: indoc! {"
3145                        X
3146                        b
3147                        c
3148                    "},
3149                },
3150                Case {
3151                    name: "replacement_at_end_no_suffix_context",
3152                    original: indoc! {"
3153                        a
3154                        b
3155                        c
3156                    "},
3157                    model_output: indoc! {"
3158                        b
3159                        <|fim_middle|>
3160                        Z
3161                        <|fim_suffix|>
3162                    "},
3163                    expected: indoc! {"
3164                        a
3165                        b
3166                        Z
3167                    "},
3168                },
3169                Case {
3170                    name: "context_with_trailing_newline_is_preserved",
3171                    original: indoc! {"
3172                        a
3173                        b
3174                        c
3175                    "},
3176                    model_output: indoc! {"
3177                        a
3178                        <|fim_middle|>
3179                        B
3180                        <|fim_suffix|>
3181                        c
3182                    "},
3183                    expected: indoc! {"
3184                        a
3185                        B
3186                        c
3187                    "},
3188                },
3189                Case {
3190                    name: "cursor_marker_passes_through_untouched",
3191                    original: indoc! {"
3192                        a
3193                        b
3194                        c
3195                    "},
3196                    model_output: indoc! {"
3197                        a
3198                        <|fim_middle|>
3199                        B<|user_cursor|>B
3200                        <|fim_suffix|>
3201                        c
3202                    "},
3203                    expected: indoc! {"
3204                        a
3205                        B<|user_cursor|>B
3206                        c
3207                    "},
3208                },
3209                Case {
3210                    name: "multiple_prefix_context_lines",
3211                    original: indoc! {"
3212                        a
3213                        b
3214                        c
3215                        d
3216                        e
3217                    "},
3218                    model_output: indoc! {"
3219                        b
3220                        c
3221                        <|fim_middle|>
3222                        D
3223                        <|fim_suffix|>
3224                        e
3225                    "},
3226                    expected: indoc! {"
3227                        a
3228                        b
3229                        c
3230                        D
3231                        e
3232                    "},
3233                },
3234            ];
3235
3236            for case in cases {
3237                let (edit_range, replacement) =
3238                    apply_variable_edit(case.original, case.model_output).unwrap();
3239                let mut edited = case.original.to_string();
3240                edited.replace_range(edit_range, &replacement);
3241                assert_eq!(edited, case.expected, "{}", case.name);
3242            }
3243        }
3244
3245        #[test]
3246        fn test_patch_to_variable_edit() {
3247            struct Case {
3248                name: &'static str,
3249                old: &'static str,
3250                patch: &'static str,
3251                cursor_offset: Option<usize>,
3252                expected_variable_edit: &'static str,
3253                expected_after_apply: &'static str,
3254            }
3255
3256            let cases = [
3257                Case {
3258                    name: "simple_replacement",
3259                    old: indoc! {"
3260                        zero
3261                        one
3262                        two
3263                        three
3264                        four
3265                        five
3266                    "},
3267                    patch: indoc! {"
3268                        @@ -3,3 +3,3 @@
3269                         two
3270                        -three
3271                        +THREE
3272                         four
3273                    "},
3274                    cursor_offset: None,
3275                    expected_variable_edit: indoc! {"
3276                        one
3277                        two
3278                        <|fim_middle|>
3279                        THREE
3280                        <|fim_suffix|>
3281                        four
3282                        five
3283                    "},
3284                    expected_after_apply: indoc! {"
3285                        zero
3286                        one
3287                        two
3288                        THREE
3289                        four
3290                        five
3291                    "},
3292                },
3293                Case {
3294                    name: "insertion",
3295                    old: indoc! {"
3296                        a
3297                        b
3298                        c
3299                        d
3300                        e
3301                    "},
3302                    patch: indoc! {"
3303                        @@ -2,0 +3,1 @@
3304                         b
3305                        +X
3306                         c
3307                    "},
3308                    cursor_offset: None,
3309                    expected_variable_edit: indoc! {"
3310                        a
3311                        b
3312                        <|fim_middle|>
3313                        X
3314                        <|fim_suffix|>
3315                        c
3316                        d
3317                    "},
3318                    expected_after_apply: indoc! {"
3319                        a
3320                        b
3321                        X
3322                        c
3323                        d
3324                        e
3325                    "},
3326                },
3327                Case {
3328                    name: "deletion",
3329                    old: indoc! {"
3330                        a
3331                        b
3332                        c
3333                        d
3334                        e
3335                    "},
3336                    patch: indoc! {"
3337                        @@ -2,3 +2,2 @@
3338                         b
3339                        -c
3340                         d
3341                    "},
3342                    cursor_offset: None,
3343                    expected_variable_edit: indoc! {"
3344                        a
3345                        b
3346                        <|fim_middle|>
3347                        <|fim_suffix|>
3348                        d
3349                        e
3350                    "},
3351                    expected_after_apply: indoc! {"
3352                        a
3353                        b
3354                        d
3355                        e
3356                    "},
3357                },
3358                Case {
3359                    name: "edit_near_start",
3360                    old: indoc! {"
3361                        first
3362                        second
3363                        third
3364                        fourth
3365                    "},
3366                    patch: indoc! {"
3367                        @@ -1,1 +1,1 @@
3368                        -first
3369                        +FIRST
3370                    "},
3371                    cursor_offset: None,
3372                    expected_variable_edit: indoc! {"
3373                        <|fim_middle|>
3374                        FIRST
3375                        <|fim_suffix|>
3376                        second
3377                        third
3378                    "},
3379                    expected_after_apply: indoc! {"
3380                        FIRST
3381                        second
3382                        third
3383                        fourth
3384                    "},
3385                },
3386                Case {
3387                    name: "edit_near_end",
3388                    old: indoc! {"
3389                        first
3390                        second
3391                        third
3392                        fourth
3393                    "},
3394                    patch: indoc! {"
3395                        @@ -4,1 +4,1 @@
3396                        -fourth
3397                        +FOURTH
3398                    "},
3399                    cursor_offset: None,
3400                    expected_variable_edit: indoc! {"
3401                        second
3402                        third
3403                        <|fim_middle|>
3404                        FOURTH
3405                        <|fim_suffix|>
3406                    "},
3407                    expected_after_apply: indoc! {"
3408                        first
3409                        second
3410                        third
3411                        FOURTH
3412                    "},
3413                },
3414                Case {
3415                    name: "cursor_at_start_of_replacement",
3416                    old: indoc! {"
3417                        zero
3418                        one
3419                        two
3420                        three
3421                        four
3422                        five
3423                    "},
3424                    patch: indoc! {"
3425                        @@ -3,3 +3,3 @@
3426                         two
3427                        -three
3428                        +THREE
3429                         four
3430                    "},
3431                    cursor_offset: Some(4),
3432                    expected_variable_edit: indoc! {"
3433                        one
3434                        two
3435                        <|fim_middle|>
3436                        <|user_cursor|>THREE
3437                        <|fim_suffix|>
3438                        four
3439                        five
3440                    "},
3441                    expected_after_apply: indoc! {"
3442                        zero
3443                        one
3444                        two
3445                        <|user_cursor|>THREE
3446                        four
3447                        five
3448                    "},
3449                },
3450                Case {
3451                    name: "cursor_in_middle_of_replacement",
3452                    old: indoc! {"
3453                        zero
3454                        one
3455                        two
3456                        three
3457                        four
3458                        five
3459                    "},
3460                    patch: indoc! {"
3461                        @@ -3,3 +3,3 @@
3462                         two
3463                        -three
3464                        +THREE
3465                         four
3466                    "},
3467                    cursor_offset: Some(6),
3468                    expected_variable_edit: indoc! {"
3469                        one
3470                        two
3471                        <|fim_middle|>
3472                        TH<|user_cursor|>REE
3473                        <|fim_suffix|>
3474                        four
3475                        five
3476                    "},
3477                    expected_after_apply: indoc! {"
3478                        zero
3479                        one
3480                        two
3481                        TH<|user_cursor|>REE
3482                        four
3483                        five
3484                    "},
3485                },
3486                Case {
3487                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3488                    old: indoc! {"
3489                        one
3490                        a
3491                        b
3492                        c
3493                        d
3494                        two
3495                        a
3496                        b
3497                        c
3498                        d
3499                        three
3500                        a
3501                        b
3502                        c
3503                        d
3504                        four
3505                    "},
3506                    patch: indoc! {"
3507                        @@ -4,5 +4,5 @@
3508                         two
3509                         a
3510                         b
3511                        -c
3512                        +C
3513                         d
3514                         three
3515                    "},
3516                    cursor_offset: None,
3517                    expected_variable_edit: indoc! {"
3518                        two
3519                        a
3520                        b
3521                        <|fim_middle|>
3522                        C
3523                        <|fim_suffix|>
3524                        d
3525                        three
3526                    "},
3527                    expected_after_apply: indoc! {"
3528                        one
3529                        a
3530                        b
3531                        c
3532                        d
3533                        two
3534                        a
3535                        b
3536                        C
3537                        d
3538                        three
3539                        a
3540                        b
3541                        c
3542                        d
3543                        four
3544                    "},
3545                },
3546                Case {
3547                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3548                    old: indoc! {"
3549                        {
3550                            {
3551                                one();
3552                            }
3553                        }
3554                        {
3555                            {
3556                                two();
3557                            }
3558                        }
3559                        {
3560                            {
3561                                three();
3562                            }
3563                        }
3564                        {
3565                            {
3566                                four();
3567                            }
3568                        }
3569                    "},
3570                    patch: indoc! {"
3571                        @@ -4,5 +4,5 @@
3572                             {
3573                        -        two();
3574                        +        TWO();
3575                             }
3576                    "},
3577                    cursor_offset: None,
3578                    expected_variable_edit: indoc! {"
3579                                one();
3580                            }
3581                        }
3582                        {
3583                            {
3584                        <|fim_middle|>
3585                                TWO();
3586                        <|fim_suffix|>
3587                            }
3588                        }
3589                        {
3590                            {
3591                                three();
3592                    "},
3593                    expected_after_apply: indoc! {"
3594                        {
3595                            {
3596                                one();
3597                            }
3598                        }
3599                        {
3600                            {
3601                                TWO();
3602                            }
3603                        }
3604                        {
3605                            {
3606                                three();
3607                            }
3608                        }
3609                        {
3610                            {
3611                                four();
3612                            }
3613                        }
3614                    "},
3615                },
3616            ];
3617
3618            for case in cases {
3619                let output =
3620                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3621                        .unwrap_or_else(|error| {
3622                            panic!("failed converting patch for {}: {error}", case.name)
3623                        });
3624                assert_eq!(
3625                    output, case.expected_variable_edit,
3626                    "patch->variable_edit mismatch for {}",
3627                    case.name
3628                );
3629
3630                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3631                    .unwrap_or_else(|error| {
3632                        panic!("failed applying variable_edit for {}: {error}", case.name)
3633                    });
3634                let mut edited_by_variable_edit = case.old.to_string();
3635                edited_by_variable_edit.replace_range(edit_range, &replacement);
3636                assert_eq!(
3637                    edited_by_variable_edit, case.expected_after_apply,
3638                    "variable_edit apply mismatch for {}",
3639                    case.name
3640                );
3641
3642                let (expected_edit_range, expected_replacement) =
3643                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3644                        |error| {
3645                            panic!(
3646                                "failed applying expected variable_edit for {}: {error}",
3647                                case.name
3648                            )
3649                        },
3650                    );
3651                let mut edited_by_expected_variable_edit = case.old.to_string();
3652                edited_by_expected_variable_edit
3653                    .replace_range(expected_edit_range, &expected_replacement);
3654                assert_eq!(
3655                    edited_by_expected_variable_edit, case.expected_after_apply,
3656                    "expected variable_edit apply mismatch for {}",
3657                    case.name
3658                );
3659            }
3660        }
3661
3662        #[test]
3663        fn test_write_cursor_excerpt_section() {
3664            let path = Path::new("test.rs");
3665            let context = "fn main() {\n    hello();\n}\n";
3666            let cursor_offset = 17;
3667            let mut prompt = String::new();
3668            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3669            assert_eq!(
3670                prompt,
3671                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3672            );
3673        }
3674    }
3675}
3676
3677/// The zeta1 prompt format
3678pub mod zeta1 {
3679    use super::*;
3680    use std::fmt::Write;
3681
3682    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3683    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3684    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3685    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3686
3687    const INSTRUCTION_HEADER: &str = concat!(
3688        "### Instruction:\n",
3689        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3690        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3691        "into account the cursor location.\n\n",
3692        "### User Edits:\n\n"
3693    );
3694    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3695    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3696
3697    /// Formats a complete zeta1 prompt from the input events and excerpt.
3698    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3699        let mut prompt = String::with_capacity(
3700            INSTRUCTION_HEADER.len()
3701                + input_events.len()
3702                + EXCERPT_HEADER.len()
3703                + input_excerpt.len()
3704                + RESPONSE_HEADER.len(),
3705        );
3706        prompt.push_str(INSTRUCTION_HEADER);
3707        prompt.push_str(input_events);
3708        prompt.push_str(EXCERPT_HEADER);
3709        prompt.push_str(input_excerpt);
3710        prompt.push_str(RESPONSE_HEADER);
3711        prompt
3712    }
3713
3714    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3715    /// editable and context byte-offset ranges within `cursor_excerpt`.
3716    pub fn format_zeta1_from_input(
3717        input: &ZetaPromptInput,
3718        editable_range: Range<usize>,
3719        context_range: Range<usize>,
3720    ) -> String {
3721        let events = format_zeta1_events(&input.events);
3722        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3723        format_zeta1_prompt(&events, &excerpt)
3724    }
3725
3726    /// Formats events in zeta1 style (oldest first).
3727    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3728        let mut result = String::new();
3729        for event in events {
3730            let event_string = format_zeta1_event(event);
3731            if event_string.is_empty() {
3732                continue;
3733            }
3734            if !result.is_empty() {
3735                result.push_str("\n\n");
3736            }
3737            result.push_str(&event_string);
3738        }
3739        result
3740    }
3741
3742    fn format_zeta1_event(event: &Event) -> String {
3743        match event {
3744            Event::BufferChange {
3745                path,
3746                old_path,
3747                diff,
3748                ..
3749            } => {
3750                let mut prompt = String::new();
3751                if old_path != path {
3752                    writeln!(
3753                        prompt,
3754                        "User renamed {} to {}\n",
3755                        old_path.display(),
3756                        path.display()
3757                    )
3758                    .ok();
3759                }
3760                if !diff.is_empty() {
3761                    write!(
3762                        prompt,
3763                        "User edited {}:\n```diff\n{}\n```",
3764                        path.display(),
3765                        diff
3766                    )
3767                    .ok();
3768                }
3769                prompt
3770            }
3771        }
3772    }
3773
3774    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3775    /// within `cursor_excerpt`.
3776    fn format_zeta1_excerpt(
3777        input: &ZetaPromptInput,
3778        editable_range: Range<usize>,
3779        context_range: Range<usize>,
3780    ) -> String {
3781        let path_str = input.cursor_path.to_string_lossy();
3782        let excerpt = &*input.cursor_excerpt;
3783        let cursor_offset = input.cursor_offset_in_excerpt;
3784
3785        let mut prompt = String::new();
3786        writeln!(&mut prompt, "```{path_str}").ok();
3787
3788        let starts_at_file_beginning =
3789            input.excerpt_start_row == Some(0) && context_range.start == 0;
3790        if starts_at_file_beginning {
3791            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3792        }
3793
3794        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3795
3796        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3797        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3798        prompt.push_str(CURSOR_MARKER);
3799        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3800        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3801
3802        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3803        write!(prompt, "\n```").ok();
3804
3805        prompt
3806    }
3807
3808    /// Cleans zeta1 model output by extracting content between editable region
3809    /// markers and converting the zeta1 cursor marker to the universal one.
3810    /// Returns `None` if the output doesn't contain the expected markers.
3811    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3812        let content = output.replace(CURSOR_MARKER, "");
3813
3814        let content_start = content
3815            .find(EDITABLE_REGION_START_MARKER)
3816            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3817            .map(|pos| {
3818                if content.as_bytes().get(pos) == Some(&b'\n') {
3819                    pos + 1
3820                } else {
3821                    pos
3822                }
3823            })
3824            .unwrap_or(0);
3825
3826        let content_end = content
3827            .find(EDITABLE_REGION_END_MARKER)
3828            .map(|pos| {
3829                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3830                    pos - 1
3831                } else {
3832                    pos
3833                }
3834            })
3835            .unwrap_or(content.len());
3836
3837        if content_start > content_end {
3838            return Some(String::new());
3839        }
3840
3841        let extracted = &content[content_start..content_end];
3842
3843        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3844            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3845            let text_before_cursor = text_before_cursor
3846                .find(EDITABLE_REGION_START_MARKER)
3847                .map(|pos| {
3848                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3849                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3850                        after_marker + 1
3851                    } else {
3852                        after_marker
3853                    }
3854                })
3855                .unwrap_or(0);
3856            let offset_in_extracted = zeta1_cursor_pos
3857                .saturating_sub(text_before_cursor)
3858                .min(extracted.len());
3859            offset_in_extracted
3860        });
3861
3862        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3863        if let Some(offset) = cursor_offset {
3864            result.push_str(&extracted[..offset]);
3865            result.push_str(super::CURSOR_MARKER);
3866            result.push_str(&extracted[offset..]);
3867        } else {
3868            result.push_str(extracted);
3869        }
3870
3871        Some(result)
3872    }
3873}
3874
3875#[cfg(test)]
3876mod tests {
3877    use super::*;
3878    use indoc::indoc;
3879
3880    fn make_input(
3881        cursor_excerpt: &str,
3882        editable_range: Range<usize>,
3883        cursor_offset: usize,
3884        events: Vec<Event>,
3885        related_files: Vec<RelatedFile>,
3886    ) -> ZetaPromptInput {
3887        let context_range = 0..cursor_excerpt.len();
3888        ZetaPromptInput {
3889            cursor_path: Path::new("test.rs").into(),
3890            cursor_excerpt: cursor_excerpt.into(),
3891            cursor_offset_in_excerpt: cursor_offset,
3892            excerpt_start_row: None,
3893            events: events.into_iter().map(Arc::new).collect(),
3894            related_files: Some(related_files),
3895            active_buffer_diagnostics: vec![],
3896            excerpt_ranges: ExcerptRanges {
3897                editable_150: editable_range.clone(),
3898                editable_180: editable_range.clone(),
3899                editable_350: editable_range,
3900                editable_150_context_350: context_range.clone(),
3901                editable_180_context_350: context_range.clone(),
3902                editable_350_context_150: context_range,
3903                ..Default::default()
3904            },
3905            syntax_ranges: None,
3906            experiment: None,
3907            in_open_source_repo: false,
3908            can_collect_data: false,
3909            repo_url: None,
3910        }
3911    }
3912
3913    fn make_input_with_context_range(
3914        excerpt: &str,
3915        editable_range: Range<usize>,
3916        context_range: Range<usize>,
3917        cursor_offset: usize,
3918    ) -> ZetaPromptInput {
3919        ZetaPromptInput {
3920            cursor_path: Path::new("test.rs").into(),
3921            cursor_excerpt: excerpt.into(),
3922            cursor_offset_in_excerpt: cursor_offset,
3923            excerpt_start_row: None,
3924            events: vec![],
3925            related_files: Some(vec![]),
3926            active_buffer_diagnostics: vec![],
3927            excerpt_ranges: ExcerptRanges {
3928                editable_150: editable_range.clone(),
3929                editable_180: editable_range.clone(),
3930                editable_350: editable_range,
3931                editable_150_context_350: context_range.clone(),
3932                editable_180_context_350: context_range.clone(),
3933                editable_350_context_150: context_range,
3934                ..Default::default()
3935            },
3936            syntax_ranges: None,
3937            experiment: None,
3938            in_open_source_repo: false,
3939            can_collect_data: false,
3940            repo_url: None,
3941        }
3942    }
3943
3944    fn make_event(path: &str, diff: &str) -> Event {
3945        Event::BufferChange {
3946            path: Path::new(path).into(),
3947            old_path: Path::new(path).into(),
3948            diff: diff.to_string(),
3949            predicted: false,
3950            in_open_source_repo: false,
3951        }
3952    }
3953
3954    fn make_related_file(path: &str, content: &str) -> RelatedFile {
3955        RelatedFile {
3956            path: Path::new(path).into(),
3957            max_row: content.lines().count() as u32,
3958            excerpts: vec![RelatedExcerpt {
3959                row_range: 0..content.lines().count() as u32,
3960                text: content.into(),
3961                order: 0,
3962            }],
3963            in_open_source_repo: false,
3964        }
3965    }
3966
3967    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3968        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
3969    }
3970
3971    #[test]
3972    fn test_no_truncation_when_within_budget() {
3973        let input = make_input(
3974            "prefix\neditable\nsuffix",
3975            7..15,
3976            10,
3977            vec![make_event("a.rs", "-old\n+new\n")],
3978            vec![make_related_file("related.rs", "fn helper() {}\n")],
3979        );
3980
3981        assert_eq!(
3982            format_with_budget(&input, 10000),
3983            indoc! {r#"
3984                <|file_sep|>related.rs
3985                fn helper() {}
3986                <|file_sep|>edit history
3987                --- a/a.rs
3988                +++ b/a.rs
3989                -old
3990                +new
3991                <|file_sep|>test.rs
3992                <|fim_prefix|>
3993                prefix
3994                <|fim_middle|>current
3995                edi<|user_cursor|>table
3996                <|fim_suffix|>
3997
3998                suffix
3999                <|fim_middle|>updated
4000            "#}
4001        );
4002    }
4003
4004    #[test]
4005    fn test_truncation_drops_edit_history_when_budget_tight() {
4006        let input = make_input(
4007            "code",
4008            0..4,
4009            2,
4010            vec![make_event("a.rs", "-x\n+y\n")],
4011            vec![
4012                make_related_file("r1.rs", "a\n"),
4013                make_related_file("r2.rs", "b\n"),
4014            ],
4015        );
4016
4017        assert_eq!(
4018            format_with_budget(&input, 10000),
4019            indoc! {r#"
4020                <|file_sep|>r1.rs
4021                a
4022                <|file_sep|>r2.rs
4023                b
4024                <|file_sep|>edit history
4025                --- a/a.rs
4026                +++ b/a.rs
4027                -x
4028                +y
4029                <|file_sep|>test.rs
4030                <|fim_prefix|>
4031                <|fim_middle|>current
4032                co<|user_cursor|>de
4033                <|fim_suffix|>
4034                <|fim_middle|>updated
4035            "#}
4036        );
4037
4038        assert_eq!(
4039            format_with_budget(&input, 50),
4040            indoc! {r#"
4041                <|file_sep|>r1.rs
4042                a
4043                <|file_sep|>r2.rs
4044                b
4045                <|file_sep|>test.rs
4046                <|fim_prefix|>
4047                <|fim_middle|>current
4048                co<|user_cursor|>de
4049                <|fim_suffix|>
4050                <|fim_middle|>updated
4051            "#}
4052        );
4053    }
4054
4055    #[test]
4056    fn test_truncation_includes_partial_excerpts() {
4057        let input = make_input(
4058            "x",
4059            0..1,
4060            0,
4061            vec![],
4062            vec![RelatedFile {
4063                path: Path::new("big.rs").into(),
4064                max_row: 30,
4065                in_open_source_repo: false,
4066                excerpts: vec![
4067                    RelatedExcerpt {
4068                        row_range: 0..10,
4069                        text: "first excerpt\n".into(),
4070                        order: 0,
4071                    },
4072                    RelatedExcerpt {
4073                        row_range: 10..20,
4074                        text: "second excerpt\n".into(),
4075                        order: 0,
4076                    },
4077                    RelatedExcerpt {
4078                        row_range: 20..30,
4079                        text: "third excerpt\n".into(),
4080                        order: 0,
4081                    },
4082                ],
4083            }],
4084        );
4085
4086        assert_eq!(
4087            format_with_budget(&input, 10000),
4088            indoc! {r#"
4089                <|file_sep|>big.rs
4090                first excerpt
4091                ...
4092                second excerpt
4093                ...
4094                third excerpt
4095                <|file_sep|>test.rs
4096                <|fim_prefix|>
4097                <|fim_middle|>current
4098                <|user_cursor|>x
4099                <|fim_suffix|>
4100                <|fim_middle|>updated
4101            "#}
4102        );
4103
4104        assert_eq!(
4105            format_with_budget(&input, 50),
4106            indoc! {r#"
4107                <|file_sep|>big.rs
4108                first excerpt
4109                ...
4110                <|file_sep|>test.rs
4111                <|fim_prefix|>
4112                <|fim_middle|>current
4113                <|user_cursor|>x
4114                <|fim_suffix|>
4115                <|fim_middle|>updated
4116            "#}
4117        );
4118    }
4119
4120    #[test]
4121    fn test_truncation_prioritizes_lower_order_excerpts() {
4122        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4123        // With tight budget, only the lower-order excerpt from file_b should be included.
4124        let input = make_input(
4125            "x",
4126            0..1,
4127            0,
4128            vec![],
4129            vec![
4130                RelatedFile {
4131                    path: Path::new("file_a.rs").into(),
4132                    max_row: 10,
4133                    in_open_source_repo: false,
4134                    excerpts: vec![RelatedExcerpt {
4135                        row_range: 0..10,
4136                        text: "low priority content\n".into(),
4137                        order: 5,
4138                    }],
4139                },
4140                RelatedFile {
4141                    path: Path::new("file_b.rs").into(),
4142                    max_row: 10,
4143                    in_open_source_repo: false,
4144                    excerpts: vec![RelatedExcerpt {
4145                        row_range: 0..10,
4146                        text: "high priority content\n".into(),
4147                        order: 1,
4148                    }],
4149                },
4150            ],
4151        );
4152
4153        // With large budget, both files included; rendered in stable lexicographic order.
4154        assert_eq!(
4155            format_with_budget(&input, 10000),
4156            indoc! {r#"
4157                <|file_sep|>file_a.rs
4158                low priority content
4159                <|file_sep|>file_b.rs
4160                high priority content
4161                <|file_sep|>test.rs
4162                <|fim_prefix|>
4163                <|fim_middle|>current
4164                <|user_cursor|>x
4165                <|fim_suffix|>
4166                <|fim_middle|>updated
4167            "#}
4168        );
4169
4170        // With tight budget, only file_b (lower order) fits.
4171        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4172        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4173        // file_a would need another 14 tokens, which doesn't fit.
4174        assert_eq!(
4175            format_with_budget(&input, 52),
4176            indoc! {r#"
4177                <|file_sep|>file_b.rs
4178                high priority content
4179                <|file_sep|>test.rs
4180                <|fim_prefix|>
4181                <|fim_middle|>current
4182                <|user_cursor|>x
4183                <|fim_suffix|>
4184                <|fim_middle|>updated
4185            "#}
4186        );
4187    }
4188
4189    #[test]
4190    fn test_truncation_drops_high_order_excerpts_within_file() {
4191        // A single file has excerpts at order 1 and order 3. With a tight budget,
4192        // only the order-1 excerpts are included while the order-3 excerpt is
4193        // dropped — even though they belong to the same file. This also preserves
4194        // the parent invariant: parent outline items have order ≤ their best
4195        // child, so they're always included when any child is.
4196        let input = make_input(
4197            "x",
4198            0..1,
4199            0,
4200            vec![],
4201            vec![RelatedFile {
4202                path: Path::new("mod.rs").into(),
4203                max_row: 30,
4204                in_open_source_repo: false,
4205                excerpts: vec![
4206                    RelatedExcerpt {
4207                        row_range: 0..5,
4208                        text: "mod header\n".into(),
4209                        order: 1,
4210                    },
4211                    RelatedExcerpt {
4212                        row_range: 5..15,
4213                        text: "important fn\n".into(),
4214                        order: 1,
4215                    },
4216                    RelatedExcerpt {
4217                        row_range: 15..30,
4218                        text: "less important fn\n".into(),
4219                        order: 3,
4220                    },
4221                ],
4222            }],
4223        );
4224
4225        // With large budget, all three excerpts included.
4226        assert_eq!(
4227            format_with_budget(&input, 10000),
4228            indoc! {r#"
4229                <|file_sep|>mod.rs
4230                mod header
4231                ...
4232                important fn
4233                ...
4234                less important fn
4235                <|file_sep|>test.rs
4236                <|fim_prefix|>
4237                <|fim_middle|>current
4238                <|user_cursor|>x
4239                <|fim_suffix|>
4240                <|fim_middle|>updated
4241            "#}
4242        );
4243
4244        // With tight budget, only order<=1 excerpts included (header + important fn).
4245        assert_eq!(
4246            format_with_budget(&input, 55),
4247            indoc! {r#"
4248                <|file_sep|>mod.rs
4249                mod header
4250                ...
4251                important fn
4252                ...
4253                <|file_sep|>test.rs
4254                <|fim_prefix|>
4255                <|fim_middle|>current
4256                <|user_cursor|>x
4257                <|fim_suffix|>
4258                <|fim_middle|>updated
4259            "#}
4260        );
4261    }
4262
4263    #[test]
4264    fn test_truncation_drops_older_events_first() {
4265        let input = make_input(
4266            "x",
4267            0..1,
4268            0,
4269            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4270            vec![],
4271        );
4272
4273        assert_eq!(
4274            format_with_budget(&input, 10000),
4275            indoc! {r#"
4276                <|file_sep|>edit history
4277                --- a/old.rs
4278                +++ b/old.rs
4279                -1
4280                --- a/new.rs
4281                +++ b/new.rs
4282                -2
4283                <|file_sep|>test.rs
4284                <|fim_prefix|>
4285                <|fim_middle|>current
4286                <|user_cursor|>x
4287                <|fim_suffix|>
4288                <|fim_middle|>updated
4289            "#}
4290        );
4291
4292        assert_eq!(
4293            format_with_budget(&input, 55),
4294            indoc! {r#"
4295                <|file_sep|>edit history
4296                --- a/new.rs
4297                +++ b/new.rs
4298                -2
4299                <|file_sep|>test.rs
4300                <|fim_prefix|>
4301                <|fim_middle|>current
4302                <|user_cursor|>x
4303                <|fim_suffix|>
4304                <|fim_middle|>updated
4305            "#}
4306        );
4307    }
4308
4309    #[test]
4310    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4311        let input = make_input(
4312            "fn main() {}",
4313            0..12,
4314            3,
4315            vec![make_event("a.rs", "-old\n+new\n")],
4316            vec![make_related_file("related.rs", "helper\n")],
4317        );
4318
4319        assert_eq!(
4320            format_with_budget(&input, 30),
4321            indoc! {r#"
4322                <|file_sep|>test.rs
4323                <|fim_prefix|>
4324                <|fim_middle|>current
4325                fn <|user_cursor|>main() {}
4326                <|fim_suffix|>
4327                <|fim_middle|>updated
4328            "#}
4329        );
4330    }
4331
4332    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4333        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4334    }
4335
4336    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4337        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4338    }
4339
4340    #[test]
4341    fn test_seed_coder_basic_format() {
4342        let input = make_input(
4343            "prefix\neditable\nsuffix",
4344            7..15,
4345            10,
4346            vec![make_event("a.rs", "-old\n+new\n")],
4347            vec![make_related_file("related.rs", "fn helper() {}\n")],
4348        );
4349
4350        assert_eq!(
4351            format_seed_coder(&input),
4352            indoc! {r#"
4353                <[fim-suffix]>
4354                suffix
4355                <[fim-prefix]><filename>related.rs
4356                fn helper() {}
4357
4358                <filename>edit_history
4359                --- a/a.rs
4360                +++ b/a.rs
4361                -old
4362                +new
4363
4364                <filename>test.rs
4365                prefix
4366                <<<<<<< CURRENT
4367                edi<|user_cursor|>table
4368                =======
4369                <[fim-middle]>"#}
4370        );
4371    }
4372
4373    #[test]
4374    fn test_seed_coder_no_context() {
4375        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4376
4377        assert_eq!(
4378            format_seed_coder(&input),
4379            indoc! {r#"
4380                <[fim-suffix]>
4381                after
4382                <[fim-prefix]><filename>test.rs
4383                before
4384                <<<<<<< CURRENT
4385                mid<|user_cursor|>dle
4386                =======
4387                <[fim-middle]>"#}
4388        );
4389    }
4390
4391    #[test]
4392    fn test_seed_coder_truncation_drops_context() {
4393        let input = make_input(
4394            "code",
4395            0..4,
4396            2,
4397            vec![make_event("a.rs", "-x\n+y\n")],
4398            vec![make_related_file("r1.rs", "content\n")],
4399        );
4400
4401        // With large budget, everything is included
4402        assert_eq!(
4403            format_seed_coder(&input),
4404            indoc! {r#"
4405                <[fim-suffix]>
4406                <[fim-prefix]><filename>r1.rs
4407                content
4408
4409                <filename>edit_history
4410                --- a/a.rs
4411                +++ b/a.rs
4412                -x
4413                +y
4414
4415                <filename>test.rs
4416                <<<<<<< CURRENT
4417                co<|user_cursor|>de
4418                =======
4419                <[fim-middle]>"#}
4420        );
4421
4422        // With tight budget, context is dropped but cursor section remains
4423        assert_eq!(
4424            format_seed_coder_with_budget(&input, 30),
4425            indoc! {r#"
4426                <[fim-suffix]>
4427                <[fim-prefix]><filename>test.rs
4428                <<<<<<< CURRENT
4429                co<|user_cursor|>de
4430                =======
4431                <[fim-middle]>"#}
4432        );
4433    }
4434
4435    #[test]
4436    fn test_seed_coder_truncation_prioritizes_lower_order() {
4437        let input = make_input(
4438            "code",
4439            0..4,
4440            2,
4441            vec![],
4442            vec![
4443                RelatedFile {
4444                    path: Path::new("low_prio.rs").into(),
4445                    max_row: 5,
4446                    in_open_source_repo: false,
4447                    excerpts: vec![RelatedExcerpt {
4448                        row_range: 0..5,
4449                        text: "low prio\n".into(),
4450                        order: 10,
4451                    }],
4452                },
4453                RelatedFile {
4454                    path: Path::new("high_prio.rs").into(),
4455                    max_row: 5,
4456                    in_open_source_repo: false,
4457                    excerpts: vec![RelatedExcerpt {
4458                        row_range: 0..5,
4459                        text: "high prio\n".into(),
4460                        order: 1,
4461                    }],
4462                },
4463            ],
4464        );
4465
4466        // With large budget, both included; rendered in stable lexicographic order.
4467        assert_eq!(
4468            format_seed_coder(&input),
4469            indoc! {r#"
4470                <[fim-suffix]>
4471                <[fim-prefix]><filename>low_prio.rs
4472                low prio
4473                <filename>high_prio.rs
4474                high prio
4475
4476                <filename>test.rs
4477                <<<<<<< CURRENT
4478                co<|user_cursor|>de
4479                =======
4480                <[fim-middle]>"#}
4481        );
4482
4483        // With tight budget, only high_prio included.
4484        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
4485        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
4486        assert_eq!(
4487            format_seed_coder_with_budget(&input, 44),
4488            indoc! {r#"
4489                <[fim-suffix]>
4490                <[fim-prefix]><filename>high_prio.rs
4491                high prio
4492
4493                <filename>test.rs
4494                <<<<<<< CURRENT
4495                co<|user_cursor|>de
4496                =======
4497                <[fim-middle]>"#}
4498        );
4499    }
4500
4501    #[test]
4502    fn test_format_zeta1_from_input_basic() {
4503        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4504        let input = ZetaPromptInput {
4505            cursor_path: Path::new("src/main.rs").into(),
4506            cursor_excerpt: excerpt.into(),
4507            cursor_offset_in_excerpt: 30,
4508            excerpt_start_row: Some(0),
4509            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4510            related_files: Some(vec![]),
4511            active_buffer_diagnostics: vec![],
4512            excerpt_ranges: ExcerptRanges {
4513                editable_150: 15..41,
4514                editable_180: 15..41,
4515                editable_350: 15..41,
4516                editable_150_context_350: 0..excerpt.len(),
4517                editable_180_context_350: 0..excerpt.len(),
4518                editable_350_context_150: 0..excerpt.len(),
4519                ..Default::default()
4520            },
4521            syntax_ranges: None,
4522            experiment: None,
4523            in_open_source_repo: false,
4524            can_collect_data: false,
4525            repo_url: None,
4526        };
4527
4528        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4529
4530        assert_eq!(
4531            prompt,
4532            concat!(
4533                "### Instruction:\n",
4534                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4535                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4536                "into account the cursor location.\n",
4537                "\n",
4538                "### User Edits:\n",
4539                "\n",
4540                "User edited other.rs:\n",
4541                "```diff\n",
4542                "-old\n",
4543                "+new\n",
4544                "\n",
4545                "```\n",
4546                "\n",
4547                "### User Excerpt:\n",
4548                "\n",
4549                "```src/main.rs\n",
4550                "<|start_of_file|>\n",
4551                "fn before() {}\n",
4552                "<|editable_region_start|>\n",
4553                "fn foo() {\n",
4554                "    <|user_cursor_is_here|>let x = 1;\n",
4555                "\n",
4556                "<|editable_region_end|>}\n",
4557                "fn after() {}\n",
4558                "\n",
4559                "```\n",
4560                "\n",
4561                "### Response:\n",
4562            ),
4563        );
4564    }
4565
4566    #[test]
4567    fn test_format_zeta1_from_input_no_start_of_file() {
4568        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4569        let input = ZetaPromptInput {
4570            cursor_path: Path::new("src/main.rs").into(),
4571            cursor_excerpt: excerpt.into(),
4572            cursor_offset_in_excerpt: 15,
4573            excerpt_start_row: Some(10),
4574            events: vec![],
4575            related_files: Some(vec![]),
4576            active_buffer_diagnostics: vec![],
4577            excerpt_ranges: ExcerptRanges {
4578                editable_150: 0..28,
4579                editable_180: 0..28,
4580                editable_350: 0..28,
4581                editable_150_context_350: 0..28,
4582                editable_180_context_350: 0..28,
4583                editable_350_context_150: 0..28,
4584                ..Default::default()
4585            },
4586            syntax_ranges: None,
4587            experiment: None,
4588            in_open_source_repo: false,
4589            can_collect_data: false,
4590            repo_url: None,
4591        };
4592
4593        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4594
4595        assert_eq!(
4596            prompt,
4597            concat!(
4598                "### Instruction:\n",
4599                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4600                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4601                "into account the cursor location.\n",
4602                "\n",
4603                "### User Edits:\n",
4604                "\n",
4605                "\n",
4606                "\n",
4607                "### User Excerpt:\n",
4608                "\n",
4609                "```src/main.rs\n",
4610                "<|editable_region_start|>\n",
4611                "fn foo() {\n",
4612                "    <|user_cursor_is_here|>let x = 1;\n",
4613                "}\n",
4614                "\n",
4615                "<|editable_region_end|>\n",
4616                "```\n",
4617                "\n",
4618                "### Response:\n",
4619            ),
4620        );
4621    }
4622
4623    #[test]
4624    fn test_format_zeta1_from_input_with_sub_ranges() {
4625        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
4626        let editable_range = 10..37;
4627        let context_range = 0..excerpt.len();
4628
4629        let input = ZetaPromptInput {
4630            cursor_path: Path::new("test.rs").into(),
4631            cursor_excerpt: excerpt.into(),
4632            cursor_offset_in_excerpt: 25,
4633            excerpt_start_row: Some(0),
4634            events: vec![],
4635            related_files: Some(vec![]),
4636            active_buffer_diagnostics: vec![],
4637            excerpt_ranges: ExcerptRanges {
4638                editable_150: editable_range.clone(),
4639                editable_180: editable_range.clone(),
4640                editable_350: editable_range.clone(),
4641                editable_150_context_350: context_range.clone(),
4642                editable_180_context_350: context_range.clone(),
4643                editable_350_context_150: context_range.clone(),
4644                ..Default::default()
4645            },
4646            syntax_ranges: None,
4647            experiment: None,
4648            in_open_source_repo: false,
4649            can_collect_data: false,
4650            repo_url: None,
4651        };
4652
4653        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4654
4655        assert_eq!(
4656            prompt,
4657            concat!(
4658                "### Instruction:\n",
4659                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4660                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4661                "into account the cursor location.\n",
4662                "\n",
4663                "### User Edits:\n",
4664                "\n",
4665                "\n",
4666                "\n",
4667                "### User Excerpt:\n",
4668                "\n",
4669                "```test.rs\n",
4670                "<|start_of_file|>\n",
4671                "// prefix\n",
4672                "<|editable_region_start|>\n",
4673                "fn foo() {\n",
4674                "    <|user_cursor_is_here|>let x = 1;\n",
4675                "}\n",
4676                "<|editable_region_end|>\n",
4677                "// suffix\n",
4678                "\n",
4679                "```\n",
4680                "\n",
4681                "### Response:\n",
4682            ),
4683        );
4684    }
4685
4686    #[test]
4687    fn test_clean_zeta1_model_output_basic() {
4688        let output = indoc! {"
4689            <|editable_region_start|>
4690            fn main() {
4691                println!(\"hello\");
4692            }
4693            <|editable_region_end|>
4694        "};
4695
4696        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4697        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
4698    }
4699
4700    #[test]
4701    fn test_clean_zeta1_model_output_with_cursor() {
4702        let output = indoc! {"
4703            <|editable_region_start|>
4704            fn main() {
4705                <|user_cursor_is_here|>println!(\"hello\");
4706            }
4707            <|editable_region_end|>
4708        "};
4709
4710        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4711        assert_eq!(
4712            cleaned,
4713            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
4714        );
4715    }
4716
4717    #[test]
4718    fn test_clean_zeta1_model_output_no_markers() {
4719        let output = "fn main() {}\n";
4720        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4721        assert_eq!(cleaned, "fn main() {}\n");
4722    }
4723
4724    #[test]
4725    fn test_clean_zeta1_model_output_empty_region() {
4726        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4727        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4728        assert_eq!(cleaned, "");
4729    }
4730
4731    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4732        let mut result = excerpt.to_string();
4733        result.replace_range(
4734            parsed_output.range_in_excerpt.clone(),
4735            &parsed_output.new_editable_region,
4736        );
4737        result
4738    }
4739
4740    #[test]
4741    fn test_parse_zeta2_model_output() {
4742        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4743        let context_start = excerpt.find("ctx start").unwrap();
4744        let context_end = excerpt.find("after ctx").unwrap();
4745        let editable_start = excerpt.find("editable old").unwrap();
4746        let editable_end = editable_start + "editable old\n".len();
4747        let input = make_input_with_context_range(
4748            excerpt,
4749            editable_start..editable_end,
4750            context_start..context_end,
4751            editable_start,
4752        );
4753
4754        let output = parse_zeta2_model_output(
4755            "editable new\n>>>>>>> UPDATED\n",
4756            ZetaFormat::V0131GitMergeMarkersPrefix,
4757            &input,
4758        )
4759        .unwrap();
4760
4761        assert_eq!(
4762            apply_edit(excerpt, &output),
4763            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4764        );
4765    }
4766
4767    #[test]
4768    fn test_parse_zeta2_model_output_identity() {
4769        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4770        let editable_start = excerpt.find("bbb").unwrap();
4771        let editable_end = excerpt.find("ddd").unwrap();
4772        let input = make_input_with_context_range(
4773            excerpt,
4774            editable_start..editable_end,
4775            0..excerpt.len(),
4776            editable_start,
4777        );
4778
4779        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4780        let output =
4781            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
4782
4783        assert_eq!(apply_edit(excerpt, &output), excerpt);
4784    }
4785
4786    #[test]
4787    fn test_parse_zeta2_model_output_strips_end_marker() {
4788        let excerpt = "hello\nworld\n";
4789        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
4790
4791        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4792        let output1 =
4793            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
4794        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
4795
4796        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
4797        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
4798    }
4799}