diff --git a/crates/edit_prediction/src/zeta.rs b/crates/edit_prediction/src/zeta.rs index 789ff6c0d7fcc269baf30b5e0fb0e849bc865859..f038d2a4ca1929faee2a02391534539b5b63e2d0 100644 --- a/crates/edit_prediction/src/zeta.rs +++ b/crates/edit_prediction/src/zeta.rs @@ -15,12 +15,10 @@ use release_channel::AppVersion; use settings::EditPredictionPromptFormat; use text::{Anchor, Bias}; -use std::env; -use std::ops::Range; -use std::{path::Path, sync::Arc, time::Instant}; +use std::{env, ops::Range, path::Path, sync::Arc, time::Instant}; use zeta_prompt::{ CURSOR_MARKER, ZetaFormat, clean_zeta2_model_output, format_zeta_prompt, get_prefill, - prompt_input_contains_special_tokens, + output_with_context_for_format, prompt_input_contains_special_tokens, zeta1::{self, EDITABLE_REGION_END_MARKER}, }; @@ -246,6 +244,25 @@ pub fn request_prediction_with_zeta( return Ok((Some((request_id, None, model_version)), usage)); }; + let editable_range_in_buffer = editable_range_in_excerpt.start + + full_context_offset_range.start + ..editable_range_in_excerpt.end + full_context_offset_range.start; + + let mut old_text = snapshot + .text_for_range(editable_range_in_buffer.clone()) + .collect::(); + + // For the hashline format, the model may return <|set|>/<|insert|> + // edit commands instead of a full replacement. Apply them against + // the original editable region to produce the full replacement text. + // This must happen before cursor marker stripping because the cursor + // marker is embedded inside edit command content. + if let Some(rewritten_output) = + output_with_context_for_format(zeta_version, &old_text, &output_text)? 
+ { + output_text = rewritten_output; + } + // Client-side cursor marker processing (applies to both raw and v3 responses) let cursor_offset_in_output = output_text.find(CURSOR_MARKER); if let Some(offset) = cursor_offset_in_output { @@ -265,14 +282,6 @@ pub fn request_prediction_with_zeta( .ok(); } - let editable_range_in_buffer = editable_range_in_excerpt.start - + full_context_offset_range.start - ..editable_range_in_excerpt.end + full_context_offset_range.start; - - let mut old_text = snapshot - .text_for_range(editable_range_in_buffer.clone()) - .collect::(); - if !output_text.is_empty() && !output_text.ends_with('\n') { output_text.push('\n'); } diff --git a/crates/edit_prediction_cli/src/format_prompt.rs b/crates/edit_prediction_cli/src/format_prompt.rs index bee79ae8160eeb815a3739b53a5441f6063fb622..f36eaf2799166d6fbd2b7b212003a1a0644b82c4 100644 --- a/crates/edit_prediction_cli/src/format_prompt.rs +++ b/crates/edit_prediction_cli/src/format_prompt.rs @@ -12,7 +12,8 @@ use similar::DiffableStr; use std::ops::Range; use std::sync::Arc; use zeta_prompt::{ - ZetaFormat, excerpt_range_for_format, format_zeta_prompt, resolve_cursor_region, + ZetaFormat, encode_patch_as_output_for_format, excerpt_range_for_format, format_zeta_prompt, + output_end_marker_for_format, resolve_cursor_region, }; pub async fn run_format_prompt( @@ -101,6 +102,12 @@ pub fn zeta2_output_for_patch( old_editable_region.push('\n'); } + if let Some(encoded_output) = + encode_patch_as_output_for_format(version, &old_editable_region, patch, cursor_offset)? 
+ { + return Ok(encoded_output); + } + let (mut result, first_hunk_offset) = udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable_region).with_context( || { @@ -120,16 +127,11 @@ pub fn zeta2_output_for_patch( result.insert_str(offset, zeta_prompt::CURSOR_MARKER); } - match version { - ZetaFormat::V0120GitMergeMarkers - | ZetaFormat::V0131GitMergeMarkersPrefix - | ZetaFormat::V0211SeedCoder => { - if !result.ends_with('\n') { - result.push('\n'); - } - result.push_str(zeta_prompt::v0120_git_merge_markers::END_MARKER); + if let Some(end_marker) = output_end_marker_for_format(version) { + if !result.ends_with('\n') { + result.push('\n'); } - _ => (), + result.push_str(end_marker); } Ok(result) diff --git a/crates/edit_prediction_cli/src/parse_output.rs b/crates/edit_prediction_cli/src/parse_output.rs index 4b8af44785c1781de772f569c012ee64eee48aad..2c066b8b32b3eaab54ad6e3b3bcb0796ff27f950 100644 --- a/crates/edit_prediction_cli/src/parse_output.rs +++ b/crates/edit_prediction_cli/src/parse_output.rs @@ -6,7 +6,11 @@ use crate::{ }; use anyhow::{Context as _, Result}; use edit_prediction::example_spec::encode_cursor_in_patch; -use zeta_prompt::{CURSOR_MARKER, ZetaFormat}; +use zeta_prompt::{ + CURSOR_MARKER, ZetaFormat, clean_extracted_region_for_format, + current_region_markers_for_format, output_end_marker_for_format, + output_with_context_for_format, +}; pub fn run_parse_output(example: &mut Example) -> Result<()> { example @@ -51,22 +55,7 @@ pub fn parse_prediction_output( } fn extract_zeta2_current_region(prompt: &str, format: ZetaFormat) -> Result { - let (current_marker, end_marker) = match format { - ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"), - ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => { - ("<|fim_middle|>current\n", "<|fim_suffix|>") - } - ZetaFormat::V0120GitMergeMarkers - | ZetaFormat::V0131GitMergeMarkersPrefix - | ZetaFormat::V0211Prefill => ( - 
zeta_prompt::v0120_git_merge_markers::START_MARKER, - zeta_prompt::v0120_git_merge_markers::SEPARATOR, - ), - ZetaFormat::V0211SeedCoder => ( - zeta_prompt::seed_coder::START_MARKER, - zeta_prompt::seed_coder::SEPARATOR, - ), - }; + let (current_marker, end_marker) = current_region_markers_for_format(format); let start = prompt.find(current_marker).with_context(|| { format!( @@ -82,8 +71,7 @@ fn extract_zeta2_current_region(prompt: &str, format: ZetaFormat) -> Result { - zeta_prompt::v0131_git_merge_markers_prefix::END_MARKER - } - ZetaFormat::V0120GitMergeMarkers => zeta_prompt::v0120_git_merge_markers::END_MARKER, - ZetaFormat::V0112MiddleAtEnd - | ZetaFormat::V0113Ordered - | ZetaFormat::V0114180EditableRegion => "", - ZetaFormat::V0211SeedCoder => zeta_prompt::seed_coder::END_MARKER, - }; - if !suffix.is_empty() { + if let Some(marker) = output_end_marker_for_format(format) { new_text = new_text - .strip_suffix(suffix) + .strip_suffix(marker) .unwrap_or(&new_text) .to_string(); } diff --git a/crates/zeta_prompt/src/zeta_prompt.rs b/crates/zeta_prompt/src/zeta_prompt.rs index 0cd37a455397334933dbfa2464c2dbcb72bba456..2ec12e8bebb4a868c0784e2fe52541a1de580555 100644 --- a/crates/zeta_prompt/src/zeta_prompt.rs +++ b/crates/zeta_prompt/src/zeta_prompt.rs @@ -86,6 +86,7 @@ pub enum ZetaFormat { V0131GitMergeMarkersPrefix, V0211Prefill, V0211SeedCoder, + v0226Hashline, } impl std::fmt::Display for ZetaFormat { @@ -122,25 +123,6 @@ impl ZetaFormat { .collect::>() .concat() } - - pub fn special_tokens(&self) -> &'static [&'static str] { - match self { - ZetaFormat::V0112MiddleAtEnd - | ZetaFormat::V0113Ordered - | ZetaFormat::V0114180EditableRegion => &[ - "<|fim_prefix|>", - "<|fim_suffix|>", - "<|fim_middle|>", - "<|file_sep|>", - CURSOR_MARKER, - ], - ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(), - ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => { - v0131_git_merge_markers_prefix::special_tokens() - } - 
ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(), - } - } } #[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)] @@ -212,33 +194,29 @@ pub struct RelatedExcerpt { } pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool { - format - .special_tokens() + special_tokens_for_format(format) .iter() .any(|token| input.cursor_excerpt.contains(token)) } pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String { - format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS) + format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS) } -/// Post-processes model output for the given zeta format by stripping format-specific suffixes. -pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str { +pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] { match format { - ZetaFormat::V0120GitMergeMarkers => output - .strip_suffix(v0120_git_merge_markers::END_MARKER) - .unwrap_or(output), - ZetaFormat::V0131GitMergeMarkersPrefix => output - .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER) - .unwrap_or(output), - ZetaFormat::V0211SeedCoder => output - .strip_suffix(seed_coder::END_MARKER) - .unwrap_or(output), - _ => output, + ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(), + ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(), + ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(), + ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(), + ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(), + ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(), + ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(), + ZetaFormat::v0226Hashline => hashline::special_tokens(), } } -pub fn excerpt_range_for_format( +pub fn excerpt_ranges_for_format( format: ZetaFormat, ranges: &ExcerptRanges, ) -> (Range, Range) { @@ 
-247,129 +225,257 @@ pub fn excerpt_range_for_format( ranges.editable_150.clone(), ranges.editable_150_context_350.clone(), ), - ZetaFormat::V0114180EditableRegion - | ZetaFormat::V0120GitMergeMarkers + ZetaFormat::V0114180EditableRegion => ( + ranges.editable_180.clone(), + ranges.editable_180_context_350.clone(), + ), + ZetaFormat::V0120GitMergeMarkers | ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill - | ZetaFormat::V0211SeedCoder => ( + | ZetaFormat::V0211SeedCoder + | ZetaFormat::v0226Hashline => ( ranges.editable_350.clone(), ranges.editable_350_context_150.clone(), ), } } -pub fn resolve_cursor_region( - input: &ZetaPromptInput, - format: ZetaFormat, -) -> (&str, Range, usize) { - let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges); - let context_start = context_range.start; - let context_text = &input.cursor_excerpt[context_range]; - let adjusted_editable = - (editable_range.start - context_start)..(editable_range.end - context_start); - let adjusted_cursor = input.cursor_offset_in_excerpt - context_start; - - (context_text, adjusted_editable, adjusted_cursor) -} - -fn format_zeta_prompt_with_budget( - input: &ZetaPromptInput, +pub fn write_cursor_excerpt_section_for_format( format: ZetaFormat, - max_tokens: usize, -) -> String { - let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format); - let path = &*input.cursor_path; - - let mut cursor_section = String::new(); + prompt: &mut String, + path: &Path, + context: &str, + editable_range: &Range, + cursor_offset: usize, +) { match format { - ZetaFormat::V0112MiddleAtEnd => { - v0112_middle_at_end::write_cursor_excerpt_section( - &mut cursor_section, - path, - context, - &editable_range, - cursor_offset, - ); - } + ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section( + prompt, + path, + context, + editable_range, + cursor_offset, + ), ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion 
=> { v0113_ordered::write_cursor_excerpt_section( - &mut cursor_section, + prompt, path, context, - &editable_range, + editable_range, cursor_offset, ) } ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section( - &mut cursor_section, + prompt, path, context, - &editable_range, + editable_range, cursor_offset, ), ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => { v0131_git_merge_markers_prefix::write_cursor_excerpt_section( - &mut cursor_section, + prompt, path, context, - &editable_range, + editable_range, cursor_offset, ) } - ZetaFormat::V0211SeedCoder => { - return seed_coder::format_prompt_with_budget( + ZetaFormat::V0211SeedCoder => seed_coder::write_cursor_excerpt_section( + prompt, + path, + context, + editable_range, + cursor_offset, + ), + ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section( + prompt, + path, + context, + editable_range, + cursor_offset, + ), + } +} + +pub fn format_prompt_with_budget_for_format( + input: &ZetaPromptInput, + format: ZetaFormat, + max_tokens: usize, +) -> String { + let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format); + let path = &*input.cursor_path; + + match format { + ZetaFormat::V0211SeedCoder => seed_coder::format_prompt_with_budget( + path, + context, + &editable_range, + cursor_offset, + &input.events, + &input.related_files, + max_tokens, + ), + _ => { + let mut cursor_section = String::new(); + write_cursor_excerpt_section_for_format( + format, + &mut cursor_section, path, context, &editable_range, cursor_offset, + ); + + let cursor_tokens = estimate_tokens(cursor_section.len()); + let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens); + + let edit_history_section = format_edit_history_within_budget( &input.events, + "<|file_sep|>", + "edit history", + budget_after_cursor, + ); + let edit_history_tokens = estimate_tokens(edit_history_section.len()); + let budget_after_edit_history = 
budget_after_cursor.saturating_sub(edit_history_tokens); + + let related_files_section = format_related_files_within_budget( &input.related_files, - max_tokens, + "<|file_sep|>", + "", + budget_after_edit_history, ); + + let mut prompt = String::new(); + prompt.push_str(&related_files_section); + prompt.push_str(&edit_history_section); + prompt.push_str(&cursor_section); + prompt } } - - let cursor_tokens = estimate_tokens(cursor_section.len()); - let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens); - - let edit_history_section = format_edit_history_within_budget( - &input.events, - "<|file_sep|>", - "edit history", - budget_after_cursor, - ); - let edit_history_tokens = estimate_tokens(edit_history_section.len()); - let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens); - - let related_files_section = format_related_files_within_budget( - &input.related_files, - "<|file_sep|>", - "", - budget_after_edit_history, - ); - - let mut prompt = String::new(); - prompt.push_str(&related_files_section); - prompt.push_str(&edit_history_section); - prompt.push_str(&cursor_section); - prompt } -pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String { +pub fn get_prefill_for_format( + format: ZetaFormat, + context: &str, + editable_range: &Range, +) -> String { match format { + ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range), ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion | ZetaFormat::V0120GitMergeMarkers | ZetaFormat::V0131GitMergeMarkersPrefix - | ZetaFormat::V0211SeedCoder => String::new(), - ZetaFormat::V0211Prefill => { - let (context, editable_range, _) = resolve_cursor_region(input, format); - v0211_prefill::get_prefill(context, &editable_range) + | ZetaFormat::V0211SeedCoder + | ZetaFormat::v0226Hashline => String::new(), + } +} + +pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> { + match 
format { + ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER), + ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER), + ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER), + ZetaFormat::V0211SeedCoder => Some(seed_coder::END_MARKER), + ZetaFormat::V0112MiddleAtEnd + | ZetaFormat::V0113Ordered + | ZetaFormat::V0114180EditableRegion + | ZetaFormat::v0226Hashline => None, + } +} + +pub fn current_region_markers_for_format(format: ZetaFormat) -> (&'static str, &'static str) { + match format { + ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"), + ZetaFormat::V0113Ordered + | ZetaFormat::V0114180EditableRegion + | ZetaFormat::v0226Hashline => ("<|fim_middle|>current\n", "<|fim_suffix|>"), + ZetaFormat::V0120GitMergeMarkers + | ZetaFormat::V0131GitMergeMarkersPrefix + | ZetaFormat::V0211Prefill => ( + v0120_git_merge_markers::START_MARKER, + v0120_git_merge_markers::SEPARATOR, + ), + ZetaFormat::V0211SeedCoder => (seed_coder::START_MARKER, seed_coder::SEPARATOR), + } +} + +pub fn clean_extracted_region_for_format(format: ZetaFormat, region: &str) -> String { + match format { + ZetaFormat::v0226Hashline => hashline::strip_hashline_prefixes(region), + _ => region.to_string(), + } +} + +pub fn encode_patch_as_output_for_format( + format: ZetaFormat, + old_editable_region: &str, + patch: &str, + cursor_offset: Option, +) -> Result> { + match format { + ZetaFormat::v0226Hashline => { + hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some) + } + _ => Ok(None), + } +} + +pub fn output_with_context_for_format( + format: ZetaFormat, + old_editable_region: &str, + output: &str, +) -> Result> { + match format { + ZetaFormat::v0226Hashline => { + if hashline::output_has_edit_commands(output) { + Ok(Some(hashline::apply_edit_commands( + old_editable_region, + output, + ))) + } else { + Ok(None) + } } + _ => Ok(None), } } +/// 
Post-processes model output for the given zeta format by stripping format-specific suffixes. +pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str { + match output_end_marker_for_format(format) { + Some(marker) => output.strip_suffix(marker).unwrap_or(output), + None => output, + } +} + +pub fn excerpt_range_for_format( + format: ZetaFormat, + ranges: &ExcerptRanges, +) -> (Range, Range) { + excerpt_ranges_for_format(format, ranges) +} + +pub fn resolve_cursor_region( + input: &ZetaPromptInput, + format: ZetaFormat, +) -> (&str, Range, usize) { + let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges); + let context_start = context_range.start; + let context_text = &input.cursor_excerpt[context_range]; + let adjusted_editable = + (editable_range.start - context_start)..(editable_range.end - context_start); + let adjusted_cursor = input.cursor_offset_in_excerpt - context_start; + + (context_text, adjusted_editable, adjusted_cursor) +} + +pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String { + let (context, editable_range, _) = resolve_cursor_region(input, format); + get_prefill_for_format(format, context, &editable_range) +} + fn format_edit_history_within_budget( events: &[Arc], file_marker: &str, @@ -533,6 +639,16 @@ pub fn write_related_files( mod v0112_middle_at_end { use super::*; + pub fn special_tokens() -> &'static [&'static str] { + &[ + "<|fim_prefix|>", + "<|fim_suffix|>", + "<|fim_middle|>", + "<|file_sep|>", + CURSOR_MARKER, + ] + } + pub fn write_cursor_excerpt_section( prompt: &mut String, path: &Path, @@ -567,6 +683,16 @@ mod v0112_middle_at_end { mod v0113_ordered { use super::*; + pub fn special_tokens() -> &'static [&'static str] { + &[ + "<|fim_prefix|>", + "<|fim_suffix|>", + "<|fim_middle|>", + "<|file_sep|>", + CURSOR_MARKER, + ] + } + pub fn write_cursor_excerpt_section( prompt: &mut String, path: &Path, @@ -601,6 +727,14 @@ mod v0113_ordered { } } +mod 
/// Computes the one-byte hash of a line: the wrapping (mod 256) sum of its bytes.
///
/// An empty line hashes to `0`.
pub fn hash_line(line: &[u8]) -> u8 {
    line.iter()
        .fold(0u8, |sum, &byte| sum.wrapping_add(byte))
}
+ pub fn write_hashline_editable_region( + out: &mut String, + editable_text: &str, + cursor_offset_in_editable: usize, + ) { + let mut offset = 0; + for (i, line) in editable_text.lines().enumerate() { + let (head, cursor, tail) = if cursor_offset_in_editable > offset + && cursor_offset_in_editable < offset + line.len() + { + ( + &line[..cursor_offset_in_editable - offset], + CURSOR_MARKER, + &line[cursor_offset_in_editable - offset..], + ) + } else { + (line, "", "") + }; + write!( + out, + "\n{}|{head}{cursor}{tail}", + LineRef { + index: i, + hash: hash_line(line.as_bytes()) + } + ) + .unwrap(); + offset += line.len() + 1; + } + } + + pub fn write_cursor_excerpt_section( + prompt: &mut String, + path: &Path, + context: &str, + editable_range: &Range, + cursor_offset: usize, + ) { + let path_str = path.to_string_lossy(); + write!(prompt, "<|file_sep|>{}\n", path_str).ok(); + + prompt.push_str("<|fim_prefix|>\n"); + prompt.push_str(&context[..editable_range.start]); + prompt.push_str(START_MARKER); + + let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start); + let editable_region = &context[editable_range.clone()]; + write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable); + + if !prompt.ends_with('\n') { + prompt.push('\n'); + } + + prompt.push_str("<|fim_suffix|>\n"); + prompt.push_str(&context[editable_range.end..]); + if !prompt.ends_with('\n') { + prompt.push('\n'); + } + + prompt.push_str(END_MARKER); + } + + /// A single edit command parsed from the model output. + #[derive(Debug)] + enum EditCommand<'a> { + /// Replace a range of lines (inclusive on both ends). Single-line set is + /// represented by `start == end`. + Set { + start: LineRef, + end: LineRef, + content: &'a str, + }, + /// Insert new lines after the given line, or before the first line if + /// `after` is `None`. + Insert { + after: Option, + content: &'a str, + }, + } + + /// Parse a line reference like `3:c3` into a `LineRef`. 
+ fn parse_line_ref(s: &str) -> Option { + let (idx_str, hash_str) = s.split_once(':')?; + let index = idx_str.parse::().ok()?; + let hash = u8::from_str_radix(hash_str, 16).ok()?; + Some(LineRef { index, hash }) + } + + /// Parse the model output into a list of `EditCommand`s. + fn parse_edit_commands(model_output: &str) -> Vec> { + let mut commands = Vec::new(); + let mut offset = 0usize; + + while offset < model_output.len() { + let next_nl = model_output[offset..] + .find('\n') + .map(|i| offset + i) + .unwrap_or(model_output.len()); + let line = &model_output[offset..next_nl]; + let line_end = if next_nl < model_output.len() { + next_nl + 1 + } else { + next_nl + }; + + let trimmed = line.trim(); + let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) { + (true, spec) + } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) { + (false, spec) + } else { + offset = line_end; + continue; + }; + + let mut content_end = line_end; + let mut scan = line_end; + + while scan < model_output.len() { + let body_nl = model_output[scan..] 
/// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
/// editable region, returning the plain text content.
///
/// A prefix is only removed when the text before the first `|` really looks
/// like a line reference: a decimal index, a `:`, and one or two hex digits.
/// Lines without such a prefix are kept unchanged, so content that merely
/// contains a `|` (for example `a || b`) is not truncated.
///
/// Lines are re-joined with `\n`; a trailing newline is emitted only if
/// `region` itself ends with one.
pub fn strip_hashline_prefixes(region: &str) -> String {
    /// Returns `line` without its leading `{index}:{hash}|`, or `line` itself
    /// when no well-formed prefix is present.
    fn strip_line_prefix(line: &str) -> &str {
        let Some((prefix, content)) = line.split_once('|') else {
            return line;
        };
        let Some((index, hash)) = prefix.split_once(':') else {
            return line;
        };
        let index_is_valid = !index.is_empty() && index.bytes().all(|b| b.is_ascii_digit());
        let hash_is_valid =
            matches!(hash.len(), 1..=2) && hash.bytes().all(|b| b.is_ascii_hexdigit());
        if index_is_valid && hash_is_valid {
            content
        } else {
            line
        }
    }

    let mut decoded = region
        .lines()
        .map(strip_line_prefix)
        .collect::<Vec<_>>()
        .join("\n");
    // `lines()` drops the final terminator; restore it to mirror the input.
    if region.ends_with('\n') {
        decoded.push('\n');
    }
    decoded
}
+ /// + /// Returns the full replacement text for the editable region. + pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String { + let original_lines: Vec<&str> = editable_region.lines().collect(); + let old_hashes: Vec = original_lines + .iter() + .map(|line| hash_line(line.as_bytes())) + .collect(); + + let commands = parse_edit_commands(model_output); + + // For set operations: indexed by start line → Some((end line index, content)) + // For insert operations: indexed by line index → vec of content to insert after + // Insert-before-first is tracked separately. + let mut set_ops: Vec> = vec![None; original_lines.len()]; + let mut insert_before_first: Vec<&str> = Vec::new(); + let mut insert_after: Vec> = vec![Vec::new(); original_lines.len()]; + + for command in &commands { + match command { + EditCommand::Set { + start, + end, + content, + } => { + if start.index < old_hashes.len() + && end.index < old_hashes.len() + && start.index <= end.index + && old_hashes[start.index] == start.hash + && old_hashes[end.index] == end.hash + { + set_ops[start.index] = Some((end.index, *content)); + } + } + EditCommand::Insert { after, content } => match after { + None => insert_before_first.push(*content), + Some(line_ref) => { + if line_ref.index < old_hashes.len() + && old_hashes[line_ref.index] == line_ref.hash + { + insert_after[line_ref.index].push(*content); + } + } + }, + } + } + + let mut result = String::new(); + + // Emit any insertions before the first line + for content in &insert_before_first { + result.push_str(content); + if !content.ends_with('\n') { + result.push('\n'); + } + } + + let mut i = 0; + while i < original_lines.len() { + if let Some((end_index, replacement)) = set_ops[i].as_ref() { + // Replace lines i..=end_index with the replacement content + result.push_str(replacement); + if !replacement.is_empty() && !replacement.ends_with('\n') { + result.push('\n'); + } + // Emit any insertions after the end of this set range + if 
*end_index < insert_after.len() { + for content in &insert_after[*end_index] { + result.push_str(content); + if !content.ends_with('\n') { + result.push('\n'); + } + } + } + i = end_index + 1; + } else { + // Keep the original line + result.push_str(original_lines[i]); + result.push('\n'); + // Emit any insertions after this line + for content in &insert_after[i] { + result.push_str(content); + if !content.ends_with('\n') { + result.push('\n'); + } + } + i += 1; + } + } + + // Preserve trailing newline behavior: if the original ended with a + // newline the result already has one; if it didn't, trim the extra one + // we added. + if !editable_region.ends_with('\n') && result.ends_with('\n') { + result.pop(); + } + + result + } + + /// Convert a unified diff patch into hashline edit commands. + /// + /// Parses the unified diff `patch` directly to determine which lines of + /// `old_text` are deleted/replaced and what new lines are added, then emits + /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their + /// `{index}:{hash}` identifiers. + /// + /// `cursor_offset` is an optional byte offset into the first hunk's new + /// text (context + additions) where the cursor marker should be placed. + pub fn patch_to_edit_commands( + old_text: &str, + patch: &str, + cursor_offset: Option, + ) -> Result { + let old_lines: Vec<&str> = old_text.lines().collect(); + let old_hashes: Vec = old_lines + .iter() + .map(|line| hash_line(line.as_bytes())) + .collect(); + + let mut result = String::new(); + let mut first_hunk = true; + + struct Hunk<'a> { + line_range: Range, + new_text_lines: Vec<&'a str>, + cursor_line_offset_in_new_text: Option<(usize, usize)>, + } + + // Parse the patch line by line. We only care about hunk headers, + // context, deletions, and additions. + let mut old_line_index: usize = 0; + let mut current_hunk: Option = None; + // Byte offset tracking within the hunk's new text for cursor placement. 
+ let mut new_text_byte_offset: usize = 0; + // The line index of the last old line seen before/in the current hunk + // (used for insert-after reference). + let mut last_old_line_before_hunk: Option = None; + + fn flush_hunk( + hunk: Hunk, + last_old_line: Option, + result: &mut String, + old_hashes: &[u8], + ) { + if hunk.line_range.is_empty() { + // Pure insertion — reference the old line to insert after when in bounds. + if let Some(after) = last_old_line + && let Some(&hash) = old_hashes.get(after) + { + write!( + result, + "{INSERT_COMMAND_MARKER}{}\n", + LineRef { index: after, hash } + ) + .unwrap(); + } else { + result.push_str(INSERT_COMMAND_MARKER); + result.push('\n'); + } + } else { + let start = hunk.line_range.start; + let end_exclusive = hunk.line_range.end; + let deleted_line_count = end_exclusive.saturating_sub(start); + + if deleted_line_count == 1 { + if let Some(&hash) = old_hashes.get(start) { + write!( + result, + "{SET_COMMAND_MARKER}{}\n", + LineRef { index: start, hash } + ) + .unwrap(); + } else { + result.push_str(SET_COMMAND_MARKER); + result.push('\n'); + } + } else { + let end_inclusive = end_exclusive - 1; + match ( + old_hashes.get(start).copied(), + old_hashes.get(end_inclusive).copied(), + ) { + (Some(start_hash), Some(end_hash)) => { + write!( + result, + "{SET_COMMAND_MARKER}{}-{}\n", + LineRef { + index: start, + hash: start_hash + }, + LineRef { + index: end_inclusive, + hash: end_hash + } + ) + .unwrap(); + } + _ => { + result.push_str(SET_COMMAND_MARKER); + result.push('\n'); + } + } + } + } + for (line_offset, line) in hunk.new_text_lines.iter().enumerate() { + if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text + && line_offset == cursor_line_offset + { + result.push_str(&line[..char_offset]); + result.push_str(CURSOR_MARKER); + result.push_str(&line[char_offset..]); + continue; + } + + result.push_str(line); + } + } + + for raw_line in patch.split_inclusive('\n') { + if 
raw_line.starts_with("@@") { + // Flush any pending change hunk from a previous patch hunk. + if let Some(hunk) = current_hunk.take() { + flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes); + } + + // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@ + // We intentionally do not trust old_start as a direct local index into `old_text`, + // because some patches are produced against a larger file region and carry + // non-local line numbers. We keep indexing local by advancing from parsed patch lines. + if first_hunk { + new_text_byte_offset = 0; + first_hunk = false; + } + continue; + } + + if raw_line.starts_with("---") || raw_line.starts_with("+++") { + continue; + } + if raw_line.starts_with("\\ No newline") { + continue; + } + + if raw_line.starts_with('-') { + // Extend or start a change hunk with this deleted old line. + match &mut current_hunk { + Some(Hunk { + line_range: range, .. + }) => range.end = old_line_index + 1, + None => { + current_hunk = Some(Hunk { + line_range: old_line_index..old_line_index + 1, + new_text_lines: Vec::new(), + cursor_line_offset_in_new_text: None, + }); + } + } + old_line_index += 1; + } else if let Some(added_content) = raw_line.strip_prefix('+') { + // Place cursor marker if cursor_offset falls within this line. 
+ let mut cursor_line_offset = None; + if let Some(cursor_off) = cursor_offset + && (first_hunk + || cursor_off >= new_text_byte_offset + && cursor_off <= new_text_byte_offset + added_content.len()) + { + let line_offset = added_content.floor_char_boundary( + cursor_off + .saturating_sub(new_text_byte_offset) + .min(added_content.len()), + ); + cursor_line_offset = Some(line_offset); + } + + new_text_byte_offset += added_content.len(); + + let hunk = current_hunk.get_or_insert(Hunk { + line_range: old_line_index..old_line_index, + new_text_lines: vec![], + cursor_line_offset_in_new_text: None, + }); + hunk.new_text_lines.push(added_content); + hunk.cursor_line_offset_in_new_text = cursor_line_offset + .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line)); + } else { + // Context line (starts with ' ' or is empty). + if let Some(hunk) = current_hunk.take() { + flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes); + } + last_old_line_before_hunk = Some(old_line_index); + old_line_index += 1; + let content = raw_line.strip_prefix(' ').unwrap_or(raw_line); + new_text_byte_offset += content.len(); + } + } + + // Flush final group. + if let Some(hunk) = current_hunk.take() { + flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes); + } + + // Trim a single trailing newline. + if result.ends_with('\n') { + result.pop(); + } + + Ok(result) + } + + #[cfg(test)] + mod tests { + use super::*; + use indoc::indoc; + + #[test] + fn test_format_cursor_region() { + struct Case { + name: &'static str, + context: &'static str, + editable_range: Range, + cursor_offset: usize, + expected: &'static str, + } + + let cases = [ + Case { + name: "basic_cursor_placement", + context: "hello world\n", + editable_range: 0..12, + cursor_offset: 5, + expected: indoc! 
{" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:5c|hello<|user_cursor|> world + <|fim_suffix|> + <|fim_middle|>updated"}, + }, + Case { + name: "multiline_cursor_on_second_line", + context: "aaa\nbbb\nccc\n", + editable_range: 0..12, + cursor_offset: 5, // byte 5 → 1 byte into "bbb" + expected: indoc! {" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:23|aaa + 1:26|b<|user_cursor|>bb + 2:29|ccc + <|fim_suffix|> + <|fim_middle|>updated"}, + }, + Case { + name: "no_trailing_newline_in_context", + context: "line1\nline2", + editable_range: 0..11, + cursor_offset: 3, + expected: indoc! {" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:d9|lin<|user_cursor|>e1 + 1:da|line2 + <|fim_suffix|> + <|fim_middle|>updated"}, + }, + Case { + name: "leading_newline_in_editable_region", + context: "\nabc\n", + editable_range: 0..5, + cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n) + expected: indoc! {" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:00| + 1:26|a<|user_cursor|>bc + <|fim_suffix|> + <|fim_middle|>updated"}, + }, + Case { + name: "with_suffix", + context: "abc\ndef", + editable_range: 0..4, // editable region = "abc\n", suffix = "def" + cursor_offset: 2, + expected: indoc! {" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:26|ab<|user_cursor|>c + <|fim_suffix|> + def + <|fim_middle|>updated"}, + }, + Case { + name: "unicode_two_byte_chars", + context: "héllo\n", + editable_range: 0..7, + cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo" + expected: indoc! {" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:1b|hé<|user_cursor|>llo + <|fim_suffix|> + <|fim_middle|>updated"}, + }, + Case { + name: "unicode_three_byte_chars", + context: "日本語\n", + editable_range: 0..10, + cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語" + expected: indoc! 
{" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:80|日本<|user_cursor|>語 + <|fim_suffix|> + <|fim_middle|>updated"}, + }, + Case { + name: "unicode_four_byte_chars", + context: "a🌍b\n", + editable_range: 0..7, + cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b" + expected: indoc! {" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:6b|a🌍<|user_cursor|>b + <|fim_suffix|> + <|fim_middle|>updated"}, + }, + Case { + name: "cursor_at_start_of_region_not_placed", + context: "abc\n", + editable_range: 0..4, + cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed + expected: indoc! {" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:26|abc + <|fim_suffix|> + <|fim_middle|>updated"}, + }, + Case { + name: "cursor_at_end_of_line_not_placed", + context: "abc\ndef\n", + editable_range: 0..8, + cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed + expected: indoc! {" + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + 0:26|abc + 1:2f|def + <|fim_suffix|> + <|fim_middle|>updated"}, + }, + Case { + name: "cursor_offset_relative_to_context_not_editable_region", + // cursor_offset is relative to `context`, so when editable_range.start > 0, + // write_cursor_excerpt_section must subtract it before comparing against + // per-line offsets within the editable region. + context: "pre\naaa\nbbb\nsuf\n", + editable_range: 4..12, // editable region = "aaa\nbbb\n" + cursor_offset: 9, // byte 9 in context = second 'b' in "bbb" + expected: indoc! 
{" + <|file_sep|>test.rs + <|fim_prefix|> + pre + <|fim_middle|>current + 0:23|aaa + 1:26|b<|user_cursor|>bb + <|fim_suffix|> + suf + <|fim_middle|>updated"}, + }, + ]; + + for case in &cases { + let mut prompt = String::new(); + hashline::write_cursor_excerpt_section( + &mut prompt, + Path::new("test.rs"), + case.context, + &case.editable_range, + case.cursor_offset, + ); + assert_eq!(prompt, case.expected, "failed case: {}", case.name); + } + } + + #[test] + fn test_apply_edit_commands() { + struct Case { + name: &'static str, + original: &'static str, + model_output: &'static str, + expected: &'static str, + } + + let cases = vec![ + Case { + name: "set_single_line", + original: indoc! {" + let mut total = 0; + for product in products { + total += ; + } + total + "}, + model_output: indoc! {" + <|set|>2:87 + total += product.price; + "}, + expected: indoc! {" + let mut total = 0; + for product in products { + total += product.price; + } + total + "}, + }, + Case { + name: "set_range", + original: indoc! {" + fn foo() { + let x = 1; + let y = 2; + let z = 3; + } + "}, + model_output: indoc! {" + <|set|>1:46-3:4a + let sum = 6; + "}, + expected: indoc! {" + fn foo() { + let sum = 6; + } + "}, + }, + Case { + name: "insert_after_line", + original: indoc! {" + fn main() { + let x = 1; + } + "}, + model_output: indoc! {" + <|insert|>1:46 + let y = 2; + "}, + expected: indoc! {" + fn main() { + let x = 1; + let y = 2; + } + "}, + }, + Case { + name: "insert_before_first", + original: indoc! {" + let x = 1; + let y = 2; + "}, + model_output: indoc! {" + <|insert|> + use std::io; + "}, + expected: indoc! {" + use std::io; + let x = 1; + let y = 2; + "}, + }, + Case { + name: "set_with_cursor_marker", + original: indoc! {" + fn main() { + println!(); + } + "}, + model_output: indoc! {" + <|set|>1:34 + eprintln!(\"<|user_cursor|>\"); + "}, + expected: indoc! 
{" + fn main() { + eprintln!(\"<|user_cursor|>\"); + } + "}, + }, + Case { + name: "multiple_set_commands", + original: indoc! {" + aaa + bbb + ccc + ddd + "}, + model_output: indoc! {" + <|set|>0:23 + AAA + <|set|>2:29 + CCC + "}, + expected: indoc! {" + AAA + bbb + CCC + ddd + "}, + }, + Case { + name: "set_range_multiline_replacement", + original: indoc! {" + fn handle_submit() { + } + + fn handle_keystroke() { + "}, + model_output: indoc! {" + <|set|>0:3f-1:7d + fn handle_submit(modal_state: &mut ModalState) { + <|user_cursor|> + } + "}, + expected: indoc! {" + fn handle_submit(modal_state: &mut ModalState) { + <|user_cursor|> + } + + fn handle_keystroke() { + "}, + }, + Case { + name: "no_edit_commands_returns_original", + original: indoc! {" + hello + world + "}, + model_output: "some random text with no commands", + expected: indoc! {" + hello + world + "}, + }, + Case { + name: "wrong_hash_set_ignored", + original: indoc! {" + aaa + bbb + "}, + model_output: indoc! {" + <|set|>0:ff + ZZZ + "}, + expected: indoc! {" + aaa + bbb + "}, + }, + Case { + name: "insert_and_set_combined", + original: indoc! {" + alpha + beta + gamma + "}, + model_output: indoc! {" + <|set|>0:06 + ALPHA + <|insert|>1:9c + beta_extra + "}, + expected: indoc! {" + ALPHA + beta + beta_extra + gamma + "}, + }, + Case { + name: "no_trailing_newline_preserved", + original: "hello\nworld", + model_output: indoc! {" + <|set|>0:14 + HELLO + "}, + expected: "HELLO\nworld", + }, + Case { + name: "set_range_hash_mismatch_in_end_bound", + original: indoc! {" + one + two + three + "}, + model_output: indoc! {" + <|set|>0:42-2:ff + ONE_TWO_THREE + "}, + expected: indoc! {" + one + two + three + "}, + }, + Case { + name: "set_range_start_greater_than_end_ignored", + original: indoc! {" + a + b + c + "}, + model_output: indoc! {" + <|set|>2:63-1:62 + X + "}, + expected: indoc! {" + a + b + c + "}, + }, + Case { + name: "insert_out_of_bounds_ignored", + original: indoc! 
{" + x + y + "}, + model_output: indoc! {" + <|insert|>99:aa + z + "}, + expected: indoc! {" + x + y + "}, + }, + Case { + name: "set_out_of_bounds_ignored", + original: indoc! {" + x + y + "}, + model_output: indoc! {" + <|set|>99:aa + z + "}, + expected: indoc! {" + x + y + "}, + }, + Case { + name: "malformed_set_command_ignored", + original: indoc! {" + alpha + beta + "}, + model_output: indoc! {" + <|set|>not-a-line-ref + UPDATED + "}, + expected: indoc! {" + alpha + beta + "}, + }, + Case { + name: "malformed_insert_hash_treated_as_before_first", + original: indoc! {" + alpha + beta + "}, + model_output: indoc! {" + <|insert|>1:nothex + preamble + "}, + expected: indoc! {" + preamble + alpha + beta + "}, + }, + Case { + name: "set_then_insert_same_target_orders_insert_after_replacement", + original: indoc! {" + cat + dog + "}, + model_output: indoc! {" + <|set|>0:38 + CAT + <|insert|>0:38 + TAIL + "}, + expected: indoc! {" + CAT + TAIL + dog + "}, + }, + Case { + name: "overlapping_set_ranges_last_wins", + original: indoc! {" + a + b + c + d + "}, + model_output: indoc! {" + <|set|>0:61-2:63 + FIRST + <|set|>1:62-3:64 + SECOND + "}, + expected: indoc! {" + FIRST + d + "}, + }, + Case { + name: "insert_before_first_and_after_line", + original: indoc! {" + a + b + "}, + model_output: indoc! {" + <|insert|> + HEAD + <|insert|>0:61 + MID + "}, + expected: indoc! 
/// `output_has_edit_commands` must detect a `<|set|>` or `<|insert|>` marker
/// anywhere in the model output and reject outputs that contain neither.
#[test]
fn test_output_has_edit_commands() {
    let outputs_with_commands = [
        format!("{SET_COMMAND_MARKER}0:ab\nnew"),
        format!("{INSERT_COMMAND_MARKER}0:ab\nnew"),
        format!("some text\n{SET_COMMAND_MARKER}1:cd\nstuff"),
    ];
    for output in &outputs_with_commands {
        assert!(hashline::output_has_edit_commands(output));
    }

    for output in ["just plain text", "NO_EDITS"] {
        assert!(!hashline::output_has_edit_commands(output));
    }
}
{" + fn main() { + let x = 1; + let y = 2; + } + "}, + }, + Case { + name: "insertion_before_first", + old: indoc! {" + let x = 1; + let y = 2; + "}, + patch: indoc! {" + @@ -1,2 +1,3 @@ + +use std::io; + let x = 1; + let y = 2; + "}, + expected_new: indoc! {" + use std::io; + let x = 1; + let y = 2; + "}, + }, + Case { + name: "deletion", + old: indoc! {" + aaa + bbb + ccc + ddd + "}, + patch: indoc! {" + @@ -1,4 +1,2 @@ + aaa + -bbb + -ccc + ddd + "}, + expected_new: indoc! {" + aaa + ddd + "}, + }, + Case { + name: "multiple_changes", + old: indoc! {" + alpha + beta + gamma + delta + epsilon + "}, + patch: indoc! {" + @@ -1,5 +1,5 @@ + -alpha + +ALPHA + beta + gamma + -delta + +DELTA + epsilon + "}, + expected_new: indoc! {" + ALPHA + beta + gamma + DELTA + epsilon + "}, + }, + Case { + name: "replace_with_insertion", + old: indoc! {r#" + fn handle() { + modal_state.close(); + modal_state.dismiss(); + "#}, + patch: indoc! {r#" + @@ -1,3 +1,4 @@ + fn handle() { + modal_state.close(); + + eprintln!(""); + modal_state.dismiss(); + "#}, + expected_new: indoc! {r#" + fn handle() { + modal_state.close(); + eprintln!(""); + modal_state.dismiss(); + "#}, + }, + Case { + name: "complete_replacement", + old: indoc! {" + aaa + bbb + ccc + "}, + patch: indoc! {" + @@ -1,3 +1,3 @@ + -aaa + -bbb + -ccc + +xxx + +yyy + +zzz + "}, + expected_new: indoc! {" + xxx + yyy + zzz + "}, + }, + Case { + name: "add_function_body", + old: indoc! {" + fn foo() { + modal_state.dismiss(); + } + + fn + + fn handle_keystroke() { + "}, + patch: indoc! {" + @@ -1,6 +1,8 @@ + fn foo() { + modal_state.dismiss(); + } + + -fn + +fn handle_submit() { + + todo() + +} + + fn handle_keystroke() { + "}, + expected_new: indoc! {" + fn foo() { + modal_state.dismiss(); + } + + fn handle_submit() { + todo() + } + + fn handle_keystroke() { + "}, + }, + Case { + name: "with_cursor_offset", + old: indoc! {r#" + fn main() { + println!(); + } + "#}, + patch: indoc! 
{r#" + @@ -1,3 +1,3 @@ + fn main() { + - println!(); + + eprintln!(""); + } + "#}, + expected_new: indoc! {r#" + fn main() { + eprintln!("<|user_cursor|>"); + } + "#}, + }, + Case { + name: "non_local_hunk_header_pure_insertion_repro", + old: indoc! {" + aaa + bbb + "}, + patch: indoc! {" + @@ -20,2 +20,3 @@ + aaa + +xxx + bbb + "}, + expected_new: indoc! {" + aaa + xxx + bbb + "}, + }, + ]; + + for case in &cases { + // The cursor_offset for patch_to_edit_commands is relative to + // the first hunk's new text (context + additions). We compute + // it by finding where the marker sits in the expected output + // (which mirrors the new text of the hunk). + let cursor_offset = case.expected_new.find(CURSOR_MARKER); + + let commands = + hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset) + .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name)); + + assert!( + hashline::output_has_edit_commands(&commands), + "case {}: expected edit commands, got: {commands:?}", + case.name, + ); + + let applied = hashline::apply_edit_commands(case.old, &commands); + assert_eq!(applied, case.expected_new, "case {}", case.name); + } + } + } +} + pub mod seed_coder { //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode. //! 
@@ -847,6 +2392,17 @@ pub mod seed_coder { ] } + pub fn write_cursor_excerpt_section( + prompt: &mut String, + path: &Path, + context: &str, + editable_range: &Range, + cursor_offset: usize, + ) { + let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset); + prompt.push_str(§ion); + } + pub fn format_prompt_with_budget( path: &Path, context: &str, @@ -1186,7 +2742,7 @@ mod tests { } fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String { - format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens) + format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens) } #[test] @@ -1551,11 +3107,11 @@ mod tests { } fn format_seed_coder(input: &ZetaPromptInput) -> String { - format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000) + format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000) } fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String { - format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens) + format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens) } #[test]