diff --git a/crates/edit_prediction_cli/src/format_prompt.rs b/crates/edit_prediction_cli/src/format_prompt.rs index ae0d60ecb508f3a7ab46daf0b35a8d741e39d5dc..15932706684ec47bac00048407426a617c21a23b 100644 --- a/crates/edit_prediction_cli/src/format_prompt.rs +++ b/crates/edit_prediction_cli/src/format_prompt.rs @@ -33,10 +33,10 @@ pub async fn run_format_prompt( .context("prompt_inputs must be set after context retrieval")?; match args.provider { - PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => { + PredictionProvider::Teacher(_, zeta_format) + | PredictionProvider::TeacherNonBatching(_, zeta_format) => { step_progress.set_substatus("formatting teacher prompt"); - let zeta_format = ZetaFormat::default(); let (editable_range, context_range) = excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges); @@ -163,6 +163,20 @@ pub fn zeta2_output_for_patch( ); } + if version == ZetaFormat::V0327SingleFile { + let cursor_in_new = cursor_offset.map(|cursor_offset| { + let hunk_start = first_hunk_offset.unwrap_or(0); + result.floor_char_boundary((hunk_start + cursor_offset).min(result.len())) + }); + return multi_region::encode_from_old_and_new_v0318( + &old_editable_region, + &result, + cursor_in_new, + zeta_prompt::CURSOR_MARKER, + multi_region::V0327_END_MARKER, + ); + } + if version == ZetaFormat::V0316SeedMultiRegions { let cursor_in_new = cursor_offset.map(|cursor_offset| { let hunk_start = first_hunk_offset.unwrap_or(0); diff --git a/crates/edit_prediction_cli/src/main.rs b/crates/edit_prediction_cli/src/main.rs index d144f998ff27b90e3009f82c367bf4699db4341e..2df477e2693c074e12668173db6a38627ca57213 100644 --- a/crates/edit_prediction_cli/src/main.rs +++ b/crates/edit_prediction_cli/src/main.rs @@ -46,6 +46,7 @@ use std::fmt::Display; use std::fs::{File, OpenOptions}; use std::hash::{Hash, Hasher}; use std::io::{BufRead, BufReader, BufWriter, Write}; +use std::str::FromStr; use std::sync::Mutex; use std::{path::PathBuf, sync::Arc}; @@ -363,9 +364,9 @@ enum PredictionProvider { Zeta1, Zeta2(ZetaFormat), Baseten(ZetaFormat), - Teacher(TeacherBackend), + Teacher(TeacherBackend, ZetaFormat), TeacherMultiRegion(TeacherBackend), - TeacherNonBatching(TeacherBackend), + TeacherNonBatching(TeacherBackend, ZetaFormat), TeacherMultiRegionNonBatching(TeacherBackend), Repair, } @@ -383,12 +384,14 @@ impl std::fmt::Display for PredictionProvider { PredictionProvider::Zeta1 => write!(f, "zeta1"), PredictionProvider::Zeta2(format) => write!(f, "zeta2:{format}"), PredictionProvider::Baseten(format) => write!(f, "baseten:{format}"), - PredictionProvider::Teacher(backend) => write!(f, "teacher:{backend}"), + PredictionProvider::Teacher(backend, format) => { + write!(f, "teacher:{backend}:{format:?}") + } PredictionProvider::TeacherMultiRegion(backend) => { write!(f, "teacher-multi-region:{backend}") } - PredictionProvider::TeacherNonBatching(backend) => { - write!(f, "teacher-non-batching:{backend}") + PredictionProvider::TeacherNonBatching(backend, format) => { + write!(f, "teacher-non-batching:{backend}:{format:?}") } PredictionProvider::TeacherMultiRegionNonBatching(backend) => { write!(f, "teacher-multi-region-non-batching:{backend}") @@ -412,12 +415,16 @@ impl std::str::FromStr for PredictionProvider { let format = arg.map(ZetaFormat::parse).transpose()?.unwrap_or_default(); Ok(PredictionProvider::Zeta2(format)) } - "teacher" => { + "teacher" => parse_teacher_args(arg), + "teacher-non-batching" | "teacher_non_batching" => { let backend = arg .map(|a| a.parse()) .transpose()? .unwrap_or(TeacherBackend::default()); - Ok(PredictionProvider::Teacher(backend)) + Ok(PredictionProvider::TeacherNonBatching( + backend, + ZetaFormat::default(), + )) } "teacher-multi-region" | "teacher_multi_region" => { let backend = arg @@ -426,13 +433,6 @@ impl std::str::FromStr for PredictionProvider { .unwrap_or(TeacherBackend::default()); Ok(PredictionProvider::TeacherMultiRegion(backend)) } - "teacher-non-batching" | "teacher_non_batching" => { - let backend = arg - .map(|a| a.parse()) - .transpose()? - .unwrap_or(TeacherBackend::default()); - Ok(PredictionProvider::TeacherNonBatching(backend)) - } "teacher-multi-region-non-batching" | "teacher_multi_region_non_batching" => { let backend = arg .map(|a| a.parse()) @@ -461,6 +461,27 @@ impl std::str::FromStr for PredictionProvider { } } +fn parse_teacher_args(arg: Option<&str>) -> Result { + let mut backend = TeacherBackend::default(); + let mut format = ZetaFormat::default(); + + for arg in arg.unwrap_or_default().split(':') { + if arg.is_empty() { + continue; + } + + if let Ok(parsed_backend) = TeacherBackend::from_str(arg) { + backend = parsed_backend; + } else if let Ok(parsed_format) = ZetaFormat::parse(arg) { + format = parsed_format; + } else { + anyhow::bail!("unknown teacher backend or zeta format `{arg}`"); + } + } + + Ok(PredictionProvider::Teacher(backend, format)) +} + impl Serialize for PredictionProvider { fn serialize(&self, serializer: S) -> Result where diff --git a/crates/edit_prediction_cli/src/parse_output.rs b/crates/edit_prediction_cli/src/parse_output.rs index fc85afa371a4edfe8080d602000c38ecedb98c86..c8e0fa7568cb2b8e0b2e63fdb9219d19eda9baea 100644 --- a/crates/edit_prediction_cli/src/parse_output.rs +++ b/crates/edit_prediction_cli/src/parse_output.rs @@ -37,7 +37,7 @@ pub fn parse_prediction_output( provider: PredictionProvider, ) -> Result<(String, Option)> { match provider { - PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => { + PredictionProvider::Teacher(_, _) | PredictionProvider::TeacherNonBatching(_, _) => { TeacherPrompt::parse(example, actual_output) } PredictionProvider::TeacherMultiRegion(_) diff --git a/crates/edit_prediction_cli/src/predict.rs b/crates/edit_prediction_cli/src/predict.rs index 99d90f0f4e524256ee3e7ec8f1bfdd6af34c566b..c925527feb65fdad67ff3de1cc3ea79b6b236b6a 100644 --- a/crates/edit_prediction_cli/src/predict.rs +++ b/crates/edit_prediction_cli/src/predict.rs @@ -57,10 +57,16 @@ pub async fn run_prediction( ); }; - if let PredictionProvider::Teacher(backend) - | PredictionProvider::TeacherMultiRegion(backend) - | PredictionProvider::TeacherNonBatching(backend) - | PredictionProvider::TeacherMultiRegionNonBatching(backend) = provider + if matches!( + provider, + PredictionProvider::TeacherMultiRegion(..) + | PredictionProvider::TeacherMultiRegionNonBatching(..) + ) { + anyhow::bail!("Teacher multi-region providers are not supported for prediction."); + } + + if let PredictionProvider::Teacher(backend, _) + | PredictionProvider::TeacherNonBatching(backend, _) = provider { run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?; run_format_prompt( @@ -416,14 +422,14 @@ async fn predict_anthropic( .prompt .as_ref() .map(|prompt| prompt.provider) - .unwrap_or(PredictionProvider::Teacher(backend)) + .unwrap_or(PredictionProvider::Teacher(backend, ZetaFormat::default())) } else { match example.prompt.as_ref().map(|prompt| prompt.provider) { Some(PredictionProvider::TeacherMultiRegion(_)) | Some(PredictionProvider::TeacherMultiRegionNonBatching(_)) => { PredictionProvider::TeacherMultiRegionNonBatching(backend) } - _ => PredictionProvider::TeacherNonBatching(backend), + _ => PredictionProvider::TeacherNonBatching(backend, ZetaFormat::default()), } }; @@ -445,7 +451,7 @@ async fn predict_anthropic( Some(PredictionProvider::TeacherMultiRegion(_)) => { PredictionProvider::TeacherMultiRegion(backend) } - _ => PredictionProvider::Teacher(backend), + _ => PredictionProvider::Teacher(backend, ZetaFormat::default()), } } else { match example.prompt.as_ref().map(|prompt| prompt.provider) { @@ -453,7 +459,7 @@ async fn predict_anthropic( | Some(PredictionProvider::TeacherMultiRegionNonBatching(_)) => { PredictionProvider::TeacherMultiRegionNonBatching(backend) } - _ => PredictionProvider::TeacherNonBatching(backend), + _ => PredictionProvider::TeacherNonBatching(backend, ZetaFormat::default()), } }, cumulative_logprob: None, @@ -535,14 +541,14 @@ async fn predict_openai( .prompt .as_ref() .map(|prompt| prompt.provider) - .unwrap_or(PredictionProvider::Teacher(backend)) + .unwrap_or(PredictionProvider::Teacher(backend, ZetaFormat::default())) } else { match example.prompt.as_ref().map(|prompt| prompt.provider) { Some(PredictionProvider::TeacherMultiRegion(_)) | Some(PredictionProvider::TeacherMultiRegionNonBatching(_)) => { PredictionProvider::TeacherMultiRegionNonBatching(backend) } - _ => PredictionProvider::TeacherNonBatching(backend), + _ => PredictionProvider::TeacherNonBatching(backend, ZetaFormat::default()), } }; @@ -564,7 +570,7 @@ async fn predict_openai( Some(PredictionProvider::TeacherMultiRegion(_)) => { PredictionProvider::TeacherMultiRegion(backend) } - _ => PredictionProvider::Teacher(backend), + _ => PredictionProvider::Teacher(backend, ZetaFormat::default()), } } else { match example.prompt.as_ref().map(|prompt| prompt.provider) { @@ -572,7 +578,7 @@ async fn predict_openai( | Some(PredictionProvider::TeacherMultiRegionNonBatching(_)) => { PredictionProvider::TeacherMultiRegionNonBatching(backend) } - _ => PredictionProvider::TeacherNonBatching(backend), + _ => PredictionProvider::TeacherNonBatching(backend, ZetaFormat::default()), } }, cumulative_logprob: None, @@ -671,7 +677,7 @@ pub async fn predict_baseten( pub async fn sync_batches(provider: Option<&PredictionProvider>) -> anyhow::Result<()> { match provider { - Some(PredictionProvider::Teacher(backend)) + Some(PredictionProvider::Teacher(backend, _)) | Some(PredictionProvider::TeacherMultiRegion(backend)) => match backend { TeacherBackend::Sonnet45 | TeacherBackend::Sonnet46 => { let llm_client = ANTHROPIC_CLIENT.get_or_init(|| { @@ -703,7 +709,7 @@ pub async fn reprocess_after_batch_wait( examples: &mut [Example], args: &PredictArgs, ) -> anyhow::Result<()> { - let Some(PredictionProvider::Teacher(backend)) = args.provider else { + let Some(PredictionProvider::Teacher(backend, _)) = args.provider else { return Ok(()); }; @@ -762,7 +768,7 @@ pub async fn wait_for_batches(provider: Option<&PredictionProvider>) -> anyhow:: fn pending_batch_count(provider: Option<&PredictionProvider>) -> anyhow::Result { match provider { - Some(PredictionProvider::Teacher(backend)) => match backend { + Some(PredictionProvider::Teacher(backend, _)) => match backend { TeacherBackend::Sonnet45 | TeacherBackend::Sonnet46 => { let llm_client = ANTHROPIC_CLIENT.get_or_init(|| { AnthropicClient::batch(&crate::paths::LLM_CACHE_DB) diff --git a/crates/edit_prediction_cli/src/repair.rs b/crates/edit_prediction_cli/src/repair.rs index e8fb36eae28bc65a3f2c865bb95a22175b1d7ad0..4be087ae00ca5fc804140b385f4b01ff6f7d9663 100644 --- a/crates/edit_prediction_cli/src/repair.rs +++ b/crates/edit_prediction_cli/src/repair.rs @@ -525,6 +525,7 @@ mod tests { use crate::{PredictionProvider, TeacherBackend}; use edit_prediction::example_spec::ExampleSpec; use std::{path::Path, sync::Arc}; + use zeta_prompt::ZetaFormat; fn example_with_previous_prediction() -> Example { Example { @@ -557,7 +558,10 @@ mod tests { editable_region_offset: Some(4), }), error: None, - provider: PredictionProvider::Teacher(TeacherBackend::Sonnet45), + provider: PredictionProvider::Teacher( + TeacherBackend::Sonnet45, + ZetaFormat::default(), + ), cumulative_logprob: None, avg_logprob: None, }], diff --git a/crates/edit_prediction_cli/src/score.rs b/crates/edit_prediction_cli/src/score.rs index 38329c8c3329fa3f26f5795b6a9bdcd02997b59f..5e7721e84f7892006145b953107c97d447757089 100644 --- a/crates/edit_prediction_cli/src/score.rs +++ b/crates/edit_prediction_cli/src/score.rs @@ -52,7 +52,7 @@ pub async fn run_scoring( let old_editable_region = if let Some(p) = example.prompt.as_ref() { if matches!( p.provider, - PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) + PredictionProvider::Teacher(_, _) | PredictionProvider::TeacherNonBatching(_, _) ) { Some( TeacherPrompt::extract_editable_region(&p.input)? diff --git a/crates/zeta_prompt/src/multi_region.rs b/crates/zeta_prompt/src/multi_region.rs index a2e50ca445998672a169f4220d13eb4c13a22e8b..5bd486df767aacef92484ae9d685994245755449 100644 --- a/crates/zeta_prompt/src/multi_region.rs +++ b/crates/zeta_prompt/src/multi_region.rs @@ -11,6 +11,7 @@ const MAX_NUDGE_LINES: usize = 5; pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>"; pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>"; pub const V0318_END_MARKER: &str = "<[end▁of▁sentence]>"; +pub const V0327_END_MARKER: &str = "<[end▁of▁sentence]>"; pub fn marker_tag(number: usize) -> String { format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}") @@ -143,6 +144,112 @@ pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec { compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES) } +fn line_start_at_or_before(text: &str, offset: usize) -> usize { + let bounded_offset = text.floor_char_boundary(offset.min(text.len())); + text[..bounded_offset] + .rfind('\n') + .map(|index| index + 1) + .unwrap_or(0) +} + +fn line_end_at_or_after(text: &str, offset: usize) -> usize { + let bounded_offset = text.floor_char_boundary(offset.min(text.len())); + if bounded_offset >= text.len() { + return text.len(); + } + + text[bounded_offset..] + .find('\n') + .map(|index| bounded_offset + index + 1) + .unwrap_or(text.len()) +} + +fn grow_v0327_candidate_range( + text: &str, + cursor_offset: usize, + editable_token_limit: usize, +) -> std::ops::Range { + if text.is_empty() { + return 0..0; + } + + let byte_budget = editable_token_limit.saturating_mul(3).max(1); + let half_budget = byte_budget / 2; + + let mut start = cursor_offset.saturating_sub(half_budget); + let mut end = start.saturating_add(byte_budget).min(text.len()); + + if end.saturating_sub(start) < byte_budget { + start = end.saturating_sub(byte_budget); + } + + start = line_start_at_or_before(text, start); + end = line_end_at_or_after(text, end); + + if start < end { + start..end + } else { + let line_start = line_start_at_or_before(text, cursor_offset); + let line_end = line_end_at_or_after(text, cursor_offset); + line_start..line_end.max(line_start) + } +} + +fn trim_v0327_candidate_range_to_markers( + text: &str, + candidate_range: std::ops::Range, + cursor_offset: usize, +) -> std::ops::Range { + let candidate_text = &text[candidate_range.clone()]; + let marker_offsets = compute_marker_offsets_v0318(candidate_text); + + if marker_offsets.len() <= 2 { + return candidate_range; + } + + let candidate_cursor_offset = cursor_offset + .saturating_sub(candidate_range.start) + .min(candidate_text.len()); + let first_internal_marker_index = if candidate_cursor_offset >= marker_offsets[1] { + 1 + } else { + 0 + }; + let last_internal_marker_index = marker_offsets.len() - 2; + let last_marker_index = marker_offsets.len() - 1; + let end_marker_index = if candidate_cursor_offset <= marker_offsets[last_internal_marker_index] + { + last_internal_marker_index + } else { + last_marker_index + }; + + let trimmed_start = candidate_range.start + marker_offsets[first_internal_marker_index]; + let trimmed_end = candidate_range.start + marker_offsets[end_marker_index]; + + if trimmed_start < trimmed_end { + trimmed_start..trimmed_end + } else { + let block_index = cursor_block_index(Some(candidate_cursor_offset), &marker_offsets); + let start = candidate_range.start + marker_offsets[block_index]; + let end = candidate_range.start + marker_offsets[block_index + 1]; + if start < end { + start..end + } else { + candidate_range + } + } +} + +pub fn compute_v0327_editable_range( + text: &str, + cursor_offset: usize, + editable_token_limit: usize, +) -> std::ops::Range { + let candidate_range = grow_v0327_candidate_range(text, cursor_offset, editable_token_limit); + trim_v0327_candidate_range_to_markers(text, candidate_range, cursor_offset) +} + /// Write the editable region content with marker tags, inserting the cursor /// marker at the given offset within the editable text. pub fn write_editable_with_markers( @@ -1113,6 +1220,32 @@ hhhhhhhhhh = 8; assert_eq!(offsets, vec![0, 0]); } + #[test] + fn test_compute_v0327_editable_range_trims_to_marker_boundaries() { + let text = (0..80).map(|_| "x\n").collect::(); + let cursor_offset = text.find("x\nx\nx\nx\nx\n").expect("cursor anchor exists") + 40; + + let candidate_range = grow_v0327_candidate_range(&text, cursor_offset, 20); + let editable_range = compute_v0327_editable_range(&text, cursor_offset, 20); + let marker_offsets = compute_marker_offsets_v0318(&text[candidate_range.clone()]); + let relative_start = editable_range.start - candidate_range.start; + let relative_end = editable_range.end - candidate_range.start; + + assert!( + marker_offsets.len() > 2, + "expected interior markers: {marker_offsets:?}" + ); + assert!(marker_offsets.contains(&relative_start)); + assert!(marker_offsets.contains(&relative_end)); + assert!(editable_range.start <= cursor_offset); + assert!(editable_range.end >= cursor_offset); + assert!( + editable_range.start > candidate_range.start + || editable_range.end < candidate_range.end, + "expected at least one side to trim from {candidate_range:?} down to {editable_range:?}" + ); + } + #[test] fn test_compute_marker_offsets_avoid_short_markdown_blocks() { let text = "\ diff --git a/crates/zeta_prompt/src/zeta_prompt.rs b/crates/zeta_prompt/src/zeta_prompt.rs index 3fa12a7a789b196b0219fadaec24f38b42a5b259..bf5695dcad443ee827aeae7ab2bd3939600cfb61 100644 --- a/crates/zeta_prompt/src/zeta_prompt.rs +++ b/crates/zeta_prompt/src/zeta_prompt.rs @@ -89,10 +89,12 @@ pub enum ZetaFormat { V0306SeedMultiRegions, /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit. V0316SeedMultiRegions, - /// V0316 with larger block sizes. - V0318SeedMultiRegions, /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1). V0317SeedMultiRegions, + /// V0316 with larger block sizes. + V0318SeedMultiRegions, + /// V0318-style markers over the full available current file excerpt with no related files. + V0327SingleFile, } impl std::fmt::Display for ZetaFormat { @@ -279,6 +281,18 @@ pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] ]; TOKENS } + ZetaFormat::V0327SingleFile => { + static TOKENS: &[&str] = &[ + seed_coder::FIM_SUFFIX, + seed_coder::FIM_PREFIX, + seed_coder::FIM_MIDDLE, + seed_coder::FILE_MARKER, + multi_region::V0327_END_MARKER, + CURSOR_MARKER, + multi_region::MARKER_TAG_PREFIX, + ]; + TOKENS + } ZetaFormat::V0306SeedMultiRegions => { static TOKENS: &[&str] = &[ seed_coder::FIM_SUFFIX, @@ -310,7 +324,9 @@ pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) { | ZetaFormat::V0316SeedMultiRegions | ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0317SeedMultiRegions + | ZetaFormat::V0327SingleFile | ZetaFormat::V0304SeedNoEdits => (350, 150), + ZetaFormat::V0304VariableEdit => (1024, 0), } } @@ -331,9 +347,11 @@ pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] { ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER], ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER], ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER], + ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER], } } +/// Return (editable_range, context_range) for the prompt format pub fn excerpt_ranges_for_format( format: ZetaFormat, ranges: &ExcerptRanges, @@ -360,6 +378,14 @@ pub fn excerpt_ranges_for_format( ranges.editable_350.clone(), ranges.editable_350_context_150.clone(), ), + ZetaFormat::V0327SingleFile => ( + ranges.editable_350_context_150.clone(), + ranges.context_8192.clone().unwrap_or( + // shouldn't be used, only for compat with old data/clients + ranges.editable_350_context_150.clone(), + ), + ), + ZetaFormat::V0304VariableEdit => { let context = ranges .editable_350_context_1024 @@ -463,6 +489,14 @@ pub fn write_cursor_excerpt_section_for_format( cursor_offset, )); } + ZetaFormat::V0327SingleFile => { + prompt.push_str(&build_v0318_cursor_prefix( + path, + context, + editable_range, + cursor_offset, + )); + } } } @@ -585,6 +619,40 @@ fn offset_range_to_row_range(text: &str, range: Range) -> Range { return start_row..end_row; } +fn assemble_single_file_fim_prompt( + context: &str, + editable_range: &Range, + cursor_prefix_section: &str, + events: &[Arc], + max_tokens: usize, +) -> String { + let suffix_section = seed_coder::build_suffix_section(context, editable_range); + + let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len()); + let cursor_prefix_tokens = + estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len()); + let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens); + + let edit_history_section = format_edit_history_within_budget( + events, + seed_coder::FILE_MARKER, + "edit_history", + budget_after_cursor, + max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile), + ); + + let mut prompt = String::new(); + prompt.push_str(&suffix_section); + prompt.push_str(seed_coder::FIM_PREFIX); + prompt.push_str(&edit_history_section); + if !edit_history_section.is_empty() { + prompt.push('\n'); + } + prompt.push_str(cursor_prefix_section); + prompt.push_str(seed_coder::FIM_MIDDLE); + prompt +} + pub fn format_prompt_with_budget_for_format( input: &ZetaPromptInput, format: ZetaFormat, @@ -596,18 +664,19 @@ pub fn format_prompt_with_budget_for_format( let empty_files = Vec::new(); let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files); - let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row { + let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row { let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range); let row_range = relative_row_range.start + cursor_excerpt_start_row ..relative_row_range.end + cursor_excerpt_start_row; - &filter_redundant_excerpts( + filter_redundant_excerpts( input_related_files.to_vec(), input.cursor_path.as_ref(), row_range, ) } else { - input_related_files + input_related_files.to_vec() }; + let related_files = filtered_related_files.as_slice(); let prompt = match format { ZetaFormat::V0211SeedCoder @@ -636,6 +705,25 @@ pub fn format_prompt_with_budget_for_format( budget_with_margin, ) } + ZetaFormat::V0327SingleFile => { + let mut cursor_section = String::new(); + write_cursor_excerpt_section_for_format( + format, + &mut cursor_section, + path, + context, + &editable_range, + cursor_offset, + ); + + assemble_single_file_fim_prompt( + context, + &editable_range, + &cursor_section, + &input.events, + apply_prompt_budget_margin(max_tokens), + ) + } _ => { let mut cursor_section = String::new(); write_cursor_excerpt_section_for_format( @@ -714,7 +802,8 @@ pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize { | ZetaFormat::V0306SeedMultiRegions | ZetaFormat::V0316SeedMultiRegions | ZetaFormat::V0318SeedMultiRegions - | ZetaFormat::V0317SeedMultiRegions => 6, + | ZetaFormat::V0317SeedMultiRegions + | ZetaFormat::V0327SingleFile => 6, } } @@ -737,7 +826,8 @@ pub fn get_prefill_for_format( | ZetaFormat::V0306SeedMultiRegions | ZetaFormat::V0316SeedMultiRegions | ZetaFormat::V0318SeedMultiRegions - | ZetaFormat::V0317SeedMultiRegions => String::new(), + | ZetaFormat::V0317SeedMultiRegions + | ZetaFormat::V0327SingleFile => String::new(), } } @@ -752,6 +842,8 @@ pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER), ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER), ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER), + ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER), + ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion @@ -822,6 +914,22 @@ pub fn encode_patch_as_output_for_format( Ok(None) } } + ZetaFormat::V0327SingleFile => { + let empty_patch = patch.lines().count() <= 3; + if empty_patch { + let marker_offsets = + multi_region::compute_marker_offsets_v0318(old_editable_region); + let marker_num = + multi_region::nearest_marker_number(cursor_offset, &marker_offsets); + let tag = multi_region::marker_tag(marker_num); + Ok(Some(format!( + "{tag}{tag}{}", + multi_region::V0327_END_MARKER + ))) + } else { + Ok(None) + } + } _ => Ok(None), } } @@ -1027,6 +1135,10 @@ pub fn parse_zeta2_model_output( Some(cursor_offset_in_editable), )?, ), + ZetaFormat::V0327SingleFile => ( + editable_range_in_context, + multi_region::apply_marker_span_v0318(old_editable_region, output)?, + ), _ => (editable_range_in_context, output.to_string()), }; @@ -1135,7 +1247,16 @@ pub fn resolve_cursor_region( input: &ZetaPromptInput, format: ZetaFormat, ) -> (&str, Range, Range, usize) { - let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges { + let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile { + let (editable_tokens, _) = token_limits_for_format(format); + let context_range = 0..input.cursor_excerpt.len(); + let editable_range = multi_region::compute_v0327_editable_range( + &input.cursor_excerpt, + input.cursor_offset_in_excerpt, + editable_tokens, + ); + (editable_range, context_range) + } else if let Some(syntax_ranges) = &input.syntax_ranges { let (editable_tokens, context_tokens) = token_limits_for_format(format); compute_editable_and_context_ranges( &input.cursor_excerpt, @@ -1147,6 +1268,7 @@ pub fn resolve_cursor_region( } else { excerpt_range_for_format(format, &input.excerpt_ranges) }; + let context_start = context_range.start; let context_text = &input.cursor_excerpt[context_range.clone()]; let adjusted_editable = @@ -3218,7 +3340,7 @@ pub mod seed_coder { prompt } - fn build_suffix_section(context: &str, editable_range: &Range) -> String { + pub(crate) fn build_suffix_section(context: &str, editable_range: &Range) -> String { let mut section = String::new(); section.push_str(FIM_SUFFIX); section.push_str(&context[editable_range.end..]); @@ -5005,6 +5127,71 @@ mod tests { assert!(prompt.contains(CURSOR_MARKER)); } + #[test] + fn test_v0327_formats_single_file_prompt_without_related_files() { + let excerpt = indoc! {" + line01 + line02 + line03 + line04 + line05 + line06 + line07 + line08 + line09 + line10 + line11 + line12 + line13 + line14 + line15 + line16 + line17 + line18 + line19 + line20 + "}; + let cursor_offset = excerpt.find("line10").expect("cursor line exists"); + let input = make_input( + excerpt, + 0..excerpt.len(), + cursor_offset, + vec![make_event("a.rs", "-x\n+y\n")], + vec![make_related_file("related.rs", "fn helper() {}\n")], + ); + + let prompt = + format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096) + .expect("v0327 prompt should fit"); + + assert!(prompt.contains("line01")); + assert!(prompt.contains("line20")); + assert!(prompt.contains("edit_history")); + assert!(prompt.contains("test.rs")); + assert!(prompt.contains(CURSOR_MARKER)); + assert!(!prompt.contains("related.rs")); + assert!(!prompt.contains("fn helper() {}")); + } + + #[test] + fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() { + let excerpt = (0..80) + .map(|index| format!("l{index:02}\n")) + .collect::(); + let cursor_offset = excerpt.find("l40").expect("cursor line exists"); + let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]); + + let (context, editable_range, context_range, adjusted_cursor) = + resolve_cursor_region(&input, ZetaFormat::V0327SingleFile); + + assert_eq!(context, excerpt); + assert_eq!(context_range, 0..excerpt.len()); + assert_eq!(adjusted_cursor, cursor_offset); + assert!(editable_range.start < adjusted_cursor); + assert!(editable_range.end > adjusted_cursor); + assert!(editable_range.end < excerpt.len()); + } + #[test] fn test_seed_coder_no_context() { let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);