Detailed changes
@@ -33,10 +33,10 @@ pub async fn run_format_prompt(
.context("prompt_inputs must be set after context retrieval")?;
match args.provider {
- PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
+ PredictionProvider::Teacher(_, zeta_format)
+ | PredictionProvider::TeacherNonBatching(_, zeta_format) => {
step_progress.set_substatus("formatting teacher prompt");
- let zeta_format = ZetaFormat::default();
let (editable_range, context_range) =
excerpt_range_for_format(zeta_format, &prompt_inputs.excerpt_ranges);
@@ -163,6 +163,20 @@ pub fn zeta2_output_for_patch(
);
}
+ if version == ZetaFormat::V0327SingleFile {
+ let cursor_in_new = cursor_offset.map(|cursor_offset| {
+ let hunk_start = first_hunk_offset.unwrap_or(0);
+ result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
+ });
+ return multi_region::encode_from_old_and_new_v0318(
+ &old_editable_region,
+ &result,
+ cursor_in_new,
+ zeta_prompt::CURSOR_MARKER,
+ multi_region::V0327_END_MARKER,
+ );
+ }
+
if version == ZetaFormat::V0316SeedMultiRegions {
let cursor_in_new = cursor_offset.map(|cursor_offset| {
let hunk_start = first_hunk_offset.unwrap_or(0);
@@ -46,6 +46,7 @@ use std::fmt::Display;
use std::fs::{File, OpenOptions};
use std::hash::{Hash, Hasher};
use std::io::{BufRead, BufReader, BufWriter, Write};
+use std::str::FromStr;
use std::sync::Mutex;
use std::{path::PathBuf, sync::Arc};
@@ -363,9 +364,9 @@ enum PredictionProvider {
Zeta1,
Zeta2(ZetaFormat),
Baseten(ZetaFormat),
- Teacher(TeacherBackend),
+ Teacher(TeacherBackend, ZetaFormat),
TeacherMultiRegion(TeacherBackend),
- TeacherNonBatching(TeacherBackend),
+ TeacherNonBatching(TeacherBackend, ZetaFormat),
TeacherMultiRegionNonBatching(TeacherBackend),
Repair,
}
@@ -383,12 +384,14 @@ impl std::fmt::Display for PredictionProvider {
PredictionProvider::Zeta1 => write!(f, "zeta1"),
PredictionProvider::Zeta2(format) => write!(f, "zeta2:{format}"),
PredictionProvider::Baseten(format) => write!(f, "baseten:{format}"),
- PredictionProvider::Teacher(backend) => write!(f, "teacher:{backend}"),
+ PredictionProvider::Teacher(backend, format) => {
+ write!(f, "teacher:{backend}:{format:?}")
+ }
PredictionProvider::TeacherMultiRegion(backend) => {
write!(f, "teacher-multi-region:{backend}")
}
- PredictionProvider::TeacherNonBatching(backend) => {
- write!(f, "teacher-non-batching:{backend}")
+ PredictionProvider::TeacherNonBatching(backend, format) => {
+ write!(f, "teacher-non-batching:{backend}:{format:?}")
}
PredictionProvider::TeacherMultiRegionNonBatching(backend) => {
write!(f, "teacher-multi-region-non-batching:{backend}")
@@ -412,12 +415,16 @@ impl std::str::FromStr for PredictionProvider {
let format = arg.map(ZetaFormat::parse).transpose()?.unwrap_or_default();
Ok(PredictionProvider::Zeta2(format))
}
- "teacher" => {
+ "teacher" => parse_teacher_args(arg),
+ "teacher-non-batching" | "teacher_non_batching" => {
let backend = arg
.map(|a| a.parse())
.transpose()?
.unwrap_or(TeacherBackend::default());
- Ok(PredictionProvider::Teacher(backend))
+ Ok(PredictionProvider::TeacherNonBatching(
+ backend,
+ ZetaFormat::default(),
+ ))
}
"teacher-multi-region" | "teacher_multi_region" => {
let backend = arg
@@ -426,13 +433,6 @@ impl std::str::FromStr for PredictionProvider {
.unwrap_or(TeacherBackend::default());
Ok(PredictionProvider::TeacherMultiRegion(backend))
}
- "teacher-non-batching" | "teacher_non_batching" => {
- let backend = arg
- .map(|a| a.parse())
- .transpose()?
- .unwrap_or(TeacherBackend::default());
- Ok(PredictionProvider::TeacherNonBatching(backend))
- }
"teacher-multi-region-non-batching" | "teacher_multi_region_non_batching" => {
let backend = arg
.map(|a| a.parse())
@@ -461,6 +461,27 @@ impl std::str::FromStr for PredictionProvider {
}
}
/// Parses the optional argument of a `teacher` provider spec into a
/// `PredictionProvider::Teacher`.
///
/// `arg` is split on `:` and empty segments are skipped. Each remaining
/// segment is tried as a `TeacherBackend` first and, if that fails, as a
/// `ZetaFormat`. Segments may therefore appear in any order, and a later
/// segment of the same kind overwrites an earlier one. Whatever is not
/// supplied keeps its `Default` value; `None` yields both defaults.
///
/// # Errors
/// Returns an error if a non-empty segment parses as neither a backend nor a
/// format.
+fn parse_teacher_args(arg: Option<&str>) -> Result<PredictionProvider, anyhow::Error> {
+ let mut backend = TeacherBackend::default();
+ let mut format = ZetaFormat::default();
+
+ for arg in arg.unwrap_or_default().split(':') {
+ if arg.is_empty() {
+ continue;
+ }
+
// Backend parsing is attempted before format parsing, so a token that is
// valid as both is treated as a backend.
+ if let Ok(parsed_backend) = TeacherBackend::from_str(arg) {
+ backend = parsed_backend;
+ } else if let Ok(parsed_format) = ZetaFormat::parse(arg) {
+ format = parsed_format;
+ } else {
+ anyhow::bail!("unknown teacher backend or zeta format `{arg}`");
+ }
+ }
+
+ Ok(PredictionProvider::Teacher(backend, format))
+}
+
impl Serialize for PredictionProvider {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -37,7 +37,7 @@ pub fn parse_prediction_output(
provider: PredictionProvider,
) -> Result<(String, Option<ActualCursor>)> {
match provider {
- PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_) => {
+ PredictionProvider::Teacher(_, _) | PredictionProvider::TeacherNonBatching(_, _) => {
TeacherPrompt::parse(example, actual_output)
}
PredictionProvider::TeacherMultiRegion(_)
@@ -57,10 +57,16 @@ pub async fn run_prediction(
);
};
- if let PredictionProvider::Teacher(backend)
- | PredictionProvider::TeacherMultiRegion(backend)
- | PredictionProvider::TeacherNonBatching(backend)
- | PredictionProvider::TeacherMultiRegionNonBatching(backend) = provider
+ if matches!(
+ provider,
+ PredictionProvider::TeacherMultiRegion(..)
+ | PredictionProvider::TeacherMultiRegionNonBatching(..)
+ ) {
+ anyhow::bail!("Teacher multi-region providers are not supported for prediction.");
+ }
+
+ if let PredictionProvider::Teacher(backend, _)
+ | PredictionProvider::TeacherNonBatching(backend, _) = provider
{
run_context_retrieval(example, app_state.clone(), example_progress, cx.clone()).await?;
run_format_prompt(
@@ -416,14 +422,14 @@ async fn predict_anthropic(
.prompt
.as_ref()
.map(|prompt| prompt.provider)
- .unwrap_or(PredictionProvider::Teacher(backend))
+ .unwrap_or(PredictionProvider::Teacher(backend, ZetaFormat::default()))
} else {
match example.prompt.as_ref().map(|prompt| prompt.provider) {
Some(PredictionProvider::TeacherMultiRegion(_))
| Some(PredictionProvider::TeacherMultiRegionNonBatching(_)) => {
PredictionProvider::TeacherMultiRegionNonBatching(backend)
}
- _ => PredictionProvider::TeacherNonBatching(backend),
+ _ => PredictionProvider::TeacherNonBatching(backend, ZetaFormat::default()),
}
};
@@ -445,7 +451,7 @@ async fn predict_anthropic(
Some(PredictionProvider::TeacherMultiRegion(_)) => {
PredictionProvider::TeacherMultiRegion(backend)
}
- _ => PredictionProvider::Teacher(backend),
+ _ => PredictionProvider::Teacher(backend, ZetaFormat::default()),
}
} else {
match example.prompt.as_ref().map(|prompt| prompt.provider) {
@@ -453,7 +459,7 @@ async fn predict_anthropic(
| Some(PredictionProvider::TeacherMultiRegionNonBatching(_)) => {
PredictionProvider::TeacherMultiRegionNonBatching(backend)
}
- _ => PredictionProvider::TeacherNonBatching(backend),
+ _ => PredictionProvider::TeacherNonBatching(backend, ZetaFormat::default()),
}
},
cumulative_logprob: None,
@@ -535,14 +541,14 @@ async fn predict_openai(
.prompt
.as_ref()
.map(|prompt| prompt.provider)
- .unwrap_or(PredictionProvider::Teacher(backend))
+ .unwrap_or(PredictionProvider::Teacher(backend, ZetaFormat::default()))
} else {
match example.prompt.as_ref().map(|prompt| prompt.provider) {
Some(PredictionProvider::TeacherMultiRegion(_))
| Some(PredictionProvider::TeacherMultiRegionNonBatching(_)) => {
PredictionProvider::TeacherMultiRegionNonBatching(backend)
}
- _ => PredictionProvider::TeacherNonBatching(backend),
+ _ => PredictionProvider::TeacherNonBatching(backend, ZetaFormat::default()),
}
};
@@ -564,7 +570,7 @@ async fn predict_openai(
Some(PredictionProvider::TeacherMultiRegion(_)) => {
PredictionProvider::TeacherMultiRegion(backend)
}
- _ => PredictionProvider::Teacher(backend),
+ _ => PredictionProvider::Teacher(backend, ZetaFormat::default()),
}
} else {
match example.prompt.as_ref().map(|prompt| prompt.provider) {
@@ -572,7 +578,7 @@ async fn predict_openai(
| Some(PredictionProvider::TeacherMultiRegionNonBatching(_)) => {
PredictionProvider::TeacherMultiRegionNonBatching(backend)
}
- _ => PredictionProvider::TeacherNonBatching(backend),
+ _ => PredictionProvider::TeacherNonBatching(backend, ZetaFormat::default()),
}
},
cumulative_logprob: None,
@@ -671,7 +677,7 @@ pub async fn predict_baseten(
pub async fn sync_batches(provider: Option<&PredictionProvider>) -> anyhow::Result<()> {
match provider {
- Some(PredictionProvider::Teacher(backend))
+ Some(PredictionProvider::Teacher(backend, _))
| Some(PredictionProvider::TeacherMultiRegion(backend)) => match backend {
TeacherBackend::Sonnet45 | TeacherBackend::Sonnet46 => {
let llm_client = ANTHROPIC_CLIENT.get_or_init(|| {
@@ -703,7 +709,7 @@ pub async fn reprocess_after_batch_wait(
examples: &mut [Example],
args: &PredictArgs,
) -> anyhow::Result<()> {
- let Some(PredictionProvider::Teacher(backend)) = args.provider else {
+ let Some(PredictionProvider::Teacher(backend, _)) = args.provider else {
return Ok(());
};
@@ -762,7 +768,7 @@ pub async fn wait_for_batches(provider: Option<&PredictionProvider>) -> anyhow::
fn pending_batch_count(provider: Option<&PredictionProvider>) -> anyhow::Result<usize> {
match provider {
- Some(PredictionProvider::Teacher(backend)) => match backend {
+ Some(PredictionProvider::Teacher(backend, _)) => match backend {
TeacherBackend::Sonnet45 | TeacherBackend::Sonnet46 => {
let llm_client = ANTHROPIC_CLIENT.get_or_init(|| {
AnthropicClient::batch(&crate::paths::LLM_CACHE_DB)
@@ -525,6 +525,7 @@ mod tests {
use crate::{PredictionProvider, TeacherBackend};
use edit_prediction::example_spec::ExampleSpec;
use std::{path::Path, sync::Arc};
+ use zeta_prompt::ZetaFormat;
fn example_with_previous_prediction() -> Example {
Example {
@@ -557,7 +558,10 @@ mod tests {
editable_region_offset: Some(4),
}),
error: None,
- provider: PredictionProvider::Teacher(TeacherBackend::Sonnet45),
+ provider: PredictionProvider::Teacher(
+ TeacherBackend::Sonnet45,
+ ZetaFormat::default(),
+ ),
cumulative_logprob: None,
avg_logprob: None,
}],
@@ -52,7 +52,7 @@ pub async fn run_scoring(
let old_editable_region = if let Some(p) = example.prompt.as_ref() {
if matches!(
p.provider,
- PredictionProvider::Teacher(_) | PredictionProvider::TeacherNonBatching(_)
+ PredictionProvider::Teacher(_, _) | PredictionProvider::TeacherNonBatching(_, _)
) {
Some(
TeacherPrompt::extract_editable_region(&p.input)?
@@ -11,6 +11,7 @@ const MAX_NUDGE_LINES: usize = 5;
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0318_END_MARKER: &str = "<[end▁of▁sentence]>";
/// End-of-output marker for the V0327 format. The literal is currently the
/// same string as the V0316, V0317 and V0318 end markers.
+pub const V0327_END_MARKER: &str = "<[end▁of▁sentence]>";
pub fn marker_tag(number: usize) -> String {
format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
@@ -143,6 +144,112 @@ pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec<usize> {
compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES)
}
/// Returns the byte offset at which the line containing `offset` begins.
///
/// `offset` is first clamped to `text.len()` and moved back to a char
/// boundary, so an out-of-range or mid-character offset cannot make the slice
/// below panic. The result is one byte past the last `\n` found before that
/// position, or `0` when no newline precedes it. An offset that already sits
/// at the start of a line is returned unchanged.
+fn line_start_at_or_before(text: &str, offset: usize) -> usize {
+ let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
+ text[..bounded_offset]
+ .rfind('\n')
+ .map(|index| index + 1)
+ .unwrap_or(0)
+}
+
/// Returns the byte offset just past the newline that terminates the line
/// containing `offset`.
///
/// `offset` is clamped to `text.len()` and moved back to a char boundary
/// before searching. If that lands at the end of the text, `text.len()` is
/// returned. Otherwise the search for `\n` starts at the bounded offset
/// itself and the result includes that newline; when no newline remains the
/// result is `text.len()`.
///
/// Note that an offset sitting exactly at the start of a line yields the end
/// of that line, not the offset itself.
+fn line_end_at_or_after(text: &str, offset: usize) -> usize {
+ let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
+ if bounded_offset >= text.len() {
+ return text.len();
+ }
+
+ text[bounded_offset..]
+ .find('\n')
+ .map(|index| bounded_offset + index + 1)
+ .unwrap_or(text.len())
+}
+
/// Computes a line-aligned byte range around `cursor_offset` whose size is
/// derived from `editable_token_limit`.
///
/// The byte budget is `editable_token_limit * 3` (saturating, at least 1).
/// NOTE(review): the factor 3 is presumably a bytes-per-token estimate; confirm.
///
/// The window initially starts half a budget before the cursor and spans one
/// full budget, clamped to `text.len()`. If clamping left it shorter than the
/// budget, the start is pulled back (saturating at 0) to compensate. Both ends
/// are then widened to line boundaries with `line_start_at_or_before` and
/// `line_end_at_or_after`.
///
/// Returns `0..0` for empty text. If the widened range is empty, the range of
/// the line containing `cursor_offset` is returned instead.
+fn grow_v0327_candidate_range(
+ text: &str,
+ cursor_offset: usize,
+ editable_token_limit: usize,
+) -> std::ops::Range<usize> {
+ if text.is_empty() {
+ return 0..0;
+ }
+
+ let byte_budget = editable_token_limit.saturating_mul(3).max(1);
+ let half_budget = byte_budget / 2;
+
+ let mut start = cursor_offset.saturating_sub(half_budget);
+ let mut end = start.saturating_add(byte_budget).min(text.len());
+
// The window was cut short by the end of the text: extend it backwards so
// the unused budget is spent before the cursor instead.
+ if end.saturating_sub(start) < byte_budget {
+ start = end.saturating_sub(byte_budget);
+ }
+
+ start = line_start_at_or_before(text, start);
+ end = line_end_at_or_after(text, end);
+
+ if start < end {
+ start..end
+ } else {
+ let line_start = line_start_at_or_before(text, cursor_offset);
+ let line_end = line_end_at_or_after(text, cursor_offset);
+ line_start..line_end.max(line_start)
+ }
+}
+
/// Shrinks `candidate_range` so that it starts and ends on marker offsets
/// produced by `compute_marker_offsets_v0318` for the candidate text.
///
/// Marker offsets are relative to `candidate_range.start`; the cursor is
/// converted to the same coordinate space and clamped to the candidate length.
/// NOTE(review): the logic assumes the offsets are ascending, with the first
/// and last bounding the candidate text; confirm against
/// `compute_marker_offsets_v0318`.
///
/// * With two or fewer marker offsets the candidate range is returned as is.
/// * The start moves to the second marker offset when the cursor is at or past
///   it; otherwise it stays on the first marker offset.
/// * The end moves to the second-to-last marker offset when the cursor is at
///   or before it; otherwise it stays on the last marker offset.
/// * If that leaves an empty range, the block selected by `cursor_block_index`
///   is used, and if that is empty too, the untrimmed candidate range.
///
/// `text` is sliced with `candidate_range`, so the range must be in bounds and
/// lie on char boundaries.
+fn trim_v0327_candidate_range_to_markers(
+ text: &str,
+ candidate_range: std::ops::Range<usize>,
+ cursor_offset: usize,
+) -> std::ops::Range<usize> {
+ let candidate_text = &text[candidate_range.clone()];
+ let marker_offsets = compute_marker_offsets_v0318(candidate_text);
+
+ if marker_offsets.len() <= 2 {
+ return candidate_range;
+ }
+
+ let candidate_cursor_offset = cursor_offset
+ .saturating_sub(candidate_range.start)
+ .min(candidate_text.len());
+ let first_internal_marker_index = if candidate_cursor_offset >= marker_offsets[1] {
+ 1
+ } else {
+ 0
+ };
+ let last_internal_marker_index = marker_offsets.len() - 2;
+ let last_marker_index = marker_offsets.len() - 1;
+ let end_marker_index = if candidate_cursor_offset <= marker_offsets[last_internal_marker_index]
+ {
+ last_internal_marker_index
+ } else {
+ last_marker_index
+ };
+
// Translate the chosen marker offsets back into offsets within `text`.
+ let trimmed_start = candidate_range.start + marker_offsets[first_internal_marker_index];
+ let trimmed_end = candidate_range.start + marker_offsets[end_marker_index];
+
+ if trimmed_start < trimmed_end {
+ trimmed_start..trimmed_end
+ } else {
+ let block_index = cursor_block_index(Some(candidate_cursor_offset), &marker_offsets);
+ let start = candidate_range.start + marker_offsets[block_index];
+ let end = candidate_range.start + marker_offsets[block_index + 1];
+ if start < end {
+ start..end
+ } else {
+ candidate_range
+ }
+ }
+}
+
/// Computes the editable byte range within `text` for the V0327 format.
///
/// Grows a line-aligned candidate range around `cursor_offset` using
/// `grow_v0327_candidate_range`, then trims it to marker offsets using
/// `trim_v0327_candidate_range_to_markers`.
+pub fn compute_v0327_editable_range(
+ text: &str,
+ cursor_offset: usize,
+ editable_token_limit: usize,
+) -> std::ops::Range<usize> {
+ let candidate_range = grow_v0327_candidate_range(text, cursor_offset, editable_token_limit);
+ trim_v0327_candidate_range_to_markers(text, candidate_range, cursor_offset)
+}
+
/// Write the editable region content with marker tags, inserting the cursor
/// marker at the given offset within the editable text.
pub fn write_editable_with_markers(
@@ -1113,6 +1220,32 @@ hhhhhhhhhh = 8;
assert_eq!(offsets, vec![0, 0]);
}
// Checks that `compute_v0327_editable_range` returns a range whose ends both
// fall on marker offsets of the candidate range, that the range still
// contains the cursor, and that at least one side was trimmed.
+ #[test]
+ fn test_compute_v0327_editable_range_trims_to_marker_boundaries() {
+ let text = (0..80).map(|_| "x\n").collect::<String>();
// The anchor pattern matches at the very start of `text`, so the cursor ends
// up at byte offset 40.
+ let cursor_offset = text.find("x\nx\nx\nx\nx\n").expect("cursor anchor exists") + 40;
+
+ let candidate_range = grow_v0327_candidate_range(&text, cursor_offset, 20);
+ let editable_range = compute_v0327_editable_range(&text, cursor_offset, 20);
+ let marker_offsets = compute_marker_offsets_v0318(&text[candidate_range.clone()]);
// Marker offsets are relative to the candidate range, so compare in that space.
+ let relative_start = editable_range.start - candidate_range.start;
+ let relative_end = editable_range.end - candidate_range.start;
+
+ assert!(
+ marker_offsets.len() > 2,
+ "expected interior markers: {marker_offsets:?}"
+ );
+ assert!(marker_offsets.contains(&relative_start));
+ assert!(marker_offsets.contains(&relative_end));
+ assert!(editable_range.start <= cursor_offset);
+ assert!(editable_range.end >= cursor_offset);
+ assert!(
+ editable_range.start > candidate_range.start
+ || editable_range.end < candidate_range.end,
+ "expected at least one side to trim from {candidate_range:?} down to {editable_range:?}"
+ );
+ }
+
#[test]
fn test_compute_marker_offsets_avoid_short_markdown_blocks() {
let text = "\
@@ -89,10 +89,12 @@ pub enum ZetaFormat {
V0306SeedMultiRegions,
/// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
V0316SeedMultiRegions,
- /// V0316 with larger block sizes.
- V0318SeedMultiRegions,
/// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
V0317SeedMultiRegions,
+ /// V0316 with larger block sizes.
+ V0318SeedMultiRegions,
+ /// V0318-style markers over the full available current file excerpt with no related files.
+ V0327SingleFile,
}
impl std::fmt::Display for ZetaFormat {
@@ -279,6 +281,18 @@ pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str]
];
TOKENS
}
+ ZetaFormat::V0327SingleFile => {
+ static TOKENS: &[&str] = &[
+ seed_coder::FIM_SUFFIX,
+ seed_coder::FIM_PREFIX,
+ seed_coder::FIM_MIDDLE,
+ seed_coder::FILE_MARKER,
+ multi_region::V0327_END_MARKER,
+ CURSOR_MARKER,
+ multi_region::MARKER_TAG_PREFIX,
+ ];
+ TOKENS
+ }
ZetaFormat::V0306SeedMultiRegions => {
static TOKENS: &[&str] = &[
seed_coder::FIM_SUFFIX,
@@ -310,7 +324,9 @@ pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
| ZetaFormat::V0316SeedMultiRegions
| ZetaFormat::V0318SeedMultiRegions
| ZetaFormat::V0317SeedMultiRegions
+ | ZetaFormat::V0327SingleFile
| ZetaFormat::V0304SeedNoEdits => (350, 150),
+
ZetaFormat::V0304VariableEdit => (1024, 0),
}
}
@@ -331,9 +347,11 @@ pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
+ ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER],
}
}
+/// Returns the `(editable_range, context_range)` pair for the given prompt format.
pub fn excerpt_ranges_for_format(
format: ZetaFormat,
ranges: &ExcerptRanges,
@@ -360,6 +378,14 @@ pub fn excerpt_ranges_for_format(
ranges.editable_350.clone(),
ranges.editable_350_context_150.clone(),
),
+ ZetaFormat::V0327SingleFile => (
+ ranges.editable_350_context_150.clone(),
+ ranges.context_8192.clone().unwrap_or(
+ // shouldn't be used, only for compat with old data/clients
+ ranges.editable_350_context_150.clone(),
+ ),
+ ),
+
ZetaFormat::V0304VariableEdit => {
let context = ranges
.editable_350_context_1024
@@ -463,6 +489,14 @@ pub fn write_cursor_excerpt_section_for_format(
cursor_offset,
));
}
+ ZetaFormat::V0327SingleFile => {
+ prompt.push_str(&build_v0318_cursor_prefix(
+ path,
+ context,
+ editable_range,
+ cursor_offset,
+ ));
+ }
}
}
@@ -585,6 +619,40 @@ fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
return start_row..end_row;
}
/// Assembles a fill-in-the-middle prompt for the single-file format.
///
/// The output is, in order: the suffix section built from `context` and
/// `editable_range`, `FIM_PREFIX`, the edit-history section (followed by a
/// newline only when it is non-empty), the caller-rendered
/// `cursor_prefix_section`, and `FIM_MIDDLE`. No related-files section is
/// written.
///
/// The edit history gets whatever remains of `max_tokens` after subtracting
/// the estimated tokens of the suffix section plus `FIM_PREFIX` and of the
/// cursor section plus `FIM_MIDDLE` (saturating at zero). Its event count is
/// capped by `max_edit_event_count_for_format` for `V0327SingleFile`.
+fn assemble_single_file_fim_prompt(
+ context: &str,
+ editable_range: &Range<usize>,
+ cursor_prefix_section: &str,
+ events: &[Arc<Event>],
+ max_tokens: usize,
+) -> String {
+ let suffix_section = seed_coder::build_suffix_section(context, editable_range);
+
// The suffix and cursor sections are always emitted in full, so their
// estimated cost is taken off the budget before any edit history is added.
+ let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len());
+ let cursor_prefix_tokens =
+ estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len());
+ let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
+
+ let edit_history_section = format_edit_history_within_budget(
+ events,
+ seed_coder::FILE_MARKER,
+ "edit_history",
+ budget_after_cursor,
+ max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile),
+ );
+
+ let mut prompt = String::new();
+ prompt.push_str(&suffix_section);
+ prompt.push_str(seed_coder::FIM_PREFIX);
+ prompt.push_str(&edit_history_section);
+ if !edit_history_section.is_empty() {
+ prompt.push('\n');
+ }
+ prompt.push_str(cursor_prefix_section);
+ prompt.push_str(seed_coder::FIM_MIDDLE);
+ prompt
+}
+
pub fn format_prompt_with_budget_for_format(
input: &ZetaPromptInput,
format: ZetaFormat,
@@ -596,18 +664,19 @@ pub fn format_prompt_with_budget_for_format(
let empty_files = Vec::new();
let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
- let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
+ let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
let row_range = relative_row_range.start + cursor_excerpt_start_row
..relative_row_range.end + cursor_excerpt_start_row;
- &filter_redundant_excerpts(
+ filter_redundant_excerpts(
input_related_files.to_vec(),
input.cursor_path.as_ref(),
row_range,
)
} else {
- input_related_files
+ input_related_files.to_vec()
};
+ let related_files = filtered_related_files.as_slice();
let prompt = match format {
ZetaFormat::V0211SeedCoder
@@ -636,6 +705,25 @@ pub fn format_prompt_with_budget_for_format(
budget_with_margin,
)
}
+ ZetaFormat::V0327SingleFile => {
+ let mut cursor_section = String::new();
+ write_cursor_excerpt_section_for_format(
+ format,
+ &mut cursor_section,
+ path,
+ context,
+ &editable_range,
+ cursor_offset,
+ );
+
+ assemble_single_file_fim_prompt(
+ context,
+ &editable_range,
+ &cursor_section,
+ &input.events,
+ apply_prompt_budget_margin(max_tokens),
+ )
+ }
_ => {
let mut cursor_section = String::new();
write_cursor_excerpt_section_for_format(
@@ -714,7 +802,8 @@ pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
| ZetaFormat::V0306SeedMultiRegions
| ZetaFormat::V0316SeedMultiRegions
| ZetaFormat::V0318SeedMultiRegions
- | ZetaFormat::V0317SeedMultiRegions => 6,
+ | ZetaFormat::V0317SeedMultiRegions
+ | ZetaFormat::V0327SingleFile => 6,
}
}
@@ -737,7 +826,8 @@ pub fn get_prefill_for_format(
| ZetaFormat::V0306SeedMultiRegions
| ZetaFormat::V0316SeedMultiRegions
| ZetaFormat::V0318SeedMultiRegions
- | ZetaFormat::V0317SeedMultiRegions => String::new(),
+ | ZetaFormat::V0317SeedMultiRegions
+ | ZetaFormat::V0327SingleFile => String::new(),
}
}
@@ -752,6 +842,8 @@ pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str>
ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
+ ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER),
+
ZetaFormat::V0112MiddleAtEnd
| ZetaFormat::V0113Ordered
| ZetaFormat::V0114180EditableRegion
@@ -822,6 +914,22 @@ pub fn encode_patch_as_output_for_format(
Ok(None)
}
}
+ ZetaFormat::V0327SingleFile => {
+ let empty_patch = patch.lines().count() <= 3;
+ if empty_patch {
+ let marker_offsets =
+ multi_region::compute_marker_offsets_v0318(old_editable_region);
+ let marker_num =
+ multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
+ let tag = multi_region::marker_tag(marker_num);
+ Ok(Some(format!(
+ "{tag}{tag}{}",
+ multi_region::V0327_END_MARKER
+ )))
+ } else {
+ Ok(None)
+ }
+ }
_ => Ok(None),
}
}
@@ -1027,6 +1135,10 @@ pub fn parse_zeta2_model_output(
Some(cursor_offset_in_editable),
)?,
),
+ ZetaFormat::V0327SingleFile => (
+ editable_range_in_context,
+ multi_region::apply_marker_span_v0318(old_editable_region, output)?,
+ ),
_ => (editable_range_in_context, output.to_string()),
};
@@ -1135,7 +1247,16 @@ pub fn resolve_cursor_region(
input: &ZetaPromptInput,
format: ZetaFormat,
) -> (&str, Range<usize>, Range<usize>, usize) {
- let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
+ let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile {
+ let (editable_tokens, _) = token_limits_for_format(format);
+ let context_range = 0..input.cursor_excerpt.len();
+ let editable_range = multi_region::compute_v0327_editable_range(
+ &input.cursor_excerpt,
+ input.cursor_offset_in_excerpt,
+ editable_tokens,
+ );
+ (editable_range, context_range)
+ } else if let Some(syntax_ranges) = &input.syntax_ranges {
let (editable_tokens, context_tokens) = token_limits_for_format(format);
compute_editable_and_context_ranges(
&input.cursor_excerpt,
@@ -1147,6 +1268,7 @@ pub fn resolve_cursor_region(
} else {
excerpt_range_for_format(format, &input.excerpt_ranges)
};
+
let context_start = context_range.start;
let context_text = &input.cursor_excerpt[context_range.clone()];
let adjusted_editable =
@@ -3218,7 +3340,7 @@ pub mod seed_coder {
prompt
}
- fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
+ pub(crate) fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
let mut section = String::new();
section.push_str(FIM_SUFFIX);
section.push_str(&context[editable_range.end..]);
@@ -5005,6 +5127,71 @@ mod tests {
assert!(prompt.contains(CURSOR_MARKER));
}
// Formats a 20-line excerpt with `ZetaFormat::V0327SingleFile` and a
// 4096-token budget, passing one edit event and one related file. Asserts that
// the prompt contains the first and last excerpt lines, the `edit_history` and
// `test.rs` filename markers and the cursor marker, and that nothing from the
// related file appears.
+ #[test]
+ fn test_v0327_formats_single_file_prompt_without_related_files() {
+ let excerpt = indoc! {"
+ line01
+ line02
+ line03
+ line04
+ line05
+ line06
+ line07
+ line08
+ line09
+ line10
+ line11
+ line12
+ line13
+ line14
+ line15
+ line16
+ line17
+ line18
+ line19
+ line20
+ "};
+ let cursor_offset = excerpt.find("line10").expect("cursor line exists");
// NOTE(review): argument order presumably is (excerpt, editable range, cursor
// offset, events, related files); confirm against `make_input`.
+ let input = make_input(
+ excerpt,
+ 0..excerpt.len(),
+ cursor_offset,
+ vec![make_event("a.rs", "-x\n+y\n")],
+ vec![make_related_file("related.rs", "fn helper() {}\n")],
+ );
+
+ let prompt =
+ format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096)
+ .expect("v0327 prompt should fit");
+
+ assert!(prompt.contains("line01"));
+ assert!(prompt.contains("line20"));
+ assert!(prompt.contains("<filename>edit_history"));
+ assert!(prompt.contains("<filename>test.rs"));
+ assert!(prompt.contains(CURSOR_MARKER));
+ assert!(!prompt.contains("related.rs"));
+ assert!(!prompt.contains("fn helper() {}"));
+ }
+
// Resolves the cursor region of an 80-line excerpt for
// `ZetaFormat::V0327SingleFile`. Asserts that the context is the whole
// excerpt, that the cursor offset is unchanged, and that the editable range
// strictly contains the cursor while ending before the end of the excerpt.
+ #[test]
+ fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() {
+ let excerpt = (0..80)
+ .map(|index| format!("l{index:02}\n"))
+ .collect::<String>();
+ let cursor_offset = excerpt.find("l40").expect("cursor line exists");
+ let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]);
+
+ let (context, editable_range, context_range, adjusted_cursor) =
+ resolve_cursor_region(&input, ZetaFormat::V0327SingleFile);
+
+ assert_eq!(context, excerpt);
+ assert_eq!(context_range, 0..excerpt.len());
+ assert_eq!(adjusted_cursor, cursor_offset);
+ assert!(editable_range.start < adjusted_cursor);
+ assert!(editable_range.end > adjusted_cursor);
+ assert!(editable_range.end < excerpt.len());
+ }
+
#[test]
fn test_seed_coder_no_context() {
let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);