From fba7f4d8cc0466d22e291547b47e050ea77090cf Mon Sep 17 00:00:00 2001 From: Agus Zubiaga Date: Thu, 16 Oct 2025 11:13:19 -0300 Subject: [PATCH] zeta2: Update prompts to match training more closely (#40383) Release Notes: - N/A --- .../cloud_llm_client/src/predict_edits_v3.rs | 6 +- .../src/cloud_zeta2_prompt.rs | 103 +++++++++++++----- .../src/edit_prediction_context.rs | 26 +++-- crates/zeta2/src/zeta2.rs | 1 + crates/zeta2_tools/src/zeta2_tools.rs | 27 +++-- crates/zeta_cli/src/main.rs | 5 +- 6 files changed, 116 insertions(+), 52 deletions(-) diff --git a/crates/cloud_llm_client/src/predict_edits_v3.rs b/crates/cloud_llm_client/src/predict_edits_v3.rs index 60bbd8c8d6e55019f6b91df94a103eb83f3a100d..be665f94a460c170ce446bd14634a283c3255877 100644 --- a/crates/cloud_llm_client/src/predict_edits_v3.rs +++ b/crates/cloud_llm_client/src/predict_edits_v3.rs @@ -47,13 +47,13 @@ pub struct PredictEditsRequest { pub enum PromptFormat { MarkedExcerpt, LabeledSections, - NumberedLines, + NumLinesUniDiff, /// Prompt format intended for use via zeta_cli OnlySnippets, } impl PromptFormat { - pub const DEFAULT: PromptFormat = PromptFormat::NumberedLines; + pub const DEFAULT: PromptFormat = PromptFormat::NumLinesUniDiff; } impl Default for PromptFormat { @@ -74,7 +74,7 @@ impl std::fmt::Display for PromptFormat { PromptFormat::MarkedExcerpt => write!(f, "Marked Excerpt"), PromptFormat::LabeledSections => write!(f, "Labeled Sections"), PromptFormat::OnlySnippets => write!(f, "Only Snippets"), - PromptFormat::NumberedLines => write!(f, "Numbered Lines"), + PromptFormat::NumLinesUniDiff => write!(f, "Numbered Lines / Unified Diff"), } } } diff --git a/crates/cloud_zeta2_prompt/src/cloud_zeta2_prompt.rs b/crates/cloud_zeta2_prompt/src/cloud_zeta2_prompt.rs index d68c0defef050985160688de0d541671866a91ac..b8a9ef5f9a2e9e48c9f4bf6d4aef1cc168ba50df 100644 --- a/crates/cloud_zeta2_prompt/src/cloud_zeta2_prompt.rs +++ b/crates/cloud_zeta2_prompt/src/cloud_zeta2_prompt.rs @@ -15,27 +15,30 @@ use strum::{EnumIter, IntoEnumIterator}; pub const DEFAULT_MAX_PROMPT_BYTES: usize = 10 * 1024; -pub const CURSOR_MARKER: &str = "<|cursor_position|>"; +pub const CURSOR_MARKER: &str = "<|user_cursor|>"; /// NOTE: Differs from zed version of constant - includes a newline pub const EDITABLE_REGION_START_MARKER_WITH_NEWLINE: &str = "<|editable_region_start|>\n"; /// NOTE: Differs from zed version of constant - includes a newline pub const EDITABLE_REGION_END_MARKER_WITH_NEWLINE: &str = "<|editable_region_end|>\n"; // TODO: use constants for markers? -const MARKED_EXCERPT_SYSTEM_PROMPT: &str = indoc! {" +const MARKED_EXCERPT_INSTRUCTIONS: &str = indoc! {" You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location. - The excerpt to edit will be wrapped in markers <|editable_region_start|> and <|editable_region_end|>. The cursor position is marked with <|cursor_position|>. Please respond with edited code for that region. + The excerpt to edit will be wrapped in markers <|editable_region_start|> and <|editable_region_end|>. The cursor position is marked with <|user_cursor|>. Please respond with edited code for that region. Other code is provided for context, and `…` indicates when code has been skipped. + + # Edit History: + "}; -const LABELED_SECTIONS_SYSTEM_PROMPT: &str = indoc! {r#" +const LABELED_SECTIONS_INSTRUCTIONS: &str = indoc! {r#" You are a code completion assistant and your task is to analyze user edits, and suggest an edit to one of the provided sections of code. Sections of code are grouped by file and then labeled by `<|section_N|>` (e.g `<|section_8|>`). - The cursor position is marked with `<|cursor_position|>` and it will appear within a special section labeled `<|current_section|>`. Prefer editing the current section until no more changes are needed within it. + The cursor position is marked with `<|user_cursor|>` and it will appear within a special section labeled `<|current_section|>`. Prefer editing the current section until no more changes are needed within it. Respond ONLY with the name of the section to edit on a single line, followed by all of the code that should replace that section. For example: @@ -43,9 +46,12 @@ const LABELED_SECTIONS_SYSTEM_PROMPT: &str = indoc! {r#" for i in 0..16 { println!("{i}"); } + + # Edit History: + "#}; -const NUMBERED_LINES_SYSTEM_PROMPT: &str = indoc! {r#" +const NUMBERED_LINES_INSTRUCTIONS: &str = indoc! {r#" # Instructions You are a code completion assistant helping a programmer finish their work. Your task is to: @@ -71,16 +77,27 @@ const NUMBERED_LINES_SYSTEM_PROMPT: &str = indoc! {r#" # Example output: ``` - --- a/distill-claude/tmp-outs/edits_history.txt - +++ b/distill-claude/tmp-outs/edits_history.txt + --- a/src/myapp/cli.py + +++ b/src/myapp/cli.py @@ -1,3 +1,3 @@ - - -import sys +import json ``` + + # Edit History: + "#}; +const UNIFIED_DIFF_REMINDER: &str = indoc! {" + --- + + Please analyze the edit history and the files, then provide the unified diff for your predicted edits. + Do not include the cursor marker in your output. + If you're editing multiple files, be sure to reflect filename in the hunk's header. +"}; + pub struct PlannedPrompt<'a> { request: &'a predict_edits_v3::PredictEditsRequest, /// Snippets to include in the prompt. These may overlap - they are merged / deduplicated in @@ -89,16 +106,6 @@ pub struct PlannedPrompt<'a> { budget_used: usize, } -pub fn system_prompt(format: PromptFormat) -> &'static str { - match format { - PromptFormat::MarkedExcerpt => MARKED_EXCERPT_SYSTEM_PROMPT, - PromptFormat::LabeledSections => LABELED_SECTIONS_SYSTEM_PROMPT, - PromptFormat::NumberedLines => NUMBERED_LINES_SYSTEM_PROMPT, - // only intended for use via zeta_cli - PromptFormat::OnlySnippets => "", - } -} - #[derive(Clone, Debug)] pub struct PlannedSnippet<'a> { path: Arc, @@ -397,21 +404,63 @@ impl<'a> PlannedPrompt<'a> { ), ], PromptFormat::LabeledSections => vec![(self.request.cursor_point, CURSOR_MARKER)], - PromptFormat::NumberedLines => vec![(self.request.cursor_point, CURSOR_MARKER)], + PromptFormat::NumLinesUniDiff => { + vec![(self.request.cursor_point, CURSOR_MARKER)] + } PromptFormat::OnlySnippets => vec![], }; - let mut prompt = String::new(); - prompt.push_str("## User Edits\n\n"); + let mut prompt = match self.request.prompt_format { + PromptFormat::MarkedExcerpt => MARKED_EXCERPT_INSTRUCTIONS.to_string(), + PromptFormat::LabeledSections => LABELED_SECTIONS_INSTRUCTIONS.to_string(), + PromptFormat::NumLinesUniDiff => NUMBERED_LINES_INSTRUCTIONS.to_string(), + // only intended for use via zeta_cli + PromptFormat::OnlySnippets => String::new(), + }; + if self.request.events.is_empty() { - prompt.push_str("No edits yet.\n"); + prompt.push_str("No edits yet.\n\n"); } else { + prompt.push_str( + "The following are the latest edits made by the user, from earlier to later.\n\n", + ); Self::push_events(&mut prompt, &self.request.events); } - prompt.push_str("\n## Code\n\n"); + if self.request.prompt_format == PromptFormat::NumLinesUniDiff { + if self.request.referenced_declarations.is_empty() { + prompt.push_str(indoc! {" + # File under the cursor: + + The cursor marker <|user_cursor|> indicates the current user cursor position. + The file is in current state, edits from edit history have been applied. + We prepend line numbers (e.g., `123|`); they are not part of the file. + + "}); + } else { + // Note: This hasn't been trained on yet + prompt.push_str(indoc! {" + # Code Excerpts: + + The cursor marker <|user_cursor|> indicates the current user cursor position. + Other excerpts of code from the project have been included as context based on their similarity to the code under the cursor. + Context excerpts are not guaranteed to be relevant, so use your own judgement. + Files are in their current state, edits from edit history have been applied. + We prepend line numbers (e.g., `123|`); they are not part of the file. + + "}); + } + } else { + prompt.push_str("\n## Code\n\n"); + } + let section_labels = self.push_file_snippets(&mut prompt, &mut excerpt_file_insertions, file_snippets)?; + + if self.request.prompt_format == PromptFormat::NumLinesUniDiff { + prompt.push_str(UNIFIED_DIFF_REMINDER); + } + Ok((prompt, section_labels)) } @@ -502,7 +551,7 @@ impl<'a> PlannedPrompt<'a> { match self.request.prompt_format { PromptFormat::MarkedExcerpt | PromptFormat::OnlySnippets - | PromptFormat::NumberedLines => { + | PromptFormat::NumLinesUniDiff => { if range.start.0 > 0 && !skipped_last_snippet { output.push_str("…\n"); } @@ -520,7 +569,7 @@ impl<'a> PlannedPrompt<'a> { } let push_full_snippet = |output: &mut String| { - if self.request.prompt_format == PromptFormat::NumberedLines { + if self.request.prompt_format == PromptFormat::NumLinesUniDiff { for (i, line) in snippet.text.lines().enumerate() { writeln!(output, "{}|{}", i as u32 + range.start.0 + 1, line)?; } @@ -543,7 +592,7 @@ impl<'a> PlannedPrompt<'a> { } else if !excerpt_file_insertions.is_empty() { let lines = snippet.text.lines().collect::>(); let push_line = |output: &mut String, line_ix: usize| { - if self.request.prompt_format == PromptFormat::NumberedLines { + if self.request.prompt_format == PromptFormat::NumLinesUniDiff { write!(output, "{}|", line_ix as u32 + range.start.0 + 1)?; } anyhow::Ok(writeln!(output, "{}", lines[line_ix])?) @@ -560,7 +609,7 @@ impl<'a> PlannedPrompt<'a> { push_line(output, line_ix)?; } if let Some(next_line) = lines.get(insertion_line_ix) { - if self.request.prompt_format == PromptFormat::NumberedLines { + if self.request.prompt_format == PromptFormat::NumLinesUniDiff { write!( output, "{}|", diff --git a/crates/edit_prediction_context/src/edit_prediction_context.rs b/crates/edit_prediction_context/src/edit_prediction_context.rs index 85b0c36d7342b8c83a6a6befb38a3f0c9753b093..34941f93b4017dfe8e96802b5078daf5652ed22a 100644 --- a/crates/edit_prediction_context/src/edit_prediction_context.rs +++ b/crates/edit_prediction_context/src/edit_prediction_context.rs @@ -27,6 +27,7 @@ pub use predict_edits_v3::Line; #[derive(Clone, Debug, PartialEq)] pub struct EditPredictionContextOptions { pub use_imports: bool, + pub use_references: bool, pub excerpt: EditPredictionExcerptOptions, pub score: EditPredictionScoreOptions, } @@ -116,19 +117,23 @@ impl EditPredictionContext { index_state, )?; let excerpt_text = excerpt.text(buffer); - let excerpt_occurrences = text_similarity::Occurrences::within_string(&excerpt_text.body); - let adjacent_start = Point::new(cursor_point.row.saturating_sub(2), 0); - let adjacent_end = Point::new(cursor_point.row + 1, 0); - let adjacent_occurrences = text_similarity::Occurrences::within_string( - &buffer - .text_for_range(adjacent_start..adjacent_end) - .collect::(), - ); + let declarations = if options.use_references + && let Some(index_state) = index_state + { + let excerpt_occurrences = + text_similarity::Occurrences::within_string(&excerpt_text.body); - let cursor_offset_in_file = cursor_point.to_offset(buffer); + let adjacent_start = Point::new(cursor_point.row.saturating_sub(2), 0); + let adjacent_end = Point::new(cursor_point.row + 1, 0); + let adjacent_occurrences = text_similarity::Occurrences::within_string( + &buffer + .text_for_range(adjacent_start..adjacent_end) + .collect::(), + ); + + let cursor_offset_in_file = cursor_point.to_offset(buffer); - let declarations = if let Some(index_state) = index_state { let references = get_references(&excerpt, &excerpt_text, buffer); scored_declarations( @@ -195,6 +200,7 @@ mod tests { buffer_snapshot, EditPredictionContextOptions { use_imports: true, + use_references: true, excerpt: EditPredictionExcerptOptions { max_bytes: 60, min_bytes: 10, diff --git a/crates/zeta2/src/zeta2.rs b/crates/zeta2/src/zeta2.rs index 16caee7aefde8c38d7a466fc9e1197c7ad21b94a..5cb163ceed135f0df7b1277908377a931b02aa7e 100644 --- a/crates/zeta2/src/zeta2.rs +++ b/crates/zeta2/src/zeta2.rs @@ -45,6 +45,7 @@ const MAX_EVENT_COUNT: usize = 16; pub const DEFAULT_CONTEXT_OPTIONS: EditPredictionContextOptions = EditPredictionContextOptions { use_imports: true, + use_references: false, excerpt: EditPredictionExcerptOptions { max_bytes: 512, min_bytes: 128, diff --git a/crates/zeta2_tools/src/zeta2_tools.rs b/crates/zeta2_tools/src/zeta2_tools.rs index 69536ad46806cb271ef987cadb4e95a2061ac953..0834fd18d8da9ff63b5de8ca19d7b5584425972b 100644 --- a/crates/zeta2_tools/src/zeta2_tools.rs +++ b/crates/zeta2_tools/src/zeta2_tools.rs @@ -20,9 +20,11 @@ use ui::{ContextMenu, ContextMenuEntry, DropdownMenu, prelude::*}; use ui_input::SingleLineInput; use util::{ResultExt, paths::PathStyle, rel_path::RelPath}; use workspace::{Item, SplitDirection, Workspace}; -use zeta2::{DEFAULT_CONTEXT_OPTIONS, PredictionDebugInfo, Zeta, ZetaOptions}; +use zeta2::{PredictionDebugInfo, Zeta, ZetaOptions}; -use edit_prediction_context::{DeclarationStyle, EditPredictionExcerptOptions}; +use edit_prediction_context::{ + DeclarationStyle, EditPredictionContextOptions, EditPredictionExcerptOptions, +}; actions!( dev, @@ -232,17 +234,20 @@ impl Zeta2Inspector { .unwrap_or_default() } - let mut context_options = DEFAULT_CONTEXT_OPTIONS.clone(); - context_options.excerpt = EditPredictionExcerptOptions { - max_bytes: number_input_value(&this.max_excerpt_bytes_input, cx), - min_bytes: number_input_value(&this.min_excerpt_bytes_input, cx), - target_before_cursor_over_total_bytes: number_input_value( - &this.cursor_context_ratio_input, - cx, - ), + let zeta_options = this.zeta.read(cx).options().clone(); + + let context_options = EditPredictionContextOptions { + excerpt: EditPredictionExcerptOptions { + max_bytes: number_input_value(&this.max_excerpt_bytes_input, cx), + min_bytes: number_input_value(&this.min_excerpt_bytes_input, cx), + target_before_cursor_over_total_bytes: number_input_value( + &this.cursor_context_ratio_input, + cx, + ), + }, + ..zeta_options.context }; - let zeta_options = this.zeta.read(cx).options(); this.set_options( ZetaOptions { context: context_options, diff --git a/crates/zeta_cli/src/main.rs b/crates/zeta_cli/src/main.rs index efd5dd2d0688571cf8cef9e77b7d89c6e8ad33a9..75b859d2f55d99cc37c961455ddcdb86a5f49351 100644 --- a/crates/zeta_cli/src/main.rs +++ b/crates/zeta_cli/src/main.rs @@ -94,6 +94,8 @@ struct Zeta2Args { file_indexing_parallelism: usize, #[arg(long, default_value_t = false)] disable_imports_gathering: bool, + #[arg(long, default_value_t = false)] + disable_reference_retrieval: bool, } #[derive(clap::ValueEnum, Default, Debug, Clone)] @@ -111,7 +113,7 @@ impl Into for PromptFormat { Self::MarkedExcerpt => predict_edits_v3::PromptFormat::MarkedExcerpt, Self::LabeledSections => predict_edits_v3::PromptFormat::LabeledSections, Self::OnlySnippets => predict_edits_v3::PromptFormat::OnlySnippets, - Self::NumberedLines => predict_edits_v3::PromptFormat::NumberedLines, + Self::NumberedLines => predict_edits_v3::PromptFormat::NumLinesUniDiff, } } } @@ -300,6 +302,7 @@ impl Zeta2Args { fn to_options(&self, omit_excerpt_overlaps: bool) -> zeta2::ZetaOptions { zeta2::ZetaOptions { context: EditPredictionContextOptions { + use_references: !self.disable_reference_retrieval, use_imports: !self.disable_imports_gathering, excerpt: EditPredictionExcerptOptions { max_bytes: self.max_excerpt_bytes,