From d4af86001c2ecc8f22002f6f0fc346728a1a7d09 Mon Sep 17 00:00:00 2001 From: Oleksiy Syvokon Date: Wed, 18 Feb 2026 17:26:47 +0200 Subject: [PATCH] ep: Update teacher prompt (#49489) * More conservative predictions for prose * Explain "user accepted prediction" in the teacher prompt * Sonnet 4.6 support * Don't strip comments in teacher prompt's edit history Release Notes: - N/A --- .../evals/flask--add-test-function.md | 13 ++ .../evals/tree-sitter--if-let-to-match.md | 24 ++++ ...tree-sitter--tuple-to-struct-definition.md | 20 +++ ...e-sitter--tuple-to-struct-destructuring.md | 22 ++++ .../edit_prediction_cli/src/format_prompt.rs | 30 ++--- crates/edit_prediction_cli/src/main.rs | 17 ++- crates/edit_prediction_cli/src/predict.rs | 4 +- .../src/prompts/teacher.md | 124 +++++++++++++++++- 8 files changed, 224 insertions(+), 30 deletions(-) diff --git a/crates/edit_prediction_cli/evals/flask--add-test-function.md b/crates/edit_prediction_cli/evals/flask--add-test-function.md index ea9d47a6db4dfbd4ac48e55618417ce574699ec3..443acb52a7069c23f8120045d955acff9a5cadb5 100644 --- a/crates/edit_prediction_cli/evals/flask--add-test-function.md +++ b/crates/edit_prediction_cli/evals/flask--add-test-function.md @@ -147,6 +147,19 @@ def test_static_url_path(): + pass +def test_static_url_path(): +``` + +```diff +--- a/tests/test_basic.py ++++ b/tests/test_basic.py +@@ -1372,15 +1372,15 @@ +-de ++def test_static_folder(): +# ^[CURSOR_POSITION] ++ pass + + def test_static_url_path(): ``` diff --git a/crates/edit_prediction_cli/evals/tree-sitter--if-let-to-match.md b/crates/edit_prediction_cli/evals/tree-sitter--if-let-to-match.md index b1de3bb85207d07f4f70116d45ef2e25124092b7..86a30b5fb8fe1532ddd2fc3d8548b8928f402b6b 100644 --- a/crates/edit_prediction_cli/evals/tree-sitter--if-let-to-match.md +++ b/crates/edit_prediction_cli/evals/tree-sitter--if-let-to-match.md @@ -110,3 +110,27 @@ revision = "17e3c7a5c56527a179fa6e37ce7ee934493e5047" } } ``` + + +```diff +--- 
a/crates/loader/src/loader.rs ++++ b/crates/loader/src/loader.rs +@@ -736,13 +736,13 @@ + if let Some(parser_dir_name) = entry.file_name().to_str() { + if parser_dir_name.starts_with("tree-sitter-") { + self.find_language_configurations_at_path( + &parser_container_dir.join(parser_dir_name), + false, + ) + .ok(); + } + } + } + } ++ Err(e) => { ++ +# ^[CURSOR_POSITION] ++ } + } + } +``` diff --git a/crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-definition.md b/crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-definition.md index f603c00ea34e543927d1452b8cd1361f8c2bf147..fe6c37201b29766a824c5653350ef3ac16b78c01 100644 --- a/crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-definition.md +++ b/crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-definition.md @@ -89,6 +89,26 @@ pub struct CompileConfig<'a> { pub header_paths: Vec<&'a Path>, ``` + +```diff +--- a/tree-sitter/crates/loader/src/loader.rs ++++ b/tree-sitter/crates/loader/src/loader.rs +@@ -621,6 +621,8 @@ + wasm_store: Mutex>, + } + +-str ++struct LanguageEntry { ++ path: PathBuf, ++ language: OnceCell, ++ extra_files: Option>, ++} ++ + pub struct CompileConfig<'a> { + pub src_path: &'a Path, + pub header_paths: Vec<&'a Path>, +``` + ```diff --- a/tree-sitter/crates/loader/src/loader.rs +++ b/tree-sitter/crates/loader/src/loader.rs diff --git a/crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-destructuring.md b/crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-destructuring.md index b4c7a9d5b7d0e8bd8d715abc90a9d687a0abd050..a55495795e48cc24d19eb8c3aef7b55968dafab1 100644 --- a/crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-destructuring.md +++ b/crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-destructuring.md @@ -129,3 +129,25 @@ revision = "24007727d42b4caceda3095ac685c463fae1ba1a" )) }) ``` + +```diff +--- a/tree-sitter/crates/loader/src/loader.rs ++++ b/tree-sitter/crates/loader/src/loader.rs +@@ 
-926,0 +926,0 @@ + } + + fn language_for_id(&self, id: usize) -> LoaderResult { +- let (path, language, externals) = &self.languages_by_id[id]; ++ let LanguageEntry { path, language, external_files } = &self.languages_by_id[id]; + language + .get_or_try_init(|| { + let src_path = path.join("src"); + self.load_language_at_path(CompileConfig::new( + &src_path, +- externals.as_deref(), ++ external_files.as_deref(), + None, + )) + }) + .cloned() +``` diff --git a/crates/edit_prediction_cli/src/format_prompt.rs b/crates/edit_prediction_cli/src/format_prompt.rs index 3797f84ac8f4dddaa66b6a10bd1b68fb46cfd5e5..aadecf4b29a093710947ed2c5df2ce1543c3cf66 100644 --- a/crates/edit_prediction_cli/src/format_prompt.rs +++ b/crates/edit_prediction_cli/src/format_prompt.rs @@ -284,23 +284,18 @@ impl TeacherPrompt { } fn format_edit_history(edit_history: &str) -> String { - // Strip comments ("garbage lines") from edit history - let lines = edit_history - .lines() - .filter(|&s| Self::is_udiff_content_line(s)) - .collect::>(); - - let history_lines = if lines.len() > Self::MAX_HISTORY_LINES { - &lines[lines.len() - Self::MAX_HISTORY_LINES..] - } else { - &lines - }; + let lines: Vec<&str> = edit_history.lines().collect(); - if history_lines.is_empty() { + if lines.is_empty() { return "(No edit history)".to_string(); } - history_lines.join("\n") + if lines.len() > Self::MAX_HISTORY_LINES { + let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n"); + format!("{truncated}\n[...truncated...]") + } else { + lines.join("\n") + } } pub fn format_context(example: &Example) -> String { @@ -376,15 +371,6 @@ impl TeacherPrompt { let region = &text[start..end]; Ok(region.strip_suffix('\n').unwrap_or(region).to_string()) } - - fn is_udiff_content_line(s: &str) -> bool { - s.starts_with("-") - || s.starts_with("+") - || s.starts_with(" ") - || s.starts_with("---") - || s.starts_with("+++") - || s.starts_with("@@") - } } /// Extract the cursor excerpt from an example. 
diff --git a/crates/edit_prediction_cli/src/main.rs b/crates/edit_prediction_cli/src/main.rs index 887b313235936df9d9c7b7dc3fd02366790ec9cc..e08fb62090ed4d09fc408ced6a684ed0ffad2233 100644 --- a/crates/edit_prediction_cli/src/main.rs +++ b/crates/edit_prediction_cli/src/main.rs @@ -297,8 +297,10 @@ struct EvalArgs { summary_json: Option, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Hash)] pub enum TeacherBackend { + Sonnet46, + #[default] Sonnet45, Gpt52, } @@ -306,6 +308,7 @@ pub enum TeacherBackend { impl std::fmt::Display for TeacherBackend { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + TeacherBackend::Sonnet46 => write!(f, "sonnet46"), TeacherBackend::Sonnet45 => write!(f, "sonnet45"), TeacherBackend::Gpt52 => write!(f, "gpt52"), } @@ -318,9 +321,12 @@ impl std::str::FromStr for TeacherBackend { fn from_str(s: &str) -> Result { match s.to_lowercase().as_str() { "sonnet45" | "sonnet" | "claude" => Ok(TeacherBackend::Sonnet45), + "sonnet46" => Ok(TeacherBackend::Sonnet46), "gpt52" | "gpt" | "openai" => Ok(TeacherBackend::Gpt52), "v0114180editableregion" => Ok(TeacherBackend::Sonnet45), - _ => anyhow::bail!("unknown teacher backend `{s}`. Valid options: sonnet45, gpt52"), + _ => anyhow::bail!( + "unknown teacher backend `{s}`. Valid options: sonnet45, sonnet46, gpt52" + ), } } } @@ -329,6 +335,7 @@ impl TeacherBackend { pub fn model_name(&self) -> &'static str { match self { TeacherBackend::Sonnet45 => "claude-sonnet-4-5", + TeacherBackend::Sonnet46 => "claude-sonnet-4-6", TeacherBackend::Gpt52 => "gpt-5.2", } } @@ -386,14 +393,14 @@ impl std::str::FromStr for PredictionProvider { let backend = arg .map(|a| a.parse()) .transpose()? 
- .unwrap_or(TeacherBackend::Sonnet45); + .unwrap_or(TeacherBackend::default()); Ok(PredictionProvider::Teacher(backend)) } "teacher-non-batching" | "teacher_non_batching" | "teachernonbatching" => { let backend = arg .map(|a| a.parse()) .transpose()? - .unwrap_or(TeacherBackend::Sonnet45); + .unwrap_or(TeacherBackend::default()); Ok(PredictionProvider::TeacherNonBatching(backend)) } "repair" => Ok(PredictionProvider::Repair), @@ -401,7 +408,7 @@ impl std::str::FromStr for PredictionProvider { anyhow::bail!( "unknown provider `{provider}`. Valid options: sweep, mercury, zeta1, zeta2, zeta2:, teacher, teacher:, teacher-non-batching, repair\n\ For zeta2, you can optionally specify a version like `zeta2:ordered` or `zeta2:V0113_Ordered`.\n\ - For teacher, you can specify a backend like `teacher:sonnet45` or `teacher:gpt52`.\n\ + For teacher, you can specify a backend like `teacher:sonnet46` or `teacher:gpt52`.\n\ Available zeta versions:\n{}", ZetaFormat::options_as_string() ) diff --git a/crates/edit_prediction_cli/src/predict.rs b/crates/edit_prediction_cli/src/predict.rs index f7021f15b4900fd050f1b2019553528f919e038d..6ad7880bda369fff8e35ac77c422471f989cb8b7 100644 --- a/crates/edit_prediction_cli/src/predict.rs +++ b/crates/edit_prediction_cli/src/predict.rs @@ -293,7 +293,7 @@ async fn predict_teacher( step_progress: &crate::progress::StepProgress, ) -> anyhow::Result<()> { match backend { - TeacherBackend::Sonnet45 => { + TeacherBackend::Sonnet45 | TeacherBackend::Sonnet46 => { predict_anthropic( example, backend, @@ -483,7 +483,7 @@ async fn predict_openai( pub async fn sync_batches(provider: Option<&PredictionProvider>) -> anyhow::Result<()> { match provider { Some(PredictionProvider::Teacher(backend)) => match backend { - TeacherBackend::Sonnet45 => { + TeacherBackend::Sonnet45 | TeacherBackend::Sonnet46 => { let llm_client = ANTHROPIC_CLIENT.get_or_init(|| { AnthropicClient::batch(&crate::paths::LLM_CACHE_DB) .expect("Failed to create Anthropic client") diff 
--git a/crates/edit_prediction_cli/src/prompts/teacher.md b/crates/edit_prediction_cli/src/prompts/teacher.md index 4f202b2c6b068371f4788f0f6db9f1af334f7686..b5a07000a9d31144ab4886d2a3db4da03181ecf4 100644 --- a/crates/edit_prediction_cli/src/prompts/teacher.md +++ b/crates/edit_prediction_cli/src/prompts/teacher.md @@ -21,18 +21,21 @@ You are an edit prediction assistant in a code editor. Your task is to predict t - Only add NEW content that extends the user's work forward; never restore what they removed - **Key test**: if your prediction would make the code more similar to what it was BEFORE the user's edit, output `NO_EDITS` instead - **Never assume a deletion was accidental.** Even if removing content breaks the code, breaks a pattern, or leaves text looking "incomplete", respect it. The user may be mid-rewrite. Do NOT "complete" partial text by restoring what was deleted. +- Auto-generated code can be modified: Hunks marked with `// User accepted prediction:` contain code from a previous prediction the user accepted. Unlike user-typed content, these hunks CAN be edited, corrected, or replaced if doing so improves the code. The "never undo/revert" rule protects the user's *current typing intent*—auto-generated code doesn't carry this protection. - Do not just mechanically apply patterns - reason about what changes make sense given the context and the programmer's apparent goals. - Do not just fix syntax errors - look for the broader refactoring pattern and apply it systematically throughout the code. - Keep existing formatting unless it's absolutely necessary - When edit history and surrounding code suggest different edits, prioritize the most recent edits in the history as they best reflect current intent. - Treat partial text at or near the cursor as the beginning of something the user is actively typing. Complete the code the user appears to be creating based on context.
- When completing partial code, prefer predictions that save meaningful keystrokes, even if this requires making educated guesses about the user's intent. -- It's better to make a substantive prediction that might be rejected than to make a minimal prediction that saves only a few keystrokes. +- For code, it's better to make a substantive prediction that might be rejected than to make a minimal prediction that saves only a few keystrokes. +- When the user is editing prose or documentation (e.g. Markdown, comments, plain text), predict conservatively. Complete the current fragment or sentence, but do not generate additional lines of free-form content since prose is less constrained than code and more prone to incorrect continuations. # Input Format You will be provided with: 1. The user's *edit history*, in chronological order. Use this to infer the user's trajectory and predict the next most logical edit. + - Hunks preceded by `// User accepted prediction:` indicate code that was auto-generated by a previous prediction and accepted by the user. These are treated differently than user-typed edits (see Rules). 2. A set of *related excerpts* from the user's codebase. Some of these may be needed for correctly predicting the next edit. - `…` may appear within a related file to indicate that some code has been skipped. 3. An excerpt from the user's *current file*. @@ -235,6 +238,65 @@ The user just fixed a bug in the `add` function, changing subtraction to additio NO_EDITS ````` +## Example 6 + +The user accepted a prediction for a function, then started renaming it. The original arguments were auto-generated (marked with `// User accepted prediction:`), so they CAN be updated to match the new function name. This is NOT reverting user input—it's improving auto-generated scaffolding. 
+ +### User Edit History + +````` +--- a/math_utils.py ++++ b/math_utils.py +@@ -3,3 +3,5 @@ + def calculate_rectangle_area(width, height): + return width * height + ++de + +// User accepted prediction: +--- a/math_utils.py ++++ b/math_utils.py +@@ -3,5 +3,7 @@ + def calculate_rectangle_area(width, height): + return width * height + +-de ++def calculate_rectangle_perimeter(width, height): ++ + +--- a/math_utils.py ++++ b/math_utils.py +@@ -5,5 +5,5 @@ + return width * height + +-def calculate_rectangle_perimeter(width, height): ++def calculate_sq_perimeter(width, height): + +````` + +### Current File + +`````math_utils.py +def calculate_rectangle_area(width, height): + return width * height + +<|editable_region_start|> +def calculate_sq<|user_cursor|>_perimeter(width, height): + +<|editable_region_end|> +````` + +### Output + +The user accepted a prediction for `calculate_rectangle_perimeter(width, height)`, then started renaming `rectangle` to `square`. Since squares have equal sides, the arguments should change from `(width, height)` to `(side)`. The arguments were auto-generated (from an accepted prediction), so modifying them is appropriate. + +````` +<|editable_region_start|> +def calculate_square_perimeter(side): + <|user_cursor|> +<|editable_region_end|> +````` + ## Example 5 The user just deleted code, leaving behind what looks incomplete. You must NOT "complete" it by restoring deleted content—that would undo their edit. Output NO_EDITS. **This is the correct response even though the code appears broken.** @@ -269,6 +331,66 @@ The user deleted `ashdb` from `/tmp/crashdb`, leaving `/tmp/cr`. Although this l NO_EDITS ````` +## Example 6 + +The user accepted a prediction for a function, then started renaming it. The original arguments were auto-generated (marked with `// User accepted prediction:`), so they CAN be updated to match the new function name. This is NOT reverting user input—it's improving auto-generated scaffolding. 
+ +### User Edit History + +````` +--- a/math_utils.py ++++ b/math_utils.py +@@ -3,3 +3,5 @@ + def calculate_rectangle_area(width, height): + return width * height + ++de + +// User accepted prediction: +--- a/math_utils.py ++++ b/math_utils.py +@@ -3,5 +3,7 @@ + def calculate_rectangle_area(width, height): + return width * height + +-de ++def calculate_rectangle_perimeter(width, height): ++ + +--- a/math_utils.py ++++ b/math_utils.py +@@ -5,5 +5,5 @@ + return width * height + +-def calculate_rectangle_perimeter(width, height): ++def calculate_sq_perimeter(width, height): + +````` + +### Current File + +`````math_utils.py +def calculate_rectangle_area(width, height): + return width * height + +<|editable_region_start|> +def calculate_sq<|user_cursor|>_perimeter(width, height): + +<|editable_region_end|> +````` + +### Output + +The user accepted a prediction for `calculate_rectangle_perimeter(width, height)`, then started renaming `rectangle` to `square`. Since squares have equal sides, the arguments should change from `(width, height)` to `(side)`. The arguments were auto-generated (from an accepted prediction), so modifying them is appropriate. + +````` +<|editable_region_start|> +def calculate_square_perimeter(side): + <|user_cursor|> +<|editable_region_end|> +````` + + # Your task: