ep: Update teacher prompt (#49489)

Oleksiy Syvokon created 1 month ago

* More conservative predictions for prose
* Explain "user accepted prediction" in the teacher prompt
* Sonnet 4.6 support
* Don't strip comments in teacher prompt's edit history

Release Notes:

- N/A

Change summary

crates/edit_prediction_cli/evals/flask--add-test-function.md                   |  13 
crates/edit_prediction_cli/evals/tree-sitter--if-let-to-match.md               |  24 
crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-definition.md    |  20 
crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-destructuring.md |  22 
crates/edit_prediction_cli/src/format_prompt.rs                                |  30 
crates/edit_prediction_cli/src/main.rs                                         |  17 
crates/edit_prediction_cli/src/predict.rs                                      |   4 
crates/edit_prediction_cli/src/prompts/teacher.md                              | 124 
8 files changed, 224 insertions(+), 30 deletions(-)

Detailed changes

crates/edit_prediction_cli/evals/flask--add-test-function.md 🔗

@@ -147,6 +147,19 @@ def test_static_url_path():
 +    pass
 
 
+def test_static_url_path():
+```
+
+```diff
+--- a/tests/test_basic.py
++++ b/tests/test_basic.py
+@@ -1372,15 +1372,15 @@
+-de
++def test_static_folder():
+#                       ^[CURSOR_POSITION]
++    pass
+
+
 def test_static_url_path():
 ```

crates/edit_prediction_cli/evals/tree-sitter--if-let-to-match.md 🔗

@@ -110,3 +110,27 @@ revision = "17e3c7a5c56527a179fa6e37ce7ee934493e5047"
              }
          }
 ```
+
+
+```diff
+--- a/crates/loader/src/loader.rs
++++ b/crates/loader/src/loader.rs
+@@ -736,13 +736,13 @@
+                         if let Some(parser_dir_name) = entry.file_name().to_str() {
+                             if parser_dir_name.starts_with("tree-sitter-") {
+                                 self.find_language_configurations_at_path(
+                                     &parser_container_dir.join(parser_dir_name),
+                                     false,
+                                 )
+                                 .ok();
+                             }
+                         }
+                     }
+                 }
++                Err(e) => {
++                    
+#                    ^[CURSOR_POSITION]
++                }
+             }
+         }
+```

crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-definition.md 🔗

@@ -89,6 +89,26 @@ pub struct CompileConfig<'a> {
      pub header_paths: Vec<&'a Path>,
 ```
 
+
+```diff
+--- a/tree-sitter/crates/loader/src/loader.rs
++++ b/tree-sitter/crates/loader/src/loader.rs
+@@ -621,6 +621,8 @@
+     wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
+ }
+
+-str
++struct LanguageEntry {
++    path: PathBuf,
++    language: OnceCell<Language>,
++    extra_files: Option<Vec<PathBuf>>,
++}
++
+ pub struct CompileConfig<'a> {
+     pub src_path: &'a Path,
+     pub header_paths: Vec<&'a Path>,
+```
+
 ```diff
 --- a/tree-sitter/crates/loader/src/loader.rs
 +++ b/tree-sitter/crates/loader/src/loader.rs

crates/edit_prediction_cli/evals/tree-sitter--tuple-to-struct-destructuring.md 🔗

@@ -129,3 +129,25 @@ revision = "24007727d42b4caceda3095ac685c463fae1ba1a"
                  ))
              })
 ```
+
+```diff
+--- a/tree-sitter/crates/loader/src/loader.rs
++++ b/tree-sitter/crates/loader/src/loader.rs
+@@ -926,0 +926,0 @@
+     }
+
+     fn language_for_id(&self, id: usize) -> LoaderResult<Language> {
+-        let (path, language, externals) = &self.languages_by_id[id];
++        let LanguageEntry { path, language, external_files } = &self.languages_by_id[id];
+         language
+             .get_or_try_init(|| {
+                 let src_path = path.join("src");
+                 self.load_language_at_path(CompileConfig::new(
+                     &src_path,
+-                    externals.as_deref(),
++                    external_files.as_deref(),
+                     None,
+                 ))
+             })
+             .cloned()
+```

crates/edit_prediction_cli/src/format_prompt.rs 🔗

@@ -284,23 +284,18 @@ impl TeacherPrompt {
     }
 
     fn format_edit_history(edit_history: &str) -> String {
-        // Strip comments ("garbage lines") from edit history
-        let lines = edit_history
-            .lines()
-            .filter(|&s| Self::is_udiff_content_line(s))
-            .collect::<Vec<_>>();
-
-        let history_lines = if lines.len() > Self::MAX_HISTORY_LINES {
-            &lines[lines.len() - Self::MAX_HISTORY_LINES..]
-        } else {
-            &lines
-        };
+        let lines: Vec<&str> = edit_history.lines().collect();
 
-        if history_lines.is_empty() {
+        if lines.is_empty() {
             return "(No edit history)".to_string();
         }
 
-        history_lines.join("\n")
+        if lines.len() > Self::MAX_HISTORY_LINES {
+            let truncated = lines[lines.len() - Self::MAX_HISTORY_LINES..].join("\n");
+            format!("{truncated}\n[...truncated...]")
+        } else {
+            lines.join("\n")
+        }
     }
 
     pub fn format_context(example: &Example) -> String {
@@ -376,15 +371,6 @@ impl TeacherPrompt {
         let region = &text[start..end];
         Ok(region.strip_suffix('\n').unwrap_or(region).to_string())
     }
-
-    fn is_udiff_content_line(s: &str) -> bool {
-        s.starts_with("-")
-            || s.starts_with("+")
-            || s.starts_with(" ")
-            || s.starts_with("---")
-            || s.starts_with("+++")
-            || s.starts_with("@@")
-    }
 }
 
 /// Extract the cursor excerpt from an example.

crates/edit_prediction_cli/src/main.rs 🔗

@@ -297,8 +297,10 @@ struct EvalArgs {
     summary_json: Option<PathBuf>,
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Hash)]
 pub enum TeacherBackend {
+    Sonnet46,
+    #[default]
     Sonnet45,
     Gpt52,
 }
@@ -306,6 +308,7 @@ pub enum TeacherBackend {
 impl std::fmt::Display for TeacherBackend {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
+            TeacherBackend::Sonnet46 => write!(f, "sonnet46"),
             TeacherBackend::Sonnet45 => write!(f, "sonnet45"),
             TeacherBackend::Gpt52 => write!(f, "gpt52"),
         }
@@ -318,9 +321,12 @@ impl std::str::FromStr for TeacherBackend {
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         match s.to_lowercase().as_str() {
             "sonnet45" | "sonnet" | "claude" => Ok(TeacherBackend::Sonnet45),
+            "sonnet46" => Ok(TeacherBackend::Sonnet46),
             "gpt52" | "gpt" | "openai" => Ok(TeacherBackend::Gpt52),
             "v0114180editableregion" => Ok(TeacherBackend::Sonnet45),
-            _ => anyhow::bail!("unknown teacher backend `{s}`. Valid options: sonnet45, gpt52"),
+            _ => anyhow::bail!(
+                "unknown teacher backend `{s}`. Valid options: sonnet45, sonnet46, gpt52"
+            ),
         }
     }
 }
@@ -329,6 +335,7 @@ impl TeacherBackend {
     pub fn model_name(&self) -> &'static str {
         match self {
             TeacherBackend::Sonnet45 => "claude-sonnet-4-5",
+            TeacherBackend::Sonnet46 => "claude-sonnet-4-6",
             TeacherBackend::Gpt52 => "gpt-5.2",
         }
     }
@@ -386,14 +393,14 @@ impl std::str::FromStr for PredictionProvider {
                 let backend = arg
                     .map(|a| a.parse())
                     .transpose()?
-                    .unwrap_or(TeacherBackend::Sonnet45);
+                    .unwrap_or(TeacherBackend::default());
                 Ok(PredictionProvider::Teacher(backend))
             }
             "teacher-non-batching" | "teacher_non_batching" | "teachernonbatching" => {
                 let backend = arg
                     .map(|a| a.parse())
                     .transpose()?
-                    .unwrap_or(TeacherBackend::Sonnet45);
+                    .unwrap_or(TeacherBackend::default());
                 Ok(PredictionProvider::TeacherNonBatching(backend))
             }
             "repair" => Ok(PredictionProvider::Repair),
@@ -401,7 +408,7 @@ impl std::str::FromStr for PredictionProvider {
                 anyhow::bail!(
                     "unknown provider `{provider}`. Valid options: sweep, mercury, zeta1, zeta2, zeta2:<version>, teacher, teacher:<backend>, teacher-non-batching, repair\n\
                  For zeta2, you can optionally specify a version like `zeta2:ordered` or `zeta2:V0113_Ordered`.\n\
-                 For teacher, you can specify a backend like `teacher:sonnet45` or `teacher:gpt52`.\n\
+                 For teacher, you can specify a backend like `teacher:sonnet46` or `teacher:gpt52`.\n\
                  Available zeta versions:\n{}",
                     ZetaFormat::options_as_string()
                 )

crates/edit_prediction_cli/src/predict.rs 🔗

@@ -293,7 +293,7 @@ async fn predict_teacher(
     step_progress: &crate::progress::StepProgress,
 ) -> anyhow::Result<()> {
     match backend {
-        TeacherBackend::Sonnet45 => {
+        TeacherBackend::Sonnet45 | TeacherBackend::Sonnet46 => {
             predict_anthropic(
                 example,
                 backend,
@@ -483,7 +483,7 @@ async fn predict_openai(
 pub async fn sync_batches(provider: Option<&PredictionProvider>) -> anyhow::Result<()> {
     match provider {
         Some(PredictionProvider::Teacher(backend)) => match backend {
-            TeacherBackend::Sonnet45 => {
+            TeacherBackend::Sonnet45 | TeacherBackend::Sonnet46 => {
                 let llm_client = ANTHROPIC_CLIENT.get_or_init(|| {
                     AnthropicClient::batch(&crate::paths::LLM_CACHE_DB)
                         .expect("Failed to create Anthropic client")

crates/edit_prediction_cli/src/prompts/teacher.md 🔗

@@ -21,18 +21,21 @@ You are an edit prediction assistant in a code editor. Your task is to predict t
   - Only add NEW content that extends the user's work forward; never restore what they removed
   - **Key test**: if your prediction would make the code more similar to what it was BEFORE the user's edit, output `NO_EDITS` instead
   - **Never assume a deletion was accidental.** Even if removing content breaks the code, breaks a pattern, or leaves text looking "incomplete", respect it. The user may be mid-rewrite. Do NOT "complete" partial text by restoring what was deleted.
+- Auto-generated code can be modified: Hunks marked with `// User accepted prediction:` contain code from a previous prediction the user accepted. Unlike user-typed content, these hunks CAN be edited, corrected, or replaced if doing so improves the code. The "never undo/revert" rule protects the user's *current typing intent*—auto-generated code doesn't carry this protection.
 - Do not just mechanically apply patterns - reason about what changes make sense given the context and the programmer's apparent goals.
 - Do not just fix syntax errors - look for the broader refactoring pattern and apply it systematically throughout the code.
 - Keep existing formatting unless changing it is absolutely necessary.
 - When edit history and surrounding code suggest different edits, prioritize the most recent edits in the history as they best reflect current intent.
 - Treat partial text at or near the cursor as the beginning of something the user is actively typing. Complete the code the user appears to be creating based on context.
 - When completing partial code, prefer predictions that save meaningful keystrokes, even if this requires making educated guesses about the user's intent.
-- It's better to make a substantive prediction that might be rejected than to make a minimal prediction that saves only a few keystrokes.
+- For code, it's better to make a substantive prediction that might be rejected than to make a minimal prediction that saves only a few keystrokes.
+- When the user is editing prose or documentation (e.g. Markdown, comments, plain text), predict conservatively. Complete the current fragment or sentence, but do not generate additional lines of free-form content since prose is less constrained than code and more prone to incorrect continuations.
 
 # Input Format
 
 You will be provided with:
 1. The user's *edit history*, in chronological order. Use this to infer the user's trajectory and predict the next most logical edit.
+  - Hunks preceded by `// User accepted prediction:` indicate code that was auto-generated by a previous prediction and accepted by the user. These are treated differently from user-typed edits (see the Rules section).
 2. A set of *related excerpts* from the user's codebase. Some of these may be needed for correctly predicting the next edit.
   - `…` may appear within a related file to indicate that some code has been skipped.
 3. An excerpt from the user's *current file*.
@@ -235,6 +238,65 @@ The user just fixed a bug in the `add` function, changing subtraction to additio
 NO_EDITS
 `````
 
+## Example 6
+
+The user accepted a prediction for a function, then started renaming it. The original arguments were auto-generated (marked with `// User accepted prediction:`), so they CAN be updated to match the new function name. This is NOT reverting user input—it's improving auto-generated scaffolding.
+
+### User Edit History
+
+`````
+--- a/math_utils.py
++++ b/math_utils.py
+@@ -3,3 +3,5 @@
+ def calculate_rectangle_area(width, height):
+     return width * height
+
++de
+
+// User accepted prediction:
+--- a/math_utils.py
++++ b/math_utils.py
+@@ -3,5 +3,7 @@
+ def calculate_rectangle_area(width, height):
+     return width * height
+
+-de
++def calculate_rectangle_perimeter(width, height):
++
+
+--- a/math_utils.py
++++ b/math_utils.py
+@@ -5,5 +5,5 @@
+     return width * height
+
+-def calculate_rectangle_perimeter(width, height):
++def calculate_sq_perimeter(width, height):
+
+`````
+
+### Current File
+
+`````math_utils.py
+def calculate_rectangle_area(width, height):
+    return width * height
+
+<|editable_region_start|>
+def calculate_sq<|user_cursor|>_perimeter(width, height):
+
+<|editable_region_end|>
+`````
+
+### Output
+
+The user accepted a prediction for `calculate_rectangle_perimeter(width, height)`, then started renaming `rectangle` to `square`. Since squares have equal sides, the arguments should change from `(width, height)` to `(side)`. The arguments were auto-generated (from an accepted prediction), so modifying them is appropriate.
+
+`````
+<|editable_region_start|>
+def calculate_square_perimeter(side):
+    <|user_cursor|>
+<|editable_region_end|>
+`````
+
 ## Example 5
 
 The user just deleted code, leaving behind what looks incomplete. You must NOT "complete" it by restoring deleted content—that would undo their edit. Output NO_EDITS. **This is the correct response even though the code appears broken.**
@@ -269,6 +331,66 @@ The user deleted `ashdb` from `/tmp/crashdb`, leaving `/tmp/cr`. Although this l
 NO_EDITS
 `````
 
+## Example 6
+
+The user accepted a prediction for a function, then started renaming it. The original arguments were auto-generated (marked with `// User accepted prediction:`), so they CAN be updated to match the new function name. This is NOT reverting user input—it's improving auto-generated scaffolding.
+
+### User Edit History
+
+`````
+--- a/math_utils.py
++++ b/math_utils.py
+@@ -3,3 +3,5 @@
+ def calculate_rectangle_area(width, height):
+     return width * height
+
++de
+
+// User accepted prediction:
+--- a/math_utils.py
++++ b/math_utils.py
+@@ -3,5 +3,7 @@
+ def calculate_rectangle_area(width, height):
+     return width * height
+
+-de
++def calculate_rectangle_perimeter(width, height):
++
+
+--- a/math_utils.py
++++ b/math_utils.py
+@@ -5,5 +5,5 @@
+     return width * height
+
+-def calculate_rectangle_perimeter(width, height):
++def calculate_sq_perimeter(width, height):
+
+`````
+
+### Current File
+
+`````math_utils.py
+def calculate_rectangle_area(width, height):
+    return width * height
+
+<|editable_region_start|>
+def calculate_sq<|user_cursor|>_perimeter(width, height):
+
+<|editable_region_end|>
+`````
+
+### Output
+
+The user accepted a prediction for `calculate_rectangle_perimeter(width, height)`, then started renaming `rectangle` to `square`. Since squares have equal sides, the arguments should change from `(width, height)` to `(side)`. The arguments were auto-generated (from an accepted prediction), so modifying them is appropriate.
+
+`````
+<|editable_region_start|>
+def calculate_square_perimeter(side):
+    <|user_cursor|>
+<|editable_region_end|>
+`````
+
+
 
 # Your task: