diff --git a/crates/edit_prediction/src/example_spec.rs b/crates/edit_prediction/src/example_spec.rs index 69ca4aedbd654ae12303371fedf30d2ffbb10ef1..3053473fd0df5acf46d4bed25fc47e913437d8a6 100644 --- a/crates/edit_prediction/src/example_spec.rs +++ b/crates/edit_prediction/src/example_spec.rs @@ -181,6 +181,7 @@ const EDIT_HISTORY_HEADING: &str = "Edit History"; const CURSOR_POSITION_HEADING: &str = "Cursor Position"; const EXPECTED_PATCH_HEADING: &str = "Expected Patch"; const REJECTED_PATCH_HEADING: &str = "Rejected Patch"; +const ACCEPTED_PREDICTION_MARKER: &str = "// User accepted prediction:"; #[derive(Serialize, Deserialize)] struct FrontMatter<'a> { @@ -352,6 +353,7 @@ impl ExampleSpec { } let mut current_section = Section::Start; + let mut next_edit_predicted = false; for event in parser { match event { @@ -387,6 +389,12 @@ impl ExampleSpec { anyhow::bail!("Unexpected heading level: {level}"); } Event::Start(Tag::CodeBlock(kind)) => { + if current_section == Section::EditHistory + && text.trim() == ACCEPTED_PREDICTION_MARKER + { + next_edit_predicted = true; + } + text.clear(); match kind { CodeBlockKind::Fenced(info) => { block_info = info; @@ -407,6 +415,11 @@ impl ExampleSpec { spec.uncommitted_diff = mem::take(&mut text); } Section::EditHistory => { + if next_edit_predicted { + spec.edit_history + .push_str(&format!("{}\n", ACCEPTED_PREDICTION_MARKER)); + next_edit_predicted = false; + } spec.edit_history.push_str(&mem::take(&mut text)); } Section::CursorPosition => { @@ -908,4 +921,81 @@ mod tests { let results = spec.expected_patches_with_cursor_positions(); assert_eq!(results, vec![(clean_patch, None)]); } + + #[test] + fn test_from_markdown_accepted_prediction_marker() { + let markdown = indoc! {r#" + +++ + repository_url = "https://github.com/example/repo" + revision = "abc123" + +++ + + ## Edit History + + ```diff + --- a/src/main.rs + +++ b/src/main.rs + @@ -1,3 +1,3 @@ + -fn hello() {} + +fn hello_world() {} + ``` + + // User accepted prediction: + ```diff + --- a/src/main.rs + +++ b/src/main.rs + @@ -1,3 +1,3 @@ + -fn hello_world() {} + +fn hello_world() { println!("hi"); } + ``` + + ```diff + --- a/src/main.rs + +++ b/src/main.rs + @@ -1,3 +1,3 @@ + -fn hello_world() { println!("hi"); } + +fn hello_world() { println!("hello"); } + ``` + + ## Cursor Position + + ```src/main.rs + fn hello_world() { println!("hello"); } + # ^[CURSOR_POSITION] + ``` + + ## Expected Patch + + ```diff + --- a/src/main.rs + +++ b/src/main.rs + @@ -1,3 +1,3 @@ + -fn hello_world() { println!("hello"); } + +fn hello_world() { println!("hello, world!"); } + ``` + "#}; + + let spec = ExampleSpec::from_markdown(markdown).unwrap(); + + // The first diff should NOT have the marker + assert!(spec.edit_history.starts_with("--- a/src/main.rs")); + + // The second diff should be preceded by the accepted prediction marker + assert!( + spec.edit_history + .contains("// User accepted prediction:\n--- a/src/main.rs") + ); + + // Count occurrences of the marker - should be exactly one + let marker_count = spec + .edit_history + .matches("// User accepted prediction:") + .count(); + assert_eq!(marker_count, 1); + + // The third diff should NOT have the marker + // Verify all three diffs are present + let diff_count = spec.edit_history.matches("--- a/src/main.rs").count(); + assert_eq!(diff_count, 3); + } } diff --git a/crates/edit_prediction_cli/evals/flask--rename-accepted-prediction.md b/crates/edit_prediction_cli/evals/flask--rename-accepted-prediction.md new file mode 100644 index 0000000000000000000000000000000000000000..59d3764361133f00d3291bd8ac6804da8e0bdf91 --- /dev/null +++ b/crates/edit_prediction_cli/evals/flask--rename-accepted-prediction.md @@ -0,0 +1,98 @@ ++++ +repository_url = "https://github.com/pallets/flask" +revision = "2fec0b206c6e83ea813ab26597e15c96fab08be7" ++++ + +## Edit History + +```diff +--- a/tests/test_basic.py ++++ b/tests/test_basic.py +@@ -356,3 +356,6 @@ + cookie = rv.headers["set-cookie"].lower() + assert "samesite=lax" in cookie + + ++de ++ ++ + def test_missing_session(app): +``` + +// User accepted prediction: +```diff +--- a/tests/test_basic.py ++++ b/tests/test_basic.py +@@ -358,6 +358,14 @@ + + +-de ++def test_session_cookie_httponly(app, client): ++ app.config["SESSION_COOKIE_HTTPONLY"] = True ++ ++ @app.route("/") ++ def index(): ++ flask.session["testing"] = 42 ++ return "Hello World" ++ ++ rv = client.get("/") ++ assert "httponly" in rv.headers["set-cookie"].lower() + + + def test_missing_session(app): +``` + +```diff +--- a/tests/test_basic.py ++++ b/tests/test_basic.py +@@ -358,14 +358,14 @@ + + +-def test_session_cookie_httponly(app, client): ++def test_session_cookie_secur(app, client): + app.config["SESSION_COOKIE_HTTPONLY"] = True +``` + +## Cursor Position + +```tests/test_basic.py + cookie = rv.headers["set-cookie"].lower() + assert "samesite=lax" in cookie + + +def test_session_cookie_secur(app, client): +# ^[CURSOR_POSITION] + app.config["SESSION_COOKIE_HTTPONLY"] = True + + @app.route("/") + def index(): + flask.session["testing"] = 42 + return "Hello World" + + rv = client.get("/") + assert "httponly" in rv.headers["set-cookie"].lower() + + +def test_missing_session(app): +``` + +## Expected Patch + +```diff +--- a/tests/test_basic.py ++++ b/tests/test_basic.py +@@ -358,14 +358,14 @@ +-def test_session_cookie_secur(app, client): +- app.config["SESSION_COOKIE_HTTPONLY"] = True ++def test_session_cookie_secure(app, client): ++ app.config["SESSION_COOKIE_SECURE"] = True + + @app.route("/") + def index(): + flask.session["testing"] = 42 + return "Hello World" + + rv = client.get("/") +- assert "httponly" in rv.headers["set-cookie"].lower() ++ assert "secure" in rv.headers["set-cookie"].lower() +``` diff --git a/crates/edit_prediction_cli/evals/hello-world--rename-accepted-group-by.md b/crates/edit_prediction_cli/evals/hello-world--rename-accepted-group-by.md new file mode 100644 index 0000000000000000000000000000000000000000..a16214bfc62f2acec2dc13d71871893c8e5f50d4 --- /dev/null +++ b/crates/edit_prediction_cli/evals/hello-world--rename-accepted-group-by.md @@ -0,0 +1,81 @@ ++++ +repository_url = "https://github.com/octocat/hello-world" +revision = "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d" ++++ + +## Edit History + +```diff +--- a/README ++++ b/README +@@ -1,1 +1,6 @@ +-Hello World! ++function filterByStatus(items, status) { ++ return items.filter(item => item.status === status); ++} ++ ++function groupBy ++ +``` + +// User accepted prediction: +```diff +--- a/README ++++ b/README +@@ -4,3 +4,9 @@ + +-function groupBy ++function groupByStatus(items) { ++ return items.reduce((groups, item) => { ++ const key = item.status; ++ (groups[key] = groups[key] || []).push(item); ++ return groups; ++ }, {}); ++} + +``` + +```diff +--- a/README ++++ b/README +@@ -4,4 +4,4 @@ + +-function groupByStatus(items) { ++function groupByCat(items) { + return items.reduce((groups, item) => { +``` + +## Cursor Position + +```README +function filterByStatus(items, status) { + return items.filter(item => item.status === status); +} + +function groupByCat(items) { +# ^[CURSOR_POSITION] + return items.reduce((groups, item) => { + const key = item.status; + (groups[key] = groups[key] || []).push(item); + return groups; + }, {}); +} + +``` + +## Expected Patch + +```diff +--- a/README ++++ b/README +@@ -5,7 +5,7 @@ +-function groupByCat(items) { ++function groupByCategory(items) { +# ^[CURSOR_POSITION] + return items.reduce((groups, item) => { +- const key = item.status; ++ const key = item.category; + (groups[key] = groups[key] || []).push(item); + return groups; + }, {}); +``` diff --git a/crates/edit_prediction_cli/src/prompts/teacher.md b/crates/edit_prediction_cli/src/prompts/teacher.md index b5a07000a9d31144ab4886d2a3db4da03181ecf4..524109c7a418fbc54402cf53e28c1b67259009f7 100644 --- a/crates/edit_prediction_cli/src/prompts/teacher.md +++ b/crates/edit_prediction_cli/src/prompts/teacher.md @@ -238,65 +238,6 @@ The user just fixed a bug in the `add` function, changing subtraction to additio NO_EDITS ````` -## Example 6 - -The user accepted a prediction for a function, then started renaming it. The original arguments were auto-generated (marked with `// User accepted prediction:`), so they CAN be updated to match the new function name. This is NOT reverting user input—it's improving auto-generated scaffolding. - -### User Edit History - -````` ---- a/math_utils.py -+++ b/math_utils.py -@@ -3,3 +3,5 @@ - def calculate_rectangle_area(width, height): - return width * height - -+de - -// User accepted prediction: ---- a/math_utils.py -+++ b/math_utils.py -@@ -3,5 +3,7 @@ - def calculate_rectangle_area(width, height): - return width * height - --de -+def calculate_rectangle_perimeter(width, height): -+ - ---- a/math_utils.py -+++ b/math_utils.py -@@ -5,5 +5,5 @@ - return width * height - --def calculate_rectangle_perimeter(width, height): -+def calculate_sq_perimeter(width, height): - -````` - -### Current File - -`````math_utils.py -def calculate_rectangle_area(width, height): - return width * height - -<|editable_region_start|> -def calculate_sq<|user_cursor|>_perimeter(width, height): - -<|editable_region_end|> -````` - -### Output - -The user accepted a prediction for `calculate_rectangle_perimeter(width, height)`, then started renaming `rectangle` to `square`. Since squares have equal sides, the arguments should change from `(width, height)` to `(side)`. The arguments were auto-generated (from an accepted prediction), so modifying them is appropriate. - -````` -<|editable_region_start|> -def calculate_square_perimeter(side): - <|user_cursor|> -<|editable_region_end|> -````` - ## Example 5 The user just deleted code, leaving behind what looks incomplete. You must NOT "complete" it by restoring deleted content—that would undo their edit. Output NO_EDITS. **This is the correct response even though the code appears broken.** diff --git a/typos.toml b/typos.toml index 402fb6169297619b7f24aa59f6a817918eba81a7..f2240ad01b84c59cf3ce4cb538f5b0b19bfe343e 100644 --- a/typos.toml +++ b/typos.toml @@ -60,6 +60,8 @@ extend-exclude = [ "crates/gpui/src/platform/mac/dispatcher.rs", # Tests contain partially incomplete words (by design) "crates/edit_prediction_cli/src/split_commit.rs", + # Eval examples contain intentionally partial words (e.g. "secur" for "secure") + "crates/edit_prediction_cli/evals/", # Tests contain `baˇr` that cause `"ba" should be "by" or "be".`-like false-positives "crates/editor/src/document_symbols.rs", ]