evals: Switch disable_cursor_blinking to deterministic asserts (#34398)

Oleksiy Syvokon created

Release Notes:

- N/A

Change summary

crates/assistant_tools/src/edit_agent/evals.rs                                                | 24 
crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-01.diff | 28 
crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-02.diff | 29 
crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-03.diff | 34 
crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-04.diff | 33 
5 files changed, 137 insertions(+), 11 deletions(-)

Detailed changes

crates/assistant_tools/src/edit_agent/evals.rs 🔗

@@ -365,17 +365,23 @@ fn eval_disable_cursor_blinking() {
     //  Model                          | Pass rate
     // ============================================
     //
-    //  claude-3.7-sonnet              |  0.99 (2025-06-14)
-    //  claude-sonnet-4                |  0.85 (2025-06-14)
-    //  gemini-2.5-pro-preview-latest  |  0.97 (2025-06-16)
-    //  gemini-2.5-flash-preview-04-17 |
-    //  gpt-4.1                        |
+    //  claude-3.7-sonnet              |  0.59 (2025-07-14)
+    //  claude-sonnet-4                |  0.81 (2025-07-14)
+    //  gemini-2.5-pro                 |  0.95 (2025-07-14)
+    //  gemini-2.5-flash-preview-04-17 |  0.78 (2025-07-14)
+    //  gpt-4.1                        |  0.00 (2025-07-14) (follows edit_description too literally)
     let input_file_path = "root/editor.rs";
     let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs");
     let edit_description = "Comment out the call to `BlinkManager::enable`";
+    let possible_diffs = vec![
+        include_str!("evals/fixtures/disable_cursor_blinking/possible-01.diff"),
+        include_str!("evals/fixtures/disable_cursor_blinking/possible-02.diff"),
+        include_str!("evals/fixtures/disable_cursor_blinking/possible-03.diff"),
+        include_str!("evals/fixtures/disable_cursor_blinking/possible-04.diff"),
+    ];
     eval(
         100,
-        0.95,
+        0.51,
         0.05,
         EvalInput::from_conversation(
             vec![
@@ -433,11 +439,7 @@ fn eval_disable_cursor_blinking() {
                 ),
             ],
             Some(input_file_content.into()),
-            EvalAssertion::judge_diff(indoc! {"
-                - Calls to BlinkManager in `observe_window_activation` were commented out
-                - The call to `blink_manager.enable` above the call to show_cursor_names was commented out
-                - All the edits have valid indentation
-            "}),
+            EvalAssertion::assert_diff_any(possible_diffs),
         ),
     );
 }

crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-01.diff 🔗

@@ -0,0 +1,28 @@
+--- before.rs	2025-07-07 11:37:48.434629001 +0300
++++ expected.rs	2025-07-14 10:33:53.346906775 +0300
+@@ -1780,11 +1780,11 @@
+                 cx.observe_window_activation(window, |editor, window, cx| {
+                     let active = window.is_window_active();
+                     editor.blink_manager.update(cx, |blink_manager, cx| {
+-                        if active {
+-                            blink_manager.enable(cx);
+-                        } else {
+-                            blink_manager.disable(cx);
+-                        }
++                        // if active {
++                        //     blink_manager.enable(cx);
++                        // } else {
++                        //     blink_manager.disable(cx);
++                        // }
+                     });
+                 }),
+             ],
+@@ -18463,7 +18463,7 @@
+             }
+ 
+             self.blink_manager.update(cx, |blink_manager, cx| {
+-                blink_manager.enable(cx);
++                // blink_manager.enable(cx);
+             });
+             self.show_cursor_names(window, cx);
+             self.buffer.update(cx, |buffer, cx| {

crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-02.diff 🔗

@@ -0,0 +1,29 @@
+@@ -1778,13 +1778,13 @@
+                 cx.observe_global_in::<SettingsStore>(window, Self::settings_changed),
+                 observe_buffer_font_size_adjustment(cx, |_, cx| cx.notify()),
+                 cx.observe_window_activation(window, |editor, window, cx| {
+-                    let active = window.is_window_active();
++                    // let active = window.is_window_active();
+                     editor.blink_manager.update(cx, |blink_manager, cx| {
+-                        if active {
+-                            blink_manager.enable(cx);
+-                        } else {
+-                            blink_manager.disable(cx);
+-                        }
++                        // if active {
++                        //     blink_manager.enable(cx);
++                        // } else {
++                        //     blink_manager.disable(cx);
++                        // }
+                     });
+                 }),
+             ],
+@@ -18463,7 +18463,7 @@
+             }
+ 
+             self.blink_manager.update(cx, |blink_manager, cx| {
+-                blink_manager.enable(cx);
++                // blink_manager.enable(cx);
+             });
+             self.show_cursor_names(window, cx);
+             self.buffer.update(cx, |buffer, cx| {

crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-03.diff 🔗

@@ -0,0 +1,34 @@
+@@ -1774,17 +1774,17 @@
+                 cx.observe(&buffer, Self::on_buffer_changed),
+                 cx.subscribe_in(&buffer, window, Self::on_buffer_event),
+                 cx.observe_in(&display_map, window, Self::on_display_map_changed),
+-                cx.observe(&blink_manager, |_, _, cx| cx.notify()),
++                // cx.observe(&blink_manager, |_, _, cx| cx.notify()),
+                 cx.observe_global_in::<SettingsStore>(window, Self::settings_changed),
+                 observe_buffer_font_size_adjustment(cx, |_, cx| cx.notify()),
+                 cx.observe_window_activation(window, |editor, window, cx| {
+-                    let active = window.is_window_active();
++                    // let active = window.is_window_active();
+                     editor.blink_manager.update(cx, |blink_manager, cx| {
+-                        if active {
+-                            blink_manager.enable(cx);
+-                        } else {
+-                            blink_manager.disable(cx);
+-                        }
++                        // if active {
++                        //     blink_manager.enable(cx);
++                        // } else {
++                        //     blink_manager.disable(cx);
++                        // }
+                     });
+                 }),
+             ],
+@@ -18463,7 +18463,7 @@
+             }
+ 
+             self.blink_manager.update(cx, |blink_manager, cx| {
+-                blink_manager.enable(cx);
++                // blink_manager.enable(cx);
+             });
+             self.show_cursor_names(window, cx);
+             self.buffer.update(cx, |buffer, cx| {

crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-04.diff 🔗

@@ -0,0 +1,33 @@
+@@ -1774,17 +1774,17 @@
+                 cx.observe(&buffer, Self::on_buffer_changed),
+                 cx.subscribe_in(&buffer, window, Self::on_buffer_event),
+                 cx.observe_in(&display_map, window, Self::on_display_map_changed),
+-                cx.observe(&blink_manager, |_, _, cx| cx.notify()),
++                // cx.observe(&blink_manager, |_, _, cx| cx.notify()),
+                 cx.observe_global_in::<SettingsStore>(window, Self::settings_changed),
+                 observe_buffer_font_size_adjustment(cx, |_, cx| cx.notify()),
+                 cx.observe_window_activation(window, |editor, window, cx| {
+                     let active = window.is_window_active();
+                     editor.blink_manager.update(cx, |blink_manager, cx| {
+-                        if active {
+-                            blink_manager.enable(cx);
+-                        } else {
+-                            blink_manager.disable(cx);
+-                        }
++                        // if active {
++                        //     blink_manager.enable(cx);
++                        // } else {
++                        //     blink_manager.disable(cx);
++                        // }
+                     });
+                 }),
+             ],
+@@ -18463,7 +18463,7 @@
+             }
+ 
+             self.blink_manager.update(cx, |blink_manager, cx| {
+-                blink_manager.enable(cx);
++                // blink_manager.enable(cx);
+             });
+             self.show_cursor_names(window, cx);
+             self.buffer.update(cx, |buffer, cx| {