Fix leak detector causing panics in unit evals (#56029)

Bennet Bo Fenner created

Fixed an issue where the leak detector would sometimes cause panics when
running unit evals. The fix mirrors the tear-down logic used in the
`gpui::test` macro.

> thread 'tools::evals::edit_file::eval_from_pixels_constructor'
(14336149) panicked at crates/gpui/src/app/entity_map.rs:1116:9:
Exited with leaked handles:
Leaked handle for entity language::buffer::Buffer (EntityId(50v1)):

Release Notes:

- N/A

Change summary

crates/agent/src/tools/evals.rs               | 46 ++++++++++++++++++++
crates/agent/src/tools/evals/edit_file.rs     | 38 ++++++----------
crates/agent/src/tools/evals/terminal_tool.rs | 40 +++++++----------
crates/agent/src/tools/evals/write_file.rs    | 36 +++++----------
4 files changed, 90 insertions(+), 70 deletions(-)

Detailed changes

crates/agent/src/tools/evals.rs 🔗

@@ -1,6 +1,52 @@
+#[cfg(all(test, feature = "unit-eval"))]
+use futures::future::LocalBoxFuture;
+#[cfg(all(test, feature = "unit-eval"))]
+use gpui::TestAppContext;
+#[cfg(all(test, feature = "unit-eval"))]
+use std::fmt::Display;
+
 #[cfg(all(test, feature = "unit-eval"))]
 mod edit_file;
 #[cfg(all(test, feature = "unit-eval"))]
 mod terminal_tool;
 #[cfg(all(test, feature = "unit-eval"))]
 mod write_file;
+
+#[cfg(all(test, feature = "unit-eval"))]
+fn run_gpui_eval<T>(
+    eval: impl for<'a> FnOnce(&'a mut TestAppContext) -> LocalBoxFuture<'a, anyhow::Result<T>>,
+    outcome: impl FnOnce(&T) -> eval_utils::OutcomeKind,
+) -> eval_utils::EvalOutput<()>
+where
+    T: Display,
+{
+    let dispatcher = gpui::TestDispatcher::new(rand::random());
+    let mut cx = TestAppContext::build(dispatcher.clone(), None);
+    let entity_refcounts = cx.app.borrow().ref_counts_drop_handle();
+    let foreground_executor = cx.foreground_executor().clone();
+    let result = foreground_executor.block_test(eval(&mut cx));
+
+    cx.run_until_parked();
+    cx.update(|cx| {
+        cx.background_executor().forbid_parking();
+        cx.quit();
+    });
+    cx.run_until_parked();
+    drop(cx);
+    dispatcher.drain_tasks();
+    drop(dispatcher);
+    drop(entity_refcounts);
+
+    match result {
+        Ok(output) => eval_utils::EvalOutput {
+            data: output.to_string(),
+            outcome: outcome(&output),
+            metadata: (),
+        },
+        Err(err) => eval_utils::EvalOutput {
+            data: format!("{err:?}"),
+            outcome: eval_utils::OutcomeKind::Error,
+            metadata: (),
+        },
+    }
+}

crates/agent/src/tools/evals/edit_file.rs 🔗

@@ -547,33 +547,25 @@ impl EditToolTest {
 }
 
 fn run_eval(eval: EvalInput) -> eval_utils::EvalOutput<()> {
-    let dispatcher = gpui::TestDispatcher::new(rand::random());
-    let mut cx = TestAppContext::build(dispatcher, None);
-    let foreground_executor = cx.foreground_executor().clone();
-    let result = foreground_executor.block_test(async {
-        let test = EditToolTest::new(&mut cx).await;
-        let result = test.eval(eval, &mut cx).await;
-        drop(test);
-        cx.run_until_parked();
-        result
-    });
-    cx.quit();
-    match result {
-        Ok(output) => eval_utils::EvalOutput {
-            data: output.to_string(),
-            outcome: if output.assertion.score < 80 {
+    super::run_gpui_eval(
+        |cx| {
+            async move {
+                let test = EditToolTest::new(cx).await;
+                let result = test.eval(eval, cx).await;
+                drop(test);
+                cx.run_until_parked();
+                result
+            }
+            .boxed_local()
+        },
+        |output| {
+            if output.assertion.score < 80 {
                 eval_utils::OutcomeKind::Failed
             } else {
                 eval_utils::OutcomeKind::Passed
-            },
-            metadata: (),
-        },
-        Err(err) => eval_utils::EvalOutput {
-            data: format!("{err:?}"),
-            outcome: eval_utils::OutcomeKind::Error,
-            metadata: (),
+            }
         },
-    }
+    )
 }
 
 fn message(

crates/agent/src/tools/evals/terminal_tool.rs 🔗

@@ -2,7 +2,7 @@ use crate::{AgentTool, Template, Templates, TerminalTool, TerminalToolInput};
 use Role::*;
 use anyhow::{Context as _, Result};
 use client::{Client, RefreshLlmTokenListener, UserStore};
-use futures::StreamExt;
+use futures::{FutureExt as _, StreamExt};
 use gpui::{AppContext as _, AsyncApp, TestAppContext};
 use http_client::StatusCode;
 use language_model::{
@@ -428,33 +428,25 @@ async fn retry_on_rate_limit<R>(mut request: impl AsyncFnMut() -> Result<R>) ->
 }
 
 fn run_eval(eval: EvalInput) -> eval_utils::EvalOutput<()> {
-    let dispatcher = gpui::TestDispatcher::new(rand::random());
-    let mut cx = TestAppContext::build(dispatcher, None);
-    let foreground_executor = cx.foreground_executor().clone();
-    let result = foreground_executor.block_test(async {
-        let test = TerminalToolTest::new(&mut cx).await;
-        let result = test.eval(eval, &mut cx).await;
-        drop(test);
-        cx.run_until_parked();
-        result
-    });
-    cx.quit();
-    match result {
-        Ok(output) => eval_utils::EvalOutput {
-            data: output.to_string(),
-            outcome: if output.assertion.score < 80 {
+    super::run_gpui_eval(
+        |cx| {
+            async move {
+                let test = TerminalToolTest::new(cx).await;
+                let result = test.eval(eval, cx).await;
+                drop(test);
+                cx.run_until_parked();
+                result
+            }
+            .boxed_local()
+        },
+        |output| {
+            if output.assertion.score < 80 {
                 eval_utils::OutcomeKind::Failed
             } else {
                 eval_utils::OutcomeKind::Passed
-            },
-            metadata: (),
-        },
-        Err(err) => eval_utils::EvalOutput {
-            data: format!("{err:?}"),
-            outcome: eval_utils::OutcomeKind::Error,
-            metadata: (),
+            }
         },
-    }
+    )
 }
 
 fn message(

crates/agent/src/tools/evals/write_file.rs 🔗

@@ -6,7 +6,7 @@ use Role::*;
 use anyhow::{Context as _, Result};
 use client::{Client, RefreshLlmTokenListener, UserStore};
 use fs::FakeFs;
-use futures::StreamExt;
+use futures::{FutureExt as _, StreamExt};
 use gpui::{AppContext as _, AsyncApp, Entity, TestAppContext, UpdateGlobal as _};
 use http_client::StatusCode;
 use language::language_settings::FormatOnSave;
@@ -365,29 +365,19 @@ impl WriteToolTest {
 }
 
 fn run_eval(eval: EvalInput) -> eval_utils::EvalOutput<()> {
-    let dispatcher = gpui::TestDispatcher::new(rand::random());
-    let mut cx = TestAppContext::build(dispatcher, None);
-    let foreground_executor = cx.foreground_executor().clone();
-    let result = foreground_executor.block_test(async {
-        let test = WriteToolTest::new(&mut cx).await;
-        let result = test.eval(eval, &mut cx).await;
-        drop(test);
-        cx.run_until_parked();
-        result
-    });
-    cx.quit();
-    match result {
-        Ok(output) => eval_utils::EvalOutput {
-            data: output.to_string(),
-            outcome: eval_utils::OutcomeKind::Passed,
-            metadata: (),
-        },
-        Err(err) => eval_utils::EvalOutput {
-            data: format!("{err:?}"),
-            outcome: eval_utils::OutcomeKind::Error,
-            metadata: (),
+    super::run_gpui_eval(
+        |cx| {
+            async move {
+                let test = WriteToolTest::new(cx).await;
+                let result = test.eval(eval, cx).await;
+                drop(test);
+                cx.run_until_parked();
+                result
+            }
+            .boxed_local()
         },
-    }
+        |_| eval_utils::OutcomeKind::Passed,
+    )
 }
 
 fn message(