From 47ea7de9c8e154fa5ba27c36d0907ea0a60c51e9 Mon Sep 17 00:00:00 2001 From: Bennet Bo Fenner Date: Thu, 7 May 2026 12:51:34 +0200 Subject: [PATCH] Fix leak detector causing panics in unit evals (#56029) Fixed an issue where the leak detector would sometimes cause panics when running unit evals. Fixed this by matching the tear-down logic that we use in the `gpui::test` macro > thread 'tools::evals::edit_file::eval_from_pixels_constructor' (14336149) panicked at crates/gpui/src/app/entity_map.rs:1116:9: Exited with leaked handles: Leaked handle for entity language::buffer::Buffer (EntityId(50v1)): Release Notes: - N/A --- crates/agent/src/tools/evals.rs | 46 +++++++++++++++++++ crates/agent/src/tools/evals/edit_file.rs | 38 ++++++--------- crates/agent/src/tools/evals/terminal_tool.rs | 40 +++++++++--------- crates/agent/src/tools/evals/write_file.rs | 36 ++++++--------- 4 files changed, 90 insertions(+), 70 deletions(-) diff --git a/crates/agent/src/tools/evals.rs b/crates/agent/src/tools/evals.rs index 3096068931161d0963028b28af12a1d178ca9b0d..ac11ffe74a03a47e4de24a5af126b7b3309e2cdf 100644 --- a/crates/agent/src/tools/evals.rs +++ b/crates/agent/src/tools/evals.rs @@ -1,6 +1,52 @@ +#[cfg(all(test, feature = "unit-eval"))] +use futures::future::LocalBoxFuture; +#[cfg(all(test, feature = "unit-eval"))] +use gpui::TestAppContext; +#[cfg(all(test, feature = "unit-eval"))] +use std::fmt::Display; + #[cfg(all(test, feature = "unit-eval"))] mod edit_file; #[cfg(all(test, feature = "unit-eval"))] mod terminal_tool; #[cfg(all(test, feature = "unit-eval"))] mod write_file; + +#[cfg(all(test, feature = "unit-eval"))] +fn run_gpui_eval<T>( + eval: impl for<'a> FnOnce(&'a mut TestAppContext) -> LocalBoxFuture<'a, anyhow::Result<T>>, + outcome: impl FnOnce(&T) -> eval_utils::OutcomeKind, +) -> eval_utils::EvalOutput<()> +where + T: Display, +{ + let dispatcher = gpui::TestDispatcher::new(rand::random()); + let mut cx = TestAppContext::build(dispatcher.clone(), None); + let
entity_refcounts = cx.app.borrow().ref_counts_drop_handle(); + let foreground_executor = cx.foreground_executor().clone(); + let result = foreground_executor.block_test(eval(&mut cx)); + + cx.run_until_parked(); + cx.update(|cx| { + cx.background_executor().forbid_parking(); + cx.quit(); + }); + cx.run_until_parked(); + drop(cx); + dispatcher.drain_tasks(); + drop(dispatcher); + drop(entity_refcounts); + + match result { + Ok(output) => eval_utils::EvalOutput { + data: output.to_string(), + outcome: outcome(&output), + metadata: (), + }, + Err(err) => eval_utils::EvalOutput { + data: format!("{err:?}"), + outcome: eval_utils::OutcomeKind::Error, + metadata: (), + }, + } +} diff --git a/crates/agent/src/tools/evals/edit_file.rs b/crates/agent/src/tools/evals/edit_file.rs index 4c96b0797f87709ba93a40040110bbf6c943990a..79c5a7c2689524dc9415c58b85609c1cac57ba31 100644 --- a/crates/agent/src/tools/evals/edit_file.rs +++ b/crates/agent/src/tools/evals/edit_file.rs @@ -547,33 +547,25 @@ impl EditToolTest { } fn run_eval(eval: EvalInput) -> eval_utils::EvalOutput<()> { - let dispatcher = gpui::TestDispatcher::new(rand::random()); - let mut cx = TestAppContext::build(dispatcher, None); - let foreground_executor = cx.foreground_executor().clone(); - let result = foreground_executor.block_test(async { - let test = EditToolTest::new(&mut cx).await; - let result = test.eval(eval, &mut cx).await; - drop(test); - cx.run_until_parked(); - result - }); - cx.quit(); - match result { - Ok(output) => eval_utils::EvalOutput { - data: output.to_string(), - outcome: if output.assertion.score < 80 { + super::run_gpui_eval( + |cx| { + async move { + let test = EditToolTest::new(cx).await; + let result = test.eval(eval, cx).await; + drop(test); + cx.run_until_parked(); + result + } + .boxed_local() + }, + |output| { + if output.assertion.score < 80 { eval_utils::OutcomeKind::Failed } else { eval_utils::OutcomeKind::Passed - }, - metadata: (), - }, - Err(err) => eval_utils::EvalOutput { - 
data: format!("{err:?}"), - outcome: eval_utils::OutcomeKind::Error, - metadata: (), + } }, - } + ) } fn message( diff --git a/crates/agent/src/tools/evals/terminal_tool.rs b/crates/agent/src/tools/evals/terminal_tool.rs index 3769df5abed0bc2de9f9522c51773c88fda7c19e..92ebd61d1622d50578fc70526b0d66c741a0c89b 100644 --- a/crates/agent/src/tools/evals/terminal_tool.rs +++ b/crates/agent/src/tools/evals/terminal_tool.rs @@ -2,7 +2,7 @@ use crate::{AgentTool, Template, Templates, TerminalTool, TerminalToolInput}; use Role::*; use anyhow::{Context as _, Result}; use client::{Client, RefreshLlmTokenListener, UserStore}; -use futures::StreamExt; +use futures::{FutureExt as _, StreamExt}; use gpui::{AppContext as _, AsyncApp, TestAppContext}; use http_client::StatusCode; use language_model::{ @@ -428,33 +428,25 @@ async fn retry_on_rate_limit(mut request: impl AsyncFnMut() -> Result) -> } fn run_eval(eval: EvalInput) -> eval_utils::EvalOutput<()> { - let dispatcher = gpui::TestDispatcher::new(rand::random()); - let mut cx = TestAppContext::build(dispatcher, None); - let foreground_executor = cx.foreground_executor().clone(); - let result = foreground_executor.block_test(async { - let test = TerminalToolTest::new(&mut cx).await; - let result = test.eval(eval, &mut cx).await; - drop(test); - cx.run_until_parked(); - result - }); - cx.quit(); - match result { - Ok(output) => eval_utils::EvalOutput { - data: output.to_string(), - outcome: if output.assertion.score < 80 { + super::run_gpui_eval( + |cx| { + async move { + let test = TerminalToolTest::new(cx).await; + let result = test.eval(eval, cx).await; + drop(test); + cx.run_until_parked(); + result + } + .boxed_local() + }, + |output| { + if output.assertion.score < 80 { eval_utils::OutcomeKind::Failed } else { eval_utils::OutcomeKind::Passed - }, - metadata: (), - }, - Err(err) => eval_utils::EvalOutput { - data: format!("{err:?}"), - outcome: eval_utils::OutcomeKind::Error, - metadata: (), + } }, - } + ) } fn message( 
diff --git a/crates/agent/src/tools/evals/write_file.rs b/crates/agent/src/tools/evals/write_file.rs index f34528fcd78577757a2189ae77389d35a1e0bd4a..60eda3ab3e651329b9b3269846a10b8a282907ce 100644 --- a/crates/agent/src/tools/evals/write_file.rs +++ b/crates/agent/src/tools/evals/write_file.rs @@ -6,7 +6,7 @@ use Role::*; use anyhow::{Context as _, Result}; use client::{Client, RefreshLlmTokenListener, UserStore}; use fs::FakeFs; -use futures::StreamExt; +use futures::{FutureExt as _, StreamExt}; use gpui::{AppContext as _, AsyncApp, Entity, TestAppContext, UpdateGlobal as _}; use http_client::StatusCode; use language::language_settings::FormatOnSave; @@ -365,29 +365,19 @@ impl WriteToolTest { } fn run_eval(eval: EvalInput) -> eval_utils::EvalOutput<()> { - let dispatcher = gpui::TestDispatcher::new(rand::random()); - let mut cx = TestAppContext::build(dispatcher, None); - let foreground_executor = cx.foreground_executor().clone(); - let result = foreground_executor.block_test(async { - let test = WriteToolTest::new(&mut cx).await; - let result = test.eval(eval, &mut cx).await; - drop(test); - cx.run_until_parked(); - result - }); - cx.quit(); - match result { - Ok(output) => eval_utils::EvalOutput { - data: output.to_string(), - outcome: eval_utils::OutcomeKind::Passed, - metadata: (), - }, - Err(err) => eval_utils::EvalOutput { - data: format!("{err:?}"), - outcome: eval_utils::OutcomeKind::Error, - metadata: (), + super::run_gpui_eval( + |cx| { + async move { + let test = WriteToolTest::new(cx).await; + let result = test.eval(eval, cx).await; + drop(test); + cx.run_until_parked(); + result + } + .boxed_local() }, - } + |_| eval_utils::OutcomeKind::Passed, + ) } fn message(