1use std::str::FromStr;
2
3use crate::inline_assistant::test::run_inline_assistant_test;
4
5use eval_utils::{EvalOutput, NoProcessor};
6use gpui::TestAppContext;
7use language_model::{LanguageModelRegistry, SelectedModel};
8use rand::{SeedableRng as _, rngs::StdRng};
9
/// Eval: ask the inline assistant to rename a struct field and require the
/// resulting buffer to match the expected text exactly.
///
/// Ignored unless the `unit-eval` feature is enabled.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_single_cursor_edit() {
    // NOTE(review): `20` and `1.0` are presumably the iteration count and the
    // required pass ratio — confirm against `eval_utils::eval`.
    eval_utils::eval(20, 1.0, NoProcessor, move || {
        // Starting buffer; `ˇ` presumably marks the cursor position — confirm
        // against `run_inline_assistant_test`.
        let input = EvalInput {
            prompt: "Rename this variable to buffer_text".to_string(),
            buffer: indoc::indoc! {"
                struct EvalExampleStruct {
                    text: Strˇing,
                    prompt: String,
                }
            "}
            .to_string(),
        };

        run_eval(&input, &|_, output| {
            let expected = indoc::indoc! {"
                struct EvalExampleStruct {
                    buffer_text: String,
                    prompt: String,
                }
            "};

            // Only an exact textual match counts as a pass.
            let (outcome, data) = if output == expected {
                (eval_utils::OutcomeKind::Passed, "Passed!".to_string())
            } else {
                (
                    eval_utils::OutcomeKind::Failed,
                    format!("Failed to rename variable, output: {}", output),
                )
            };

            EvalOutput {
                outcome,
                data,
                metadata: (),
            }
        })
    });
}
49
/// Input for one inline-assistant eval run.
struct EvalInput {
    /// Initial buffer contents handed to the inline assistant test harness.
    /// NOTE(review): may contain a `ˇ` marker, presumably the cursor position — confirm.
    buffer: String,
    /// The user prompt sent to the inline assistant.
    prompt: String,
}
54
55fn run_eval(
56 input: &EvalInput,
57 judge: &dyn Fn(&EvalInput, &str) -> eval_utils::EvalOutput<()>,
58) -> eval_utils::EvalOutput<()> {
59 let dispatcher = gpui::TestDispatcher::new(StdRng::from_os_rng());
60 let mut cx = TestAppContext::build(dispatcher, None);
61 cx.skip_drawing();
62
63 let buffer_text = run_inline_assistant_test(
64 input.buffer.clone(),
65 input.prompt.clone(),
66 |cx| {
67 // Reconfigure to use a real model instead of the fake one
68 let model_name = std::env::var("ZED_AGENT_MODEL")
69 .unwrap_or("anthropic/claude-sonnet-4-latest".into());
70
71 let selected_model = SelectedModel::from_str(&model_name)
72 .expect("Invalid model format. Use 'provider/model-id'");
73
74 log::info!("Selected model: {selected_model:?}");
75
76 cx.update(|_, cx| {
77 LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
78 registry.select_inline_assistant_model(Some(&selected_model), cx);
79 });
80 });
81 },
82 |_cx| {
83 log::info!("Waiting for actual response from the LLM...");
84 },
85 &mut cx,
86 );
87
88 judge(input, &buffer_text)
89}