evals.rs

 1use std::str::FromStr;
 2
 3use crate::inline_assistant::test::run_inline_assistant_test;
 4
 5use eval_utils::{EvalOutput, NoProcessor};
 6use gpui::TestAppContext;
 7use language_model::{LanguageModelRegistry, SelectedModel};
 8use rand::{SeedableRng as _, rngs::StdRng};
 9
/// Eval: with a single cursor inside a struct field, ask the inline assistant
/// to rename that field to `buffer_text` and require an exact-match result.
///
/// Ignored unless the `unit-eval` feature is enabled.
#[test]
#[cfg_attr(not(feature = "unit-eval"), ignore)]
fn eval_single_cursor_edit() {
    /// Passes only when `output` is exactly the struct with `text` renamed to
    /// `buffer_text`; any other output fails and is echoed in the message.
    fn judge_rename(_input: &EvalInput, output: &str) -> EvalOutput<()> {
        let expected = indoc::indoc! {"
            struct EvalExampleStruct {
                buffer_text: String,
                prompt: String,
            }
            "};

        let (outcome, data) = if output == expected {
            (eval_utils::OutcomeKind::Passed, "Passed!".to_string())
        } else {
            (
                eval_utils::OutcomeKind::Failed,
                format!("Failed to rename variable, output: {}", output),
            )
        };

        EvalOutput {
            outcome,
            data,
            metadata: (),
        }
    }

    // NOTE(review): `20` and `1.0` are presumably the iteration count and the
    // required pass rate; confirm against `eval_utils::eval`.
    eval_utils::eval(20, 1.0, NoProcessor, move || {
        // `ˇ` presumably marks the cursor position in the starting buffer.
        let input = EvalInput {
            prompt: "Rename this variable to buffer_text".to_string(),
            buffer: indoc::indoc! {"
                struct EvalExampleStruct {
                    text: Strˇing,
                    prompt: String,
                }
            "}
            .to_string(),
        };

        run_eval(&input, &judge_rename)
    });
}
49
/// Inputs for a single inline-assistant eval run.
struct EvalInput {
    /// Instruction text passed to the inline assistant.
    prompt: String,
    /// Starting buffer contents handed to the inline assistant test harness.
    buffer: String,
}
54
55fn run_eval(
56    input: &EvalInput,
57    judge: &dyn Fn(&EvalInput, &str) -> eval_utils::EvalOutput<()>,
58) -> eval_utils::EvalOutput<()> {
59    let dispatcher = gpui::TestDispatcher::new(StdRng::from_os_rng());
60    let mut cx = TestAppContext::build(dispatcher, None);
61    cx.skip_drawing();
62
63    let buffer_text = run_inline_assistant_test(
64        input.buffer.clone(),
65        input.prompt.clone(),
66        |cx| {
67            // Reconfigure to use a real model instead of the fake one
68            let model_name = std::env::var("ZED_AGENT_MODEL")
69                .unwrap_or("anthropic/claude-sonnet-4-latest".into());
70
71            let selected_model = SelectedModel::from_str(&model_name)
72                .expect("Invalid model format. Use 'provider/model-id'");
73
74            log::info!("Selected model: {selected_model:?}");
75
76            cx.update(|_, cx| {
77                LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
78                    registry.select_inline_assistant_model(Some(&selected_model), cx);
79                });
80            });
81        },
82        |_cx| {
83            log::info!("Waiting for actual response from the LLM...");
84        },
85        &mut cx,
86    );
87
88    judge(input, &buffer_text)
89}