example.rs

  1use agent::{RequestKind, ThreadEvent, ThreadStore};
  2use anyhow::{Context as _, Result, anyhow};
  3use assistant_tool::ToolWorkingSet;
  4use client::proto::LspWorkProgress;
  5use collections::HashMap;
  6use dap::DapRegistry;
  7use futures::channel::mpsc;
  8use futures::{FutureExt, StreamExt as _, select_biased};
  9use gpui::{App, AppContext as _, AsyncApp, Entity, Task};
 10use handlebars::Handlebars;
 11use language::{DiagnosticSeverity, OffsetRangeExt};
 12use language_model::{
 13    LanguageModel, LanguageModelRequest, LanguageModelRequestMessage, MessageContent, Role,
 14    StopReason, TokenUsage,
 15};
 16use project::{LspStore, Project, ProjectPath};
 17use serde::{Deserialize, Serialize};
 18use std::fmt::Write as _;
 19use std::fs::File;
 20use std::io::Write as _;
 21use std::sync::{Arc, Mutex};
 22use std::time::Duration;
 23use std::{
 24    fs,
 25    path::{Path, PathBuf},
 26};
 27use unindent::Unindent as _;
 28use util::ResultExt as _;
 29use util::command::new_smol_command;
 30use util::serde::default_true;
 31
 32use crate::AgentAppState;
 33
 34pub const EXAMPLES_DIR: &str = "./crates/eval/examples";
 35pub const REPOS_DIR: &str = "./crates/eval/repos";
 36pub const WORKTREES_DIR: &str = "./crates/eval/worktrees";
 37
 38const THREAD_EVENT_TIMEOUT: Duration = Duration::from_secs(60 * 2);
 39
 40#[derive(Clone, Debug, Deserialize)]
 41pub struct ExampleBase {
 42    pub url: String,
 43    pub revision: String,
 44    pub language_extension: Option<String>,
 45    pub insert_id: Option<String>,
 46    #[serde(default = "default_true")]
 47    pub require_lsp: bool,
 48}
 49
 50#[derive(Clone, Debug)]
 51pub struct Example {
 52    pub name: String,
 53    /// Content of `base.toml`
 54    pub base: ExampleBase,
 55    /// Content of `prompt.md`
 56    pub prompt: String,
 57    /// Content of `criteria.md`
 58    pub criteria: String,
 59    /// Markdown output file to append to
 60    pub output_file: Option<Arc<Mutex<File>>>,
 61    /// Path to the output run directory.
 62    pub run_dir: PathBuf,
 63    /// Path to markdown output file
 64    pub output_file_path: PathBuf,
 65    /// Prefix used for logging that identifies this example
 66    pub log_prefix: String,
 67}
 68
 69#[derive(Debug, Serialize, Deserialize, Clone)]
 70pub struct RunOutput {
 71    pub repository_diff: String,
 72    pub diagnostics: String,
 73    pub response_count: usize,
 74    pub token_usage: TokenUsage,
 75    pub tool_use_counts: HashMap<Arc<str>, u32>,
 76}
 77
 78#[derive(Debug, Clone, Serialize, Deserialize)]
 79pub struct JudgeInput {
 80    pub repository_diff: String,
 81    pub criteria: String,
 82}
 83
 84#[derive(Debug, Clone, Serialize, Deserialize)]
 85pub struct JudgeOutput {
 86    pub analysis: String,
 87    pub score: u32,
 88}
 89
 90impl Example {
 91    /// Load an example from a directory containing base.toml, prompt.md, and criteria.md
 92    pub fn load_from_directory(dir_path: &Path, run_dir: &Path) -> Result<Self> {
 93        let name = Self::name_from_path(dir_path);
 94        let base_path = dir_path.join("base.toml");
 95        let prompt_path = dir_path.join("prompt.md");
 96        let criteria_path = dir_path.join("criteria.md");
 97
 98        let output_file_path = run_dir.join(format!(
 99            "{}.md",
100            dir_path.file_name().unwrap().to_str().unwrap()
101        ));
102
103        Ok(Example {
104            name: name.clone(),
105            base: toml::from_str(&fs::read_to_string(&base_path)?)?,
106            prompt: fs::read_to_string(prompt_path.clone())?,
107            criteria: fs::read_to_string(criteria_path.clone())?,
108            run_dir: run_dir.to_path_buf(),
109            output_file: None,
110            output_file_path,
111            log_prefix: name,
112        })
113    }
114
115    pub fn set_log_prefix_style(&mut self, color: &str, name_width: usize) {
116        self.log_prefix = format!(
117            "{}{:<width$}\x1b[0m | ",
118            color,
119            self.name,
120            width = name_width
121        );
122    }
123
124    pub fn name_from_path(path: &Path) -> String {
125        path.file_name().unwrap().to_string_lossy().to_string()
126    }
127
128    pub fn worktree_path(&self) -> PathBuf {
129        Path::new(WORKTREES_DIR)
130            .canonicalize()
131            .context(format!("No such directory {WORKTREES_DIR}"))
132            .unwrap()
133            .join(&self.name)
134    }
135
136    /// Set up the example by checking out the specified Git revision
137    pub async fn setup(&mut self) -> Result<()> {
138        let repo_path = repo_path_for_url(&self.base.url);
139
140        println!("{}Fetching", self.log_prefix);
141
142        run_git(
143            &repo_path,
144            &["fetch", "--depth", "1", "origin", &self.base.revision],
145        )
146        .await?;
147
148        let worktree_path = self.worktree_path();
149
150        if worktree_path.is_dir() {
151            println!("{}Resetting existing worktree", self.log_prefix);
152
153            // TODO: consider including "-x" to remove ignored files. The downside of this is that
154            // it will also remove build artifacts, and so prevent incremental reuse there.
155            run_git(&worktree_path, &["clean", "--force", "-d"]).await?;
156            run_git(&worktree_path, &["reset", "--hard", "HEAD"]).await?;
157            run_git(&worktree_path, &["checkout", &self.base.revision]).await?;
158        } else {
159            println!("{}Creating worktree", self.log_prefix);
160
161            let worktree_path_string = worktree_path.to_string_lossy().to_string();
162
163            run_git(
164                &repo_path,
165                &[
166                    "worktree",
167                    "add",
168                    "-f",
169                    &worktree_path_string,
170                    &self.base.revision,
171                ],
172            )
173            .await?;
174        }
175
176        // Create the output file
177        let output_file = Arc::new(Mutex::new(File::create(&self.output_file_path)?));
178        self.output_file = Some(output_file);
179
180        Ok(())
181    }
182
183    /// Returns the output file, panicking if it's not set
184    fn output_file(&self) -> Arc<Mutex<File>> {
185        self.output_file
186            .clone()
187            .expect("Output file not created. Call setup() first.")
188    }
189
190    pub fn run(
191        &self,
192        model: Arc<dyn LanguageModel>,
193        app_state: Arc<AgentAppState>,
194        cx: &mut App,
195    ) -> Task<Result<RunOutput>> {
196        let project = Project::local(
197            app_state.client.clone(),
198            app_state.node_runtime.clone(),
199            app_state.user_store.clone(),
200            app_state.languages.clone(),
201            Arc::new(DapRegistry::default()),
202            app_state.fs.clone(),
203            None,
204            cx,
205        );
206
207        let worktree_path = self.worktree_path();
208        let worktree = project.update(cx, |project, cx| {
209            project.create_worktree(&worktree_path, true, cx)
210        });
211
212        let tools = cx.new(|_| ToolWorkingSet::default());
213        let thread_store =
214            ThreadStore::load(project.clone(), tools, app_state.prompt_builder.clone(), cx);
215        let this = self.clone();
216
217        cx.spawn(async move |cx| {
218            let worktree = worktree.await?;
219
220            // Wait for worktree scan to finish before choosing a file to open.
221            worktree
222                .update(cx, |worktree, _cx| {
223                    worktree.as_local().unwrap().scan_complete()
224                })?
225                .await;
226
227            let lsp_open_handle_and_store = if this.base.require_lsp {
228                let language_extension = this.base.language_extension.as_deref().context(
229                    "language_extension field is required in base.toml when `require_lsp == true`",
230                )?;
231
232                // Open a file that matches the language to cause LSP to start.
233                let language_file = worktree.read_with(cx, |worktree, _cx| {
234                    worktree
235                        .files(false, 0)
236                        .find_map(|e| {
237                            if e.path.clone().extension().and_then(|ext| ext.to_str())
238                                == Some(language_extension)
239                            {
240                                Some(ProjectPath {
241                                    worktree_id: worktree.id(),
242                                    path: e.path.clone(),
243                                })
244                            } else {
245                                None
246                            }
247                        })
248                        .context("Failed to find a file for example language")
249                })??;
250
251                let open_language_file_buffer_task = project.update(cx, |project, cx| {
252                    project.open_buffer(language_file.clone(), cx)
253                })?;
254
255                let language_file_buffer = open_language_file_buffer_task.await?;
256
257                let (lsp_open_handle, lsp_store) = project.update(cx, |project, cx| {
258                    (
259                        project.register_buffer_with_language_servers(&language_file_buffer, cx),
260                        project.lsp_store().clone(),
261                    )
262                })?;
263
264                // TODO: remove this once the diagnostics tool waits for new diagnostics
265                cx.background_executor().timer(Duration::new(5, 0)).await;
266                wait_for_lang_server(&lsp_store, this.log_prefix.clone(), cx).await?;
267
268                lsp_store.update(cx, |lsp_store, cx| {
269                    lsp_open_handle.update(cx, |buffer, cx| {
270                        buffer.update(cx, |buffer, cx| {
271                            let has_language_server = lsp_store
272                                .language_servers_for_local_buffer(buffer, cx)
273                                .next()
274                                .is_some();
275                            if has_language_server {
276                                Ok(())
277                            } else {
278                                Err(anyhow!(
279                                    "`{:?}` was opened to cause the language server to start, \
280                                    but no language servers are registered for its buffer. \
281                                    Set `require_lsp = false` in `base.toml` to skip this.",
282                                    language_file
283                                ))
284                            }
285                        })
286                    })
287                })??;
288
289                Some((lsp_open_handle, lsp_store))
290            } else {
291                None
292            };
293
294            if std::env::var("ZED_EVAL_SETUP_ONLY").is_ok() {
295                return Err(anyhow!("Setup only mode"));
296            }
297
298            let thread_store = thread_store.await;
299            let thread =
300                thread_store.update(cx, |thread_store, cx| thread_store.create_thread(cx))?;
301
302            {
303                let output_file_ref = this.output_file();
304                let mut output_file = output_file_ref.lock().unwrap();
305                writeln!(&mut output_file, "👤 USER:").log_err();
306                writeln!(&mut output_file, "{}", this.prompt).log_err();
307                writeln!(&mut output_file, "🤖 ASSISTANT:").log_err();
308                output_file.flush().log_err();
309            }
310
311            let tool_use_counts: Arc<Mutex<HashMap<Arc<str>, u32>>> =
312                Mutex::new(HashMap::default()).into();
313
314            let (thread_event_tx, mut thread_event_rx) = mpsc::unbounded();
315
316            let subscription = cx.subscribe(&thread, move |_thread, event: &ThreadEvent, _cx| {
317                thread_event_tx.unbounded_send(event.clone()).log_err();
318            });
319
320            let event_handler_task = cx.spawn({
321                // Need to clone the Arc here because the reference from output_file() won't live long enough
322                let output_file = this.output_file.clone().unwrap();
323                let log_prefix = this.log_prefix.clone();
324                let tool_use_counts = tool_use_counts.clone();
325                let thread = thread.downgrade();
326                async move |cx| {
327                    loop {
328                        let event = select_biased! {
329                            event = thread_event_rx.next() => event,
330                            _ = cx.background_executor().timer(THREAD_EVENT_TIMEOUT).fuse() => {
331                                return Err(anyhow!("Agentic loop stalled - waited {:?} without any events", THREAD_EVENT_TIMEOUT));
332                            }
333                        };
334                        let Some(event) = event else {
335                            return Err(anyhow!("ThreadEvent channel ended early"));
336                        };
337
338                        let mut output_file = output_file.lock().unwrap();
339
340                        match event {
341                            ThreadEvent::Stopped(reason) => match reason {
342                                Ok(StopReason::EndTurn) => {
343                                    return Ok(());
344                                }
345                                Ok(StopReason::MaxTokens) => {
346                                    return Err(anyhow!("Exceeded maximum tokens"));
347                                }
348                                Ok(StopReason::ToolUse) => {
349                                    if std::env::var("ZED_EVAL_DEBUG").is_ok() {
350                                        println!("{}StopReason: Tool use", log_prefix);
351                                    }
352                                }
353                                Err(error) => {
354                                    return Err(anyhow!(error.clone()));
355                                }
356                            },
357                            ThreadEvent::ShowError(thread_error) => {
358                                break Err(anyhow!(thread_error.clone()));
359                            }
360                            ThreadEvent::StreamedAssistantText(_, chunk) => {
361                                write!(&mut output_file, "{}", chunk).log_err();
362                            }
363                            ThreadEvent::StreamedAssistantThinking(_, chunk) => {
364                                write!(&mut output_file, "{}", chunk).log_err();
365                            }
366                            ThreadEvent::UsePendingTools { tool_uses } => {
367                                writeln!(&mut output_file, "\n\nUSING TOOLS:").log_err();
368                                for tool_use in tool_uses {
369                                    writeln!(&mut output_file, "{}: {}", tool_use.name, tool_use.input)
370                                        .log_err();
371                                }
372                            }
373                            ThreadEvent::ToolFinished {
374                                tool_use_id,
375                                pending_tool_use,
376                                ..
377                            } => {
378                                if let Some(tool_use) = pending_tool_use {
379                                    let message = format!("TOOL FINISHED: {}", tool_use.name);
380                                    println!("{}{message}", log_prefix);
381                                    writeln!(&mut output_file, "\n{}", message).log_err();
382                                }
383                                thread.update(cx, |thread, _cx| {
384                                    if let Some(tool_result) = thread.tool_result(&tool_use_id) {
385                                        writeln!(&mut output_file, "\n{}\n", tool_result.content).log_err();
386                                        let mut tool_use_counts = tool_use_counts.lock().unwrap();
387                                        *tool_use_counts
388                                            .entry(tool_result.tool_name.clone())
389                                            .or_insert(0) += 1;
390                                    }
391                                })?;
392                            }
393                            ThreadEvent::ToolConfirmationNeeded => {
394                                panic!("{}Bug: Tool confirmation should not be required in eval", log_prefix);
395                            },
396                            ThreadEvent::StreamedCompletion |
397                            ThreadEvent::MessageAdded(_) |
398                            ThreadEvent::MessageEdited(_) |
399                            ThreadEvent::MessageDeleted(_) |
400                            ThreadEvent::SummaryChanged |
401                            ThreadEvent::SummaryGenerated |
402                            ThreadEvent::CheckpointChanged => {
403                                if std::env::var("ZED_EVAL_DEBUG").is_ok() {
404                                    println!("{}Event: {:#?}", log_prefix, event);
405                                }
406                            }
407                        }
408
409                        output_file.flush().log_err();
410                    }
411                }
412            });
413
414            thread.update(cx, |thread, cx| {
415                let context = vec![];
416                thread.insert_user_message(this.prompt.clone(), context, None, cx);
417                thread.send_to_model(model, RequestKind::Chat, cx);
418            })?;
419
420            event_handler_task.await?;
421
422            println!("{}Stopped", this.log_prefix);
423
424            if let Some((_, lsp_store)) = lsp_open_handle_and_store.as_ref() {
425                wait_for_lang_server(lsp_store, this.log_prefix.clone(), cx).await?;
426            }
427
428            println!("{}Getting repository diff", this.log_prefix);
429            let repository_diff = this.repository_diff().await?;
430
431            let repository_diff_path = this.run_dir.join(format!("{}.diff", this.name));
432            let mut repository_diff_output_file = File::create(&repository_diff_path)?;
433            writeln!(&mut repository_diff_output_file, "{}", &repository_diff).log_err();
434
435            println!("{}Getting diagnostics", this.log_prefix);
436            let diagnostics = cx
437                .update(move |cx| {
438                    cx.spawn(async move |cx| query_lsp_diagnostics(project, cx).await)
439                })?
440                .await?;
441            println!("{}Got diagnostics", this.log_prefix);
442
443            drop(subscription);
444            drop(lsp_open_handle_and_store);
445
446            thread.update(cx, |thread, _cx| {
447                let response_count = thread
448                    .messages()
449                    .filter(|message| message.role == language_model::Role::Assistant)
450                    .count();
451                RunOutput {
452                    repository_diff,
453                    diagnostics,
454                    response_count,
455                    token_usage: thread.cumulative_token_usage(),
456                    tool_use_counts: tool_use_counts.lock().unwrap().clone(),
457                }
458            })
459        })
460    }
461
462    pub async fn judge(
463        &self,
464        model: Arc<dyn LanguageModel>,
465        repository_diff: String,
466        judge_repetitions: u32,
467        cx: &AsyncApp,
468    ) -> Result<JudgeOutput> {
469        let judge_prompt = include_str!("judge_prompt.hbs");
470        let judge_prompt_name = "judge_prompt";
471        let mut handlebars = Handlebars::new();
472        handlebars.register_template_string(judge_prompt_name, judge_prompt)?;
473        let prompt = handlebars.render(
474            judge_prompt_name,
475            &JudgeInput {
476                repository_diff,
477                criteria: self.criteria.clone(),
478            },
479        )?;
480
481        let request = LanguageModelRequest {
482            messages: vec![LanguageModelRequestMessage {
483                role: Role::User,
484                content: vec![MessageContent::Text(prompt)],
485                cache: false,
486            }],
487            temperature: None,
488            tools: Vec::new(),
489            stop: Vec::new(),
490        };
491
492        let response = send_language_model_request(model, request, cx).await?;
493
494        let judge_file_path = self.run_dir.join(format!(
495            "{}_judge_{}.md",
496            self.name, // This is the eval_name
497            judge_repetitions
498        ));
499
500        let mut judge_output_file = File::create(&judge_file_path)?;
501        writeln!(&mut judge_output_file, "{}", &response).log_err();
502
503        parse_judge_output(&response)
504    }
505
506    pub async fn repository_diff(&self) -> Result<String> {
507        let worktree_path = self.worktree_path();
508        run_git(&worktree_path, &["add", "-N"]).await?;
509        run_git(&worktree_path, &["diff"]).await
510    }
511}
512
513fn wait_for_lang_server(
514    lsp_store: &Entity<LspStore>,
515    log_prefix: String,
516    cx: &mut AsyncApp,
517) -> Task<Result<()>> {
518    if cx
519        .update(|cx| !has_pending_lang_server_work(lsp_store, cx))
520        .unwrap()
521        || std::env::var("ZED_EVAL_SKIP_LS_WAIT").is_ok()
522    {
523        return Task::ready(anyhow::Ok(()));
524    }
525
526    println!("{}⏵ Waiting for language server", log_prefix);
527
528    let (mut tx, mut rx) = mpsc::channel(1);
529
530    let subscription =
531        cx.subscribe(&lsp_store, {
532            let log_prefix = log_prefix.clone();
533            move |lsp_store, event, cx| {
534                match event {
535                    project::LspStoreEvent::LanguageServerUpdate {
536                        message:
537                            client::proto::update_language_server::Variant::WorkProgress(
538                                LspWorkProgress {
539                                    message: Some(message),
540                                    ..
541                                },
542                            ),
543                        ..
544                    } => println!("{}⟲ {message}", log_prefix),
545                    _ => {}
546                }
547
548                if !has_pending_lang_server_work(&lsp_store, cx) {
549                    tx.try_send(()).ok();
550                }
551            }
552        });
553
554    cx.spawn(async move |cx| {
555        let timeout = cx.background_executor().timer(Duration::new(60 * 5, 0));
556        let result = futures::select! {
557            _ = rx.next() => {
558                println!("{}⚑ Language server idle", log_prefix);
559                anyhow::Ok(())
560            },
561            _ = timeout.fuse() => {
562                Err(anyhow!("LSP wait timed out after 5 minutes"))
563            }
564        };
565        drop(subscription);
566        result
567    })
568}
569
570fn has_pending_lang_server_work(lsp_store: &Entity<LspStore>, cx: &App) -> bool {
571    lsp_store
572        .read(cx)
573        .language_server_statuses()
574        .any(|(_, status)| !status.pending_work.is_empty())
575}
576
577async fn query_lsp_diagnostics(project: Entity<Project>, cx: &mut AsyncApp) -> Result<String> {
578    let paths_with_diagnostics = project.update(cx, |project, cx| {
579        project
580            .diagnostic_summaries(true, cx)
581            .filter(|(_, _, summary)| summary.error_count > 0 || summary.warning_count > 0)
582            .map(|(project_path, _, _)| project_path)
583            .collect::<Vec<_>>()
584    })?;
585
586    let mut output = String::new();
587    for project_path in paths_with_diagnostics {
588        let buffer = project
589            .update(cx, |project, cx| project.open_buffer(project_path, cx))?
590            .await?;
591        let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot())?;
592
593        for (_, group) in snapshot.diagnostic_groups(None) {
594            let entry = &group.entries[group.primary_ix];
595            let range = entry.range.to_point(&snapshot);
596            let severity = match entry.diagnostic.severity {
597                DiagnosticSeverity::ERROR => "error",
598                DiagnosticSeverity::WARNING => "warning",
599                _ => continue,
600            };
601
602            writeln!(
603                output,
604                "{} at line {}: {}",
605                severity,
606                range.start.row + 1,
607                entry.diagnostic.message
608            )?;
609        }
610    }
611    anyhow::Ok(output)
612}
613
614fn parse_judge_output(response: &str) -> Result<JudgeOutput> {
615    let analysis = get_tag("analysis", response)?.to_string();
616    let score = get_tag("score", response)?
617        .parse()
618        .context("error parsing score")?;
619
620    Ok(JudgeOutput { analysis, score })
621}
622
623fn get_tag(name: &'static str, response: &str) -> Result<String> {
624    let start_tag = format!("<{}>", name);
625    let end_tag = format!("</{}>", name);
626
627    let start_ix = response
628        .find(&start_tag)
629        .context(format!("{} start tag not found", name))?;
630    let content_start_ix = start_ix + start_tag.len();
631
632    let end_ix = content_start_ix
633        + response[content_start_ix..]
634            .find(&end_tag)
635            .context(format!("{} end tag not found", name))?;
636
637    let content = response[content_start_ix..end_ix].trim().unindent();
638
639    anyhow::Ok(content)
640}
641
642pub fn repo_path_for_url(repo_url: &str) -> PathBuf {
643    let repo_name = repo_url
644        .trim_start_matches("https://")
645        .replace(|c: char| !c.is_alphanumeric(), "-");
646    Path::new(REPOS_DIR)
647        .canonicalize()
648        .context(format!("No such directory {REPOS_DIR}"))
649        .unwrap()
650        .join(repo_name)
651}
652
653pub async fn run_git(repo_path: &Path, args: &[&str]) -> Result<String> {
654    let output = new_smol_command("git")
655        .current_dir(repo_path)
656        .args(args)
657        .output()
658        .await?;
659
660    if output.status.success() {
661        Ok(String::from_utf8(output.stdout)?.trim().to_string())
662    } else {
663        Err(anyhow!(
664            "`git {}` within `{}` failed with status: {}\nstderr:\n{}\nstdout:\n{}",
665            args.join(" "),
666            repo_path.display(),
667            output.status,
668            String::from_utf8_lossy(&output.stderr),
669            String::from_utf8_lossy(&output.stdout),
670        ))
671    }
672}
673
674pub async fn send_language_model_request(
675    model: Arc<dyn LanguageModel>,
676    request: LanguageModelRequest,
677    cx: &AsyncApp,
678) -> anyhow::Result<String> {
679    match model.stream_completion_text(request, &cx).await {
680        Ok(mut stream) => {
681            let mut full_response = String::new();
682            while let Some(chunk_result) = stream.stream.next().await {
683                match chunk_result {
684                    Ok(chunk_str) => {
685                        full_response.push_str(&chunk_str);
686                    }
687                    Err(err) => {
688                        return Err(anyhow!(
689                            "Error receiving response from language model: {err}"
690                        ));
691                    }
692                }
693            }
694            Ok(full_response)
695        }
696        Err(err) => Err(anyhow!(
697            "Failed to get response from language model. Error was: {err}"
698        )),
699    }
700}
701
#[cfg(test)]
mod test {
    use super::*;

    /// Checks that both tags are extracted when the analysis is written inline and when
    /// it is an indented multi-line block surrounded by unrelated text.
    #[test]
    fn test_parse_judge_output() {
        // Tags on single lines.
        let inline_response = r#"
            <analysis>The model did a good job but there were still compilations errors.</analysis>
            <score>3</score>
        "#
        .unindent();

        let inline = parse_judge_output(&inline_response).unwrap();
        assert_eq!(
            inline.analysis,
            "The model did a good job but there were still compilations errors."
        );
        assert_eq!(inline.score, 3);

        // Multi-line analysis with text outside the tags.
        let block_response = r#"
            Text around ignored

            <analysis>
                Failed to compile:
                - Error 1
                - Error 2
            </analysis>

            <score>1</score>
        "#
        .unindent();

        let block = parse_judge_output(&block_response).unwrap();
        assert_eq!(block.analysis, "Failed to compile:\n- Error 1\n- Error 2");
        assert_eq!(block.score, 1);
    }
}