1mod assertions;
2mod example;
3mod examples;
4mod explorer;
5mod ids;
6mod instance;
7mod tool_metrics;
8
9use assertions::{AssertionsReport, display_error_row};
10use instance::{ExampleInstance, JudgeOutput, RunOutput, run_git};
11use language_extension::LspAccess;
12pub(crate) use tool_metrics::*;
13
14use ::fs::RealFs;
15use clap::Parser;
16use client::{Client, ProxySettings, UserStore};
17use collections::{HashMap, HashSet};
18use extension::ExtensionHostProxy;
19use futures::future;
20use gpui::http_client::read_proxy_from_env;
21use gpui::{App, AppContext, Application, AsyncApp, Entity, UpdateGlobal};
22use gpui_tokio::Tokio;
23use language::LanguageRegistry;
24use language_model::{ConfiguredModel, LanguageModel, LanguageModelRegistry, SelectedModel};
25use node_runtime::{NodeBinaryOptions, NodeRuntime};
26use project::Project;
27use project::project_settings::ProjectSettings;
28use prompt_store::PromptBuilder;
29use release_channel::AppVersion;
30use reqwest_client::ReqwestClient;
31use settings::{Settings, SettingsStore};
32use std::cell::RefCell;
33use std::collections::VecDeque;
34use std::env;
35use std::path::{Path, PathBuf};
36use std::rc::Rc;
37use std::str::FromStr;
38use std::sync::{Arc, LazyLock};
39use util::ResultExt as _;
40
41static CARGO_MANIFEST_DIR: LazyLock<PathBuf> =
42 LazyLock::new(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")));
43
44#[derive(Parser, Debug)]
45#[command(name = "eval", disable_version_flag = true)]
46struct Args {
47 /// Runs all examples and threads that contain these substrings. If unspecified, all examples and threads are run.
48 #[arg(value_name = "EXAMPLE_SUBSTRING")]
49 filter: Vec<String>,
50 /// provider/model to use for agent
51 #[arg(long, default_value = "anthropic/claude-3-7-sonnet-latest")]
52 model: String,
53 /// provider/model to use for judges
54 #[arg(long, default_value = "anthropic/claude-3-7-sonnet-latest")]
55 judge_model: String,
56 #[arg(long, value_delimiter = ',', default_value = "rs,ts,py")]
57 languages: Vec<String>,
58 /// How many times to run each example.
59 #[arg(long, default_value = "8")]
60 repetitions: usize,
61 /// Maximum number of examples to run concurrently.
62 #[arg(long, default_value = "4")]
63 concurrency: usize,
64}
65
66fn main() {
67 dotenvy::from_filename(CARGO_MANIFEST_DIR.join(".env")).ok();
68
69 env_logger::init();
70
71 let system_id = ids::get_or_create_id(&ids::eval_system_id_path()).ok();
72 let installation_id = ids::get_or_create_id(&ids::eval_installation_id_path()).ok();
73 let session_id = uuid::Uuid::new_v4().to_string();
74 let run_timestamp = chrono::Local::now().format("%Y-%m-%d_%H-%M-%S");
75 let run_id = match env::var("GITHUB_RUN_ID") {
76 Ok(run_id) => format!("github/{}", run_id),
77 Err(_) => format!("local/{}", run_timestamp),
78 };
79
80 let root_dir = Path::new(std::env!("CARGO_MANIFEST_DIR"))
81 .parent()
82 .unwrap()
83 .parent()
84 .unwrap()
85 .canonicalize()
86 .unwrap();
87 let eval_crate_dir = root_dir.join("crates").join("eval");
88 let repos_dir = eval_crate_dir.join("repos");
89 let worktrees_dir = eval_crate_dir.join("worktrees");
90 let examples_dir = eval_crate_dir.join("src").join("examples");
91 let run_dir = eval_crate_dir
92 .join("runs")
93 .join(format!("{}", run_timestamp));
94 std::fs::create_dir_all(&run_dir).unwrap();
95 std::fs::create_dir_all(&repos_dir).unwrap();
96 std::fs::create_dir_all(&worktrees_dir).unwrap();
97 std::fs::create_dir_all(&examples_dir).unwrap();
98 std::fs::create_dir_all(&paths::config_dir()).unwrap();
99
100 let zed_commit_sha = commit_sha_for_path(&root_dir);
101 let zed_branch_name = git_branch_for_path(&root_dir);
102 let args = Args::parse();
103 let languages: HashSet<String> = args.languages.into_iter().collect();
104
105 let http_client = Arc::new(ReqwestClient::new());
106 let app = Application::headless().with_http_client(http_client.clone());
107 let all_threads = examples::all(&examples_dir);
108
109 app.run(move |cx| {
110 let app_state = init(cx);
111
112 let telemetry = app_state.client.telemetry();
113 telemetry.start(system_id, installation_id, session_id, cx);
114
115 let enable_telemetry = env::var("ZED_EVAL_TELEMETRY").map_or(false, |value| value == "1")
116 && telemetry.has_checksum_seed();
117 if enable_telemetry {
118 println!("Telemetry enabled");
119 telemetry::event!(
120 "Agent Eval Started",
121 zed_commit_sha = zed_commit_sha,
122 zed_branch_name = zed_branch_name,
123 run_id = run_id,
124 );
125 }
126
127 let mut cumulative_tool_metrics = ToolMetrics::default();
128
129 let agent_model = load_model(&args.model, cx).unwrap();
130 let judge_model = load_model(&args.judge_model, cx).unwrap();
131
132 LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
133 registry.set_default_model(Some(agent_model.clone()), cx);
134 });
135
136 let auth1 = agent_model.provider.authenticate(cx);
137 let auth2 = judge_model.provider.authenticate(cx);
138
139 cx.spawn(async move |cx| {
140 auth1.await?;
141 auth2.await?;
142
143 let mut examples = Vec::new();
144
145 const COLORS: [&str; 12] = [
146 "\x1b[31m", // Red
147 "\x1b[32m", // Green
148 "\x1b[33m", // Yellow
149 "\x1b[34m", // Blue
150 "\x1b[35m", // Magenta
151 "\x1b[36m", // Cyan
152 "\x1b[91m", // Bright Red
153 "\x1b[92m", // Bright Green
154 "\x1b[93m", // Bright Yellow
155 "\x1b[94m", // Bright Blue
156 "\x1b[95m", // Bright Magenta
157 "\x1b[96m", // Bright Cyan
158 ];
159
160 let mut skipped = Vec::new();
161
162 for thread in all_threads {
163 let meta = thread.meta();
164 if !args.filter.is_empty() && !args.filter.iter().any(|sub| meta.name.contains(sub))
165 {
166 skipped.push(meta.name);
167 continue;
168 }
169
170 if let Some(language) = meta.language_server {
171 if !languages.contains(&language.file_extension) {
172 panic!(
173 "Eval for {:?} could not be run because no language server was found for extension {:?}",
174 meta.name,
175 language.file_extension
176 );
177 }
178 }
179
180 // TODO: This creates a worktree per repetition. Ideally these examples should
181 // either be run sequentially on the same worktree, or reuse worktrees when there
182 // are more examples to run than the concurrency limit.
183 for repetition_number in 0..args.repetitions {
184 let example_instance = ExampleInstance::new(
185 thread.clone(),
186 &repos_dir,
187 &run_dir,
188 &worktrees_dir,
189 repetition_number,
190 );
191
192 examples.push(example_instance);
193 }
194 }
195
196 if !skipped.is_empty() {
197 println!("Skipped threads: {}", skipped.join(", "));
198 }
199
200 if examples.is_empty() {
201 eprintln!("Filter matched no examples");
202 return cx.update(|cx| cx.quit());
203 }
204
205 let mut repo_urls = HashSet::default();
206 let mut clone_tasks = Vec::new();
207
208 let max_name_width = examples
209 .iter()
210 .map(|e| e.worktree_name().len())
211 .max()
212 .unwrap_or(0);
213
214 for (i, example_instance) in examples.iter_mut().enumerate() {
215 let color = COLORS[i % COLORS.len()].to_string();
216 example_instance.set_log_prefix_style(&color, max_name_width);
217
218 println!(
219 "{}Logging to: {}",
220 example_instance.log_prefix,
221 example_instance.run_directory.display()
222 );
223
224 let repo_url = example_instance.repo_url();
225 if repo_urls.insert(repo_url.clone()) {
226 let repo_path = example_instance.repo_path.clone();
227
228 if !repo_path.join(".git").is_dir() {
229 println!(
230 "{:<width$} < {}",
231 "↓ Cloning",
232 repo_url,
233 width = max_name_width
234 );
235
236 let git_task = cx.spawn(async move |_cx| {
237 std::fs::create_dir_all(&repo_path)?;
238 run_git(&repo_path, &["init"]).await?;
239 run_git(&repo_path, &["remote", "add", "origin", &repo_url]).await
240 });
241
242 clone_tasks.push(git_task);
243 } else {
244 println!(
245 "{:<width$} < {}",
246 "✔︎ Already cloned",
247 repo_url,
248 width = max_name_width
249 );
250
251 let actual_origin =
252 run_git(&repo_path, &["remote", "get-url", "origin"]).await?;
253 anyhow::ensure!(
254 actual_origin == repo_url,
255 "remote origin {actual_origin} does not match expected origin {repo_url}"
256 );
257 }
258 }
259 }
260
261 future::join_all(clone_tasks).await;
262
263 for example_instance in examples.iter_mut() {
264 example_instance.fetch().await?;
265 }
266
267 let examples = Rc::new(RefCell::new(VecDeque::from(examples)));
268 let results_by_example_name = Rc::new(RefCell::new(HashMap::default()));
269
270 future::join_all((0..args.concurrency).map(|_| {
271 let app_state = app_state.clone();
272 let model = agent_model.model.clone();
273 let judge_model = judge_model.model.clone();
274 let zed_commit_sha = zed_commit_sha.clone();
275 let zed_branch_name = zed_branch_name.clone();
276 let run_id = run_id.clone();
277 let examples = examples.clone();
278 let results = results_by_example_name.clone();
279 cx.spawn(async move |cx| {
280 loop {
281 let Some(mut example) = examples.borrow_mut().pop_front() else {
282 break;
283 };
284 let result = async {
285 example.setup().await?;
286 let run_output = cx
287 .update(|cx| example.run(model.clone(), app_state.clone(), cx))?
288 .await?;
289 let judge_output = judge_example(
290 example.clone(),
291 judge_model.clone(),
292 &zed_commit_sha,
293 &zed_branch_name,
294 &run_id,
295 &run_output,
296 enable_telemetry,
297 cx,
298 )
299 .await;
300 anyhow::Ok((run_output, judge_output))
301 }
302 .await;
303 results
304 .borrow_mut()
305 .entry(example.name.clone())
306 .or_insert(Vec::new())
307 .push((example.clone(), result));
308 }
309 })
310 }))
311 .await;
312
313 print_report(
314 &mut results_by_example_name.borrow_mut(),
315 &mut cumulative_tool_metrics,
316 &run_dir,
317 )?;
318
319 app_state.client.telemetry().flush_events().await;
320
321 cx.update(|cx| cx.quit())
322 })
323 .detach_and_log_err(cx);
324 });
325}
326
/// Subset of `workspace::AppState` needed by `HeadlessAssistant`, with additional fields.
///
/// Built once by `init` and handed out behind an `Arc`.
pub struct AgentAppState {
    /// Language registry that `init` also passes to the language and extension subsystems.
    pub languages: Arc<LanguageRegistry>,
    /// Client created by `init` via `Client::production`; also the source of the telemetry handle.
    pub client: Arc<Client>,
    /// `UserStore` entity constructed from `client`.
    pub user_store: Entity<UserStore>,
    /// File system implementation; `init` installs a `RealFs`.
    pub fs: Arc<dyn fs::Fs>,
    /// Node runtime constructed by `init`.
    pub node_runtime: NodeRuntime,

    // Additional fields not present in `workspace::AppState`.
    /// Prompt builder loaded by `init` through `PromptBuilder::load`.
    pub prompt_builder: Arc<PromptBuilder>,
}
338
339pub fn init(cx: &mut App) -> Arc<AgentAppState> {
340 let app_version = AppVersion::load(env!("ZED_PKG_VERSION"));
341 release_channel::init(app_version, cx);
342 gpui_tokio::init(cx);
343
344 let mut settings_store = SettingsStore::new(cx);
345 settings_store
346 .set_default_settings(settings::default_settings().as_ref(), cx)
347 .unwrap();
348 cx.set_global(settings_store);
349 client::init_settings(cx);
350
351 // Set User-Agent so we can download language servers from GitHub
352 let user_agent = format!(
353 "Zed Agent Eval/{} ({}; {})",
354 app_version,
355 std::env::consts::OS,
356 std::env::consts::ARCH
357 );
358 let proxy_str = ProxySettings::get_global(cx).proxy.to_owned();
359 let proxy_url = proxy_str
360 .as_ref()
361 .and_then(|input| input.parse().ok())
362 .or_else(read_proxy_from_env);
363 let http = {
364 let _guard = Tokio::handle(cx).enter();
365
366 ReqwestClient::proxy_and_user_agent(proxy_url, &user_agent)
367 .expect("could not start HTTP client")
368 };
369 cx.set_http_client(Arc::new(http));
370
371 Project::init_settings(cx);
372
373 let client = Client::production(cx);
374 cx.set_http_client(client.http_client());
375
376 let git_binary_path = None;
377 let fs = Arc::new(RealFs::new(
378 git_binary_path,
379 cx.background_executor().clone(),
380 ));
381
382 let mut languages = LanguageRegistry::new(cx.background_executor().clone());
383 languages.set_language_server_download_dir(paths::languages_dir().clone());
384 let languages = Arc::new(languages);
385
386 let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
387
388 extension::init(cx);
389
390 let (mut tx, rx) = watch::channel(None);
391 cx.observe_global::<SettingsStore>(move |cx| {
392 let settings = &ProjectSettings::get_global(cx).node;
393 let options = NodeBinaryOptions {
394 allow_path_lookup: !settings.ignore_system_version,
395 allow_binary_download: true,
396 use_paths: settings.path.as_ref().map(|node_path| {
397 let node_path = PathBuf::from(shellexpand::tilde(node_path).as_ref());
398 let npm_path = settings
399 .npm_path
400 .as_ref()
401 .map(|path| PathBuf::from(shellexpand::tilde(&path).as_ref()));
402 (
403 node_path.clone(),
404 npm_path.unwrap_or_else(|| {
405 let base_path = PathBuf::new();
406 node_path.parent().unwrap_or(&base_path).join("npm")
407 }),
408 )
409 }),
410 };
411 tx.send(Some(options)).log_err();
412 })
413 .detach();
414 let node_runtime = NodeRuntime::new(client.http_client(), None, rx);
415
416 let extension_host_proxy = ExtensionHostProxy::global(cx);
417
418 language::init(cx);
419 debug_adapter_extension::init(extension_host_proxy.clone(), cx);
420 language_extension::init(
421 LspAccess::Noop,
422 extension_host_proxy.clone(),
423 languages.clone(),
424 );
425 language_model::init(client.clone(), cx);
426 language_models::init(user_store.clone(), client.clone(), cx);
427 languages::init(languages.clone(), node_runtime.clone(), cx);
428 prompt_store::init(cx);
429 terminal_view::init(cx);
430 let stdout_is_a_pty = false;
431 let prompt_builder = PromptBuilder::load(fs.clone(), stdout_is_a_pty, cx);
432 agent_ui::init(
433 fs.clone(),
434 client.clone(),
435 prompt_builder.clone(),
436 languages.clone(),
437 true,
438 cx,
439 );
440 assistant_tools::init(client.http_client(), cx);
441
442 SettingsStore::update_global(cx, |store, cx| {
443 store.set_user_settings(include_str!("../runner_settings.json"), cx)
444 })
445 .unwrap();
446
447 Arc::new(AgentAppState {
448 languages,
449 client,
450 user_store,
451 fs,
452 node_runtime,
453 prompt_builder,
454 })
455}
456
457pub fn find_model(
458 model_name: &str,
459 model_registry: &LanguageModelRegistry,
460 cx: &App,
461) -> anyhow::Result<Arc<dyn LanguageModel>> {
462 let selected = SelectedModel::from_str(model_name).map_err(|e| anyhow::anyhow!(e))?;
463 model_registry
464 .available_models(cx)
465 .find(|model| model.id() == selected.model && model.provider_id() == selected.provider)
466 .ok_or_else(|| {
467 anyhow::anyhow!(
468 "No language model with ID {}/{} was available. Available models: {}",
469 selected.model.0,
470 selected.provider.0,
471 model_registry
472 .available_models(cx)
473 .map(|model| format!("{}/{}", model.provider_id().0, model.id().0))
474 .collect::<Vec<_>>()
475 .join(", ")
476 )
477 })
478}
479
480pub fn load_model(model_name: &str, cx: &mut App) -> anyhow::Result<ConfiguredModel> {
481 let model = {
482 let model_registry = LanguageModelRegistry::read_global(cx);
483 find_model(model_name, model_registry, cx)?
484 };
485
486 let provider = {
487 let model_registry = LanguageModelRegistry::read_global(cx);
488 model_registry
489 .provider(&model.provider_id())
490 .ok_or_else(|| anyhow::anyhow!("Provider not found: {}", model.provider_id()))?
491 };
492
493 Ok(ConfiguredModel {
494 provider: provider.clone(),
495 model: model.clone(),
496 })
497}
498
499pub fn commit_sha_for_path(repo_path: &Path) -> String {
500 futures::executor::block_on(run_git(repo_path, &["rev-parse", "HEAD"])).unwrap()
501}
502
503pub fn git_branch_for_path(repo_path: &Path) -> String {
504 match std::env::var("GITHUB_REF_NAME") {
505 Ok(branch) => branch,
506 Err(_) => {
507 futures::executor::block_on(run_git(repo_path, &["rev-parse", "--abbrev-ref", "HEAD"]))
508 .unwrap_or_else(|_| "unknown".to_string())
509 }
510 }
511}
512
513async fn judge_example(
514 example: ExampleInstance,
515 model: Arc<dyn LanguageModel>,
516 zed_commit_sha: &str,
517 zed_branch_name: &str,
518 run_id: &str,
519 run_output: &RunOutput,
520 enable_telemetry: bool,
521 cx: &AsyncApp,
522) -> JudgeOutput {
523 let judge_output = example.judge(model.clone(), run_output, cx).await;
524
525 if enable_telemetry {
526 telemetry::event!(
527 "Agent Example Evaluated",
528 zed_commit_sha = zed_commit_sha,
529 zed_branch_name = zed_branch_name,
530 run_id = run_id,
531 example_name = example.name.clone(),
532 example_repetition = example.repetition,
533 diff_evaluation = judge_output.diff.clone(),
534 thread_evaluation = judge_output.thread.clone(),
535 tool_metrics = run_output.tool_metrics,
536 response_count = run_output.response_count,
537 token_usage = run_output.token_usage,
538 model = model.telemetry_id(),
539 model_provider = model.provider_id().to_string(),
540 repository_url = example.repo_url(),
541 repository_revision = example.revision(),
542 diagnostic_summary_before = run_output.diagnostic_summary_before,
543 diagnostic_summary_after = run_output.diagnostic_summary_after,
544 diagnostics_before = run_output.diagnostics_before,
545 diagnostics_after = run_output.diagnostics_after,
546 );
547 }
548
549 judge_output
550}
551
/// Width, in characters, of the banners printed by the report header helpers.
const HEADER_WIDTH: usize = 65;

/// Prints a top-level banner: two blank lines, a rule of `=`, the centered `header`, another
/// rule of `=`, and a trailing blank line.
fn print_h1(header: &str) {
    let rule = "=".repeat(HEADER_WIDTH);
    println!("\n\n{rule}");
    println!("{header:^HEADER_WIDTH$}");
    println!("{rule}\n");
}
559
560fn print_h2(header: &str) {
561 println!("\n{:-^HEADER_WIDTH$}", "");
562 println!("{:^HEADER_WIDTH$}", header);
563 println!("{:-^HEADER_WIDTH$}\n", "");
564}
565
566fn print_report(
567 results_by_example_name: &mut HashMap<
568 String,
569 Vec<(ExampleInstance, anyhow::Result<(RunOutput, JudgeOutput)>)>,
570 >,
571 cumulative_tool_metrics: &mut ToolMetrics,
572 run_dir: &Path,
573) -> anyhow::Result<()> {
574 print_h1("EVAL RESULTS");
575
576 let mut diff_scores = Vec::new();
577 let mut thread_scores = Vec::new();
578 let mut programmatic_scores = Vec::new();
579 let mut error_count = 0;
580
581 for (example_name, results) in results_by_example_name.iter_mut() {
582 print_h2(example_name);
583
584 results.sort_unstable_by_key(|(example, _)| example.repetition);
585 let mut example_cumulative_tool_metrics = ToolMetrics::default();
586
587 let mut table_rows = String::new();
588
589 for (example, result) in results.iter() {
590 match result {
591 Err(err) => {
592 display_error_row(&mut table_rows, example.repetition, err.to_string())?;
593 error_count += 1;
594 programmatic_scores.push(0.0);
595 diff_scores.push(0.0);
596 thread_scores.push(0.0);
597 }
598 Ok((run_output, judge_output)) => {
599 cumulative_tool_metrics.merge(&run_output.tool_metrics);
600 example_cumulative_tool_metrics.merge(&run_output.tool_metrics);
601
602 if run_output.programmatic_assertions.total_count() > 0 {
603 for assertion in &run_output.programmatic_assertions.ran {
604 assertions::display_table_row(
605 &mut table_rows,
606 example.repetition,
607 assertion,
608 )?;
609 }
610
611 programmatic_scores
612 .push(run_output.programmatic_assertions.passed_percentage())
613 }
614
615 if !judge_output.diff.is_empty() {
616 diff_scores.push(judge_output.diff.passed_percentage());
617
618 for assertion in &judge_output.diff.ran {
619 assertions::display_table_row(
620 &mut table_rows,
621 example.repetition,
622 assertion,
623 )?;
624 }
625 }
626
627 if !judge_output.thread.is_empty() {
628 thread_scores.push(judge_output.thread.passed_percentage());
629
630 for assertion in &judge_output.thread.ran {
631 assertions::display_table_row(
632 &mut table_rows,
633 example.repetition,
634 assertion,
635 )?;
636 }
637 }
638 }
639 }
640 }
641
642 let mut all_asserts = Vec::new();
643
644 if !table_rows.is_empty() {
645 assertions::print_table_header();
646 print!("{}", table_rows);
647
648 assertions::print_table_divider();
649
650 for (example, result) in results.iter() {
651 if let Ok((run_output, judge_output)) = result {
652 let asserts = [
653 run_output.programmatic_assertions.clone(),
654 judge_output.diff.clone(),
655 judge_output.thread.clone(),
656 ];
657 all_asserts.extend_from_slice(&asserts);
658 assertions::print_table_round_summary(
659 &example.repetition.to_string(),
660 asserts.iter(),
661 )
662 } else if let Err(err) = result {
663 let assert = AssertionsReport::error(err.to_string());
664 all_asserts.push(assert.clone());
665 assertions::print_table_round_summary(
666 &example.repetition.to_string(),
667 [assert].iter(),
668 )
669 }
670 }
671
672 assertions::print_table_divider();
673
674 assertions::print_table_round_summary("avg", all_asserts.iter());
675
676 assertions::print_table_footer();
677 }
678
679 if !example_cumulative_tool_metrics.is_empty() {
680 println!("{}", &example_cumulative_tool_metrics);
681 }
682 }
683
684 if results_by_example_name.len() > 1 {
685 print_h1("AGGREGATE");
686
687 if error_count > 0 {
688 println!("\n{error_count} examples failed to run!");
689 }
690
691 let programmatic_score_count = programmatic_scores.len();
692 if programmatic_score_count > 0 {
693 let average_programmatic_score = (programmatic_scores.into_iter().sum::<f32>()
694 / (programmatic_score_count as f32))
695 .floor();
696 println!("Average programmatic score: {average_programmatic_score}%");
697 }
698
699 let diff_score_count = diff_scores.len();
700 if diff_score_count > 0 {
701 let average_diff_score =
702 (diff_scores.into_iter().sum::<f32>() / (diff_score_count as f32)).floor();
703 println!("Average diff score: {average_diff_score}%");
704 }
705
706 let thread_score_count = thread_scores.len();
707
708 if thread_score_count > 0 {
709 let average_thread_score =
710 (thread_scores.into_iter().sum::<f32>() / (thread_score_count as f32)).floor();
711 println!("Average thread score: {average_thread_score}%");
712 }
713
714 println!("");
715
716 print_h2("CUMULATIVE TOOL METRICS");
717 println!("{}", cumulative_tool_metrics);
718 }
719
720 let explorer_output_path = run_dir.join("overview.html");
721 let mut json_paths: Vec<PathBuf> = results_by_example_name
722 .values()
723 .flat_map(|results| {
724 results.iter().map(|(example, _)| {
725 let absolute_path = run_dir.join(example.run_directory.join("last.messages.json"));
726 let cwd = std::env::current_dir().expect("Can't get current dir");
727 pathdiff::diff_paths(&absolute_path, cwd).unwrap_or_else(|| absolute_path.clone())
728 })
729 })
730 .collect::<Vec<_>>();
731 json_paths.sort();
732 if let Err(err) = explorer::generate_explorer_html(&json_paths, &explorer_output_path) {
733 eprintln!("Failed to generate explorer HTML: {}", err);
734 }
735
736 Ok(())
737}