1mod assertions;
2mod example;
3mod examples;
4mod explorer;
5mod ids;
6mod instance;
7mod tool_metrics;
8
9use assertions::{AssertionsReport, display_error_row};
10use instance::{ExampleInstance, JudgeOutput, RunOutput, run_git};
11pub(crate) use tool_metrics::*;
12
13use ::fs::RealFs;
14use clap::Parser;
15use client::{Client, ProxySettings, UserStore};
16use collections::{HashMap, HashSet};
17use extension::ExtensionHostProxy;
18use futures::future;
19use gpui::http_client::read_proxy_from_env;
20use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, UpdateGlobal};
21use gpui_tokio::Tokio;
22use language::LanguageRegistry;
23use language_model::{ConfiguredModel, LanguageModel, LanguageModelRegistry, SelectedModel};
24use node_runtime::{NodeBinaryOptions, NodeRuntime};
25use project::Project;
26use project::project_settings::ProjectSettings;
27use prompt_store::PromptBuilder;
28use release_channel::AppVersion;
29use reqwest_client::ReqwestClient;
30use settings::{Settings, SettingsStore};
31use std::cell::RefCell;
32use std::collections::VecDeque;
33use std::env;
34use std::path::{Path, PathBuf};
35use std::rc::Rc;
36use std::str::FromStr;
37use std::sync::{Arc, LazyLock};
38use util::ResultExt as _;
39
40static CARGO_MANIFEST_DIR: LazyLock<PathBuf> =
41 LazyLock::new(|| PathBuf::from(env!("CARGO_MANIFEST_DIR")));
42
43#[derive(Parser, Debug)]
44#[command(name = "eval", disable_version_flag = true)]
45struct Args {
46 /// Runs all examples and threads that contain these substrings. If unspecified, all examples and threads are run.
47 #[arg(value_name = "EXAMPLE_SUBSTRING")]
48 filter: Vec<String>,
49 /// provider/model to use for agent
50 #[arg(long, default_value = "anthropic/claude-3-7-sonnet-latest")]
51 model: String,
52 /// provider/model to use for judges
53 #[arg(long, default_value = "anthropic/claude-3-7-sonnet-latest")]
54 judge_model: String,
55 #[arg(long, value_delimiter = ',', default_value = "rs,ts,py")]
56 languages: Vec<String>,
57 /// How many times to run each example.
58 #[arg(long, default_value = "8")]
59 repetitions: usize,
60 /// Maximum number of examples to run concurrently.
61 #[arg(long, default_value = "4")]
62 concurrency: usize,
63}
64
65fn main() {
66 dotenv::from_filename(CARGO_MANIFEST_DIR.join(".env")).ok();
67
68 env_logger::init();
69
70 let system_id = ids::get_or_create_id(&ids::eval_system_id_path()).ok();
71 let installation_id = ids::get_or_create_id(&ids::eval_installation_id_path()).ok();
72 let session_id = uuid::Uuid::new_v4().to_string();
73 let run_timestamp = chrono::Local::now().format("%Y-%m-%d_%H-%M-%S");
74 let run_id = match env::var("GITHUB_RUN_ID") {
75 Ok(run_id) => format!("github/{}", run_id),
76 Err(_) => format!("local/{}", run_timestamp),
77 };
78
79 let root_dir = Path::new(std::env!("CARGO_MANIFEST_DIR"))
80 .parent()
81 .unwrap()
82 .parent()
83 .unwrap()
84 .canonicalize()
85 .unwrap();
86 let eval_crate_dir = root_dir.join("crates").join("eval");
87 let repos_dir = eval_crate_dir.join("repos");
88 let worktrees_dir = eval_crate_dir.join("worktrees");
89 let examples_dir = eval_crate_dir.join("src").join("examples");
90 let run_dir = eval_crate_dir
91 .join("runs")
92 .join(format!("{}", run_timestamp));
93 std::fs::create_dir_all(&run_dir).unwrap();
94 std::fs::create_dir_all(&repos_dir).unwrap();
95 std::fs::create_dir_all(&worktrees_dir).unwrap();
96 std::fs::create_dir_all(&examples_dir).unwrap();
97 std::fs::create_dir_all(&paths::config_dir()).unwrap();
98
99 let zed_commit_sha = commit_sha_for_path(&root_dir);
100 let zed_branch_name = git_branch_for_path(&root_dir);
101 let args = Args::parse();
102 let languages: HashSet<String> = args.languages.into_iter().collect();
103
104 let http_client = Arc::new(ReqwestClient::new());
105 let app = Application::headless().with_http_client(http_client.clone());
106 let all_threads = examples::all(&examples_dir);
107
108 app.run(move |cx| {
109 let app_state = init(cx);
110
111 let telemetry = app_state.client.telemetry();
112 telemetry.start(system_id, installation_id, session_id, cx);
113
114 let enable_telemetry = env::var("ZED_EVAL_TELEMETRY").map_or(false, |value| value == "1")
115 && telemetry.has_checksum_seed();
116 if enable_telemetry {
117 println!("Telemetry enabled");
118 telemetry::event!(
119 "Agent Eval Started",
120 zed_commit_sha = zed_commit_sha,
121 zed_branch_name = zed_branch_name,
122 run_id = run_id,
123 );
124 }
125
126 let mut cumulative_tool_metrics = ToolMetrics::default();
127
128 let agent_model = load_model(&args.model, cx).unwrap();
129 let judge_model = load_model(&args.judge_model, cx).unwrap();
130
131 LanguageModelRegistry::global(cx).update(cx, |registry, cx| {
132 registry.set_default_model(Some(agent_model.clone()), cx);
133 });
134
135 let auth1 = agent_model.provider.authenticate(cx);
136 let auth2 = judge_model.provider.authenticate(cx);
137
138 cx.spawn(async move |cx| {
139 auth1.await?;
140 auth2.await?;
141
142 let mut examples = Vec::new();
143
144 const COLORS: [&str; 12] = [
145 "\x1b[31m", // Red
146 "\x1b[32m", // Green
147 "\x1b[33m", // Yellow
148 "\x1b[34m", // Blue
149 "\x1b[35m", // Magenta
150 "\x1b[36m", // Cyan
151 "\x1b[91m", // Bright Red
152 "\x1b[92m", // Bright Green
153 "\x1b[93m", // Bright Yellow
154 "\x1b[94m", // Bright Blue
155 "\x1b[95m", // Bright Magenta
156 "\x1b[96m", // Bright Cyan
157 ];
158
159 let mut skipped = Vec::new();
160
161 for thread in all_threads {
162 let meta = thread.meta();
163 if !args.filter.is_empty() && !args.filter.iter().any(|sub| meta.name.contains(sub))
164 {
165 skipped.push(meta.name);
166 continue;
167 }
168
169 if let Some(language) = meta.language_server {
170 if !languages.contains(&language.file_extension) {
171 panic!(
172 "Eval for {:?} could not be run because no language server was found for extension {:?}",
173 meta.name,
174 language.file_extension
175 );
176 }
177 }
178
179 // TODO: This creates a worktree per repetition. Ideally these examples should
180 // either be run sequentially on the same worktree, or reuse worktrees when there
181 // are more examples to run than the concurrency limit.
182 for repetition_number in 0..args.repetitions {
183 let example_instance = ExampleInstance::new(
184 thread.clone(),
185 &repos_dir,
186 &run_dir,
187 &worktrees_dir,
188 repetition_number,
189 );
190
191 examples.push(example_instance);
192 }
193 }
194
195 if !skipped.is_empty() {
196 println!("Skipped threads: {}", skipped.join(", "));
197 }
198
199 if examples.is_empty() {
200 eprintln!("Filter matched no examples");
201 return cx.update(|cx| cx.quit());
202 }
203
204 let mut repo_urls = HashSet::default();
205 let mut clone_tasks = Vec::new();
206
207 let max_name_width = examples
208 .iter()
209 .map(|e| e.worktree_name().len())
210 .max()
211 .unwrap_or(0);
212
213 for (i, example_instance) in examples.iter_mut().enumerate() {
214 let color = COLORS[i % COLORS.len()].to_string();
215 example_instance.set_log_prefix_style(&color, max_name_width);
216
217 println!(
218 "{}Logging to: {}",
219 example_instance.log_prefix,
220 example_instance.run_directory.display()
221 );
222
223 let repo_url = example_instance.repo_url();
224 if repo_urls.insert(repo_url.clone()) {
225 let repo_path = example_instance.repo_path.clone();
226
227 if !repo_path.join(".git").is_dir() {
228 println!(
229 "{:<width$} < {}",
230 "↓ Cloning",
231 repo_url,
232 width = max_name_width
233 );
234
235 let git_task = cx.spawn(async move |_cx| {
236 std::fs::create_dir_all(&repo_path)?;
237 run_git(&repo_path, &["init"]).await?;
238 run_git(&repo_path, &["remote", "add", "origin", &repo_url]).await
239 });
240
241 clone_tasks.push(git_task);
242 } else {
243 println!(
244 "{:<width$} < {}",
245 "✔︎ Already cloned",
246 repo_url,
247 width = max_name_width
248 );
249
250 let actual_origin =
251 run_git(&repo_path, &["remote", "get-url", "origin"]).await?;
252 anyhow::ensure!(
253 actual_origin == repo_url,
254 "remote origin {actual_origin} does not match expected origin {repo_url}"
255 );
256 }
257 }
258 }
259
260 future::join_all(clone_tasks).await;
261
262 for example_instance in examples.iter_mut() {
263 example_instance.fetch().await?;
264 }
265
266 let examples = Rc::new(RefCell::new(VecDeque::from(examples)));
267 let results_by_example_name = Rc::new(RefCell::new(HashMap::default()));
268
269 future::join_all((0..args.concurrency).map(|_| {
270 let app_state = app_state.clone();
271 let model = agent_model.model.clone();
272 let judge_model = judge_model.model.clone();
273 let zed_commit_sha = zed_commit_sha.clone();
274 let zed_branch_name = zed_branch_name.clone();
275 let run_id = run_id.clone();
276 let examples = examples.clone();
277 let results = results_by_example_name.clone();
278 cx.spawn(async move |cx| {
279 loop {
280 let Some(mut example) = examples.borrow_mut().pop_front() else {
281 break;
282 };
283 let result = async {
284 example.setup().await?;
285 let run_output = cx
286 .update(|cx| example.run(model.clone(), app_state.clone(), cx))?
287 .await?;
288 let judge_output = judge_example(
289 example.clone(),
290 judge_model.clone(),
291 &zed_commit_sha,
292 &zed_branch_name,
293 &run_id,
294 &run_output,
295 enable_telemetry,
296 cx,
297 )
298 .await;
299 anyhow::Ok((run_output, judge_output))
300 }
301 .await;
302 results
303 .borrow_mut()
304 .entry(example.name.clone())
305 .or_insert(Vec::new())
306 .push((example.clone(), result));
307 }
308 })
309 }))
310 .await;
311
312 print_report(
313 &mut results_by_example_name.borrow_mut(),
314 &mut cumulative_tool_metrics,
315 &run_dir,
316 )?;
317
318 app_state.client.telemetry().flush_events().await;
319
320 cx.update(|cx| cx.quit())
321 })
322 .detach_and_log_err(cx);
323 });
324}
325
/// Subset of `workspace::AppState` needed by `HeadlessAssistant`, with additional fields.
///
/// Built once by [`init`] and shared behind an `Arc`.
pub struct AgentAppState {
    /// Language registry; `init` points its language server download directory at
    /// `paths::languages_dir()`.
    pub languages: Arc<LanguageRegistry>,
    /// Client created by `init` via `Client::production`; also the source of the telemetry handle.
    pub client: Arc<Client>,
    /// User store entity constructed from `client`.
    pub user_store: Entity<UserStore>,
    /// File system handle; `init` installs a `RealFs`.
    pub fs: Arc<dyn fs::Fs>,
    /// Node runtime whose binary options follow the project's `node` settings.
    pub node_runtime: NodeRuntime,

    // Additional fields not present in `workspace::AppState`.
    /// Prompt builder loaded by `init` and passed on to `agent::init`.
    pub prompt_builder: Arc<PromptBuilder>,
}
337
338pub fn init(cx: &mut App) -> Arc<AgentAppState> {
339 release_channel::init(SemanticVersion::default(), cx);
340 gpui_tokio::init(cx);
341
342 let mut settings_store = SettingsStore::new(cx);
343 settings_store
344 .set_default_settings(settings::default_settings().as_ref(), cx)
345 .unwrap();
346 cx.set_global(settings_store);
347 client::init_settings(cx);
348
349 // Set User-Agent so we can download language servers from GitHub
350 let user_agent = format!(
351 "Zed/{} ({}; {})",
352 AppVersion::global(cx),
353 std::env::consts::OS,
354 std::env::consts::ARCH
355 );
356 let proxy_str = ProxySettings::get_global(cx).proxy.to_owned();
357 let proxy_url = proxy_str
358 .as_ref()
359 .and_then(|input| input.parse().ok())
360 .or_else(read_proxy_from_env);
361 let http = {
362 let _guard = Tokio::handle(cx).enter();
363
364 ReqwestClient::proxy_and_user_agent(proxy_url, &user_agent)
365 .expect("could not start HTTP client")
366 };
367 cx.set_http_client(Arc::new(http));
368
369 Project::init_settings(cx);
370
371 let client = Client::production(cx);
372 cx.set_http_client(client.http_client());
373
374 let git_binary_path = None;
375 let fs = Arc::new(RealFs::new(
376 git_binary_path,
377 cx.background_executor().clone(),
378 ));
379
380 let mut languages = LanguageRegistry::new(cx.background_executor().clone());
381 languages.set_language_server_download_dir(paths::languages_dir().clone());
382 let languages = Arc::new(languages);
383
384 let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
385
386 extension::init(cx);
387
388 let (tx, rx) = async_watch::channel(None);
389 cx.observe_global::<SettingsStore>(move |cx| {
390 let settings = &ProjectSettings::get_global(cx).node;
391 let options = NodeBinaryOptions {
392 allow_path_lookup: !settings.ignore_system_version,
393 allow_binary_download: true,
394 use_paths: settings.path.as_ref().map(|node_path| {
395 let node_path = PathBuf::from(shellexpand::tilde(node_path).as_ref());
396 let npm_path = settings
397 .npm_path
398 .as_ref()
399 .map(|path| PathBuf::from(shellexpand::tilde(&path).as_ref()));
400 (
401 node_path.clone(),
402 npm_path.unwrap_or_else(|| {
403 let base_path = PathBuf::new();
404 node_path.parent().unwrap_or(&base_path).join("npm")
405 }),
406 )
407 }),
408 };
409 tx.send(Some(options)).log_err();
410 })
411 .detach();
412 let node_runtime = NodeRuntime::new(client.http_client(), None, rx);
413
414 let extension_host_proxy = ExtensionHostProxy::global(cx);
415
416 language::init(cx);
417 debug_adapter_extension::init(extension_host_proxy.clone(), cx);
418 language_extension::init(extension_host_proxy.clone(), languages.clone());
419 language_model::init(client.clone(), cx);
420 language_models::init(user_store.clone(), client.clone(), fs.clone(), cx);
421 languages::init(languages.clone(), node_runtime.clone(), cx);
422 prompt_store::init(cx);
423 terminal_view::init(cx);
424 let stdout_is_a_pty = false;
425 let prompt_builder = PromptBuilder::load(fs.clone(), stdout_is_a_pty, cx);
426 agent::init(
427 fs.clone(),
428 client.clone(),
429 prompt_builder.clone(),
430 languages.clone(),
431 true,
432 cx,
433 );
434 assistant_tools::init(client.http_client(), cx);
435
436 SettingsStore::update_global(cx, |store, cx| {
437 store.set_user_settings(include_str!("../runner_settings.json"), cx)
438 })
439 .unwrap();
440
441 Arc::new(AgentAppState {
442 languages,
443 client,
444 user_store,
445 fs,
446 node_runtime,
447 prompt_builder,
448 })
449}
450
451pub fn find_model(
452 model_name: &str,
453 model_registry: &LanguageModelRegistry,
454 cx: &App,
455) -> anyhow::Result<Arc<dyn LanguageModel>> {
456 let selected = SelectedModel::from_str(model_name).map_err(|e| anyhow::anyhow!(e))?;
457 model_registry
458 .available_models(cx)
459 .find(|model| model.id() == selected.model && model.provider_id() == selected.provider)
460 .ok_or_else(|| {
461 anyhow::anyhow!(
462 "No language model with ID {}/{} was available. Available models: {}",
463 selected.model.0,
464 selected.provider.0,
465 model_registry
466 .available_models(cx)
467 .map(|model| format!("{}/{}", model.provider_id().0, model.id().0))
468 .collect::<Vec<_>>()
469 .join(", ")
470 )
471 })
472}
473
474pub fn load_model(model_name: &str, cx: &mut App) -> anyhow::Result<ConfiguredModel> {
475 let model = {
476 let model_registry = LanguageModelRegistry::read_global(cx);
477 find_model(model_name, model_registry, cx)?
478 };
479
480 let provider = {
481 let model_registry = LanguageModelRegistry::read_global(cx);
482 model_registry
483 .provider(&model.provider_id())
484 .ok_or_else(|| anyhow::anyhow!("Provider not found: {}", model.provider_id()))?
485 };
486
487 Ok(ConfiguredModel {
488 provider: provider.clone(),
489 model: model.clone(),
490 })
491}
492
493pub fn commit_sha_for_path(repo_path: &Path) -> String {
494 futures::executor::block_on(run_git(repo_path, &["rev-parse", "HEAD"])).unwrap()
495}
496
497pub fn git_branch_for_path(repo_path: &Path) -> String {
498 match std::env::var("GITHUB_REF_NAME") {
499 Ok(branch) => branch,
500 Err(_) => {
501 futures::executor::block_on(run_git(repo_path, &["rev-parse", "--abbrev-ref", "HEAD"]))
502 .unwrap_or_else(|_| "unknown".to_string())
503 }
504 }
505}
506
507async fn judge_example(
508 example: ExampleInstance,
509 model: Arc<dyn LanguageModel>,
510 zed_commit_sha: &str,
511 zed_branch_name: &str,
512 run_id: &str,
513 run_output: &RunOutput,
514 enable_telemetry: bool,
515 cx: &AsyncApp,
516) -> JudgeOutput {
517 let judge_output = example.judge(model.clone(), &run_output, cx).await;
518
519 if enable_telemetry {
520 telemetry::event!(
521 "Agent Example Evaluated",
522 zed_commit_sha = zed_commit_sha,
523 zed_branch_name = zed_branch_name,
524 run_id = run_id,
525 example_name = example.name.clone(),
526 example_repetition = example.repetition,
527 diff_evaluation = judge_output.diff.clone(),
528 thread_evaluation = judge_output.thread.clone(),
529 tool_metrics = run_output.tool_metrics,
530 response_count = run_output.response_count,
531 token_usage = run_output.token_usage,
532 model = model.telemetry_id(),
533 model_provider = model.provider_id().to_string(),
534 repository_url = example.repo_url(),
535 repository_revision = example.revision(),
536 diagnostic_summary_before = run_output.diagnostic_summary_before,
537 diagnostic_summary_after = run_output.diagnostic_summary_after,
538 diagnostics_before = run_output.diagnostics_before,
539 diagnostics_after = run_output.diagnostics_after,
540 );
541 }
542
543 judge_output
544}
545
/// Width, in characters, of the rules and centered titles printed by `print_h1` and `print_h2`.
const HEADER_WIDTH: usize = 65;
547
548fn print_h1(header: &str) {
549 println!("\n\n{:=^HEADER_WIDTH$}", "");
550 println!("{:^HEADER_WIDTH$}", header);
551 println!("{:=^HEADER_WIDTH$}\n", "");
552}
553
554fn print_h2(header: &str) {
555 println!("\n{:-^HEADER_WIDTH$}", "");
556 println!("{:^HEADER_WIDTH$}", header);
557 println!("{:-^HEADER_WIDTH$}\n", "");
558}
559
560fn print_report(
561 results_by_example_name: &mut HashMap<
562 String,
563 Vec<(ExampleInstance, anyhow::Result<(RunOutput, JudgeOutput)>)>,
564 >,
565 cumulative_tool_metrics: &mut ToolMetrics,
566 run_dir: &Path,
567) -> anyhow::Result<()> {
568 print_h1("EVAL RESULTS");
569
570 let mut diff_scores = Vec::new();
571 let mut thread_scores = Vec::new();
572 let mut programmatic_scores = Vec::new();
573 let mut error_count = 0;
574
575 for (example_name, results) in results_by_example_name.iter_mut() {
576 print_h2(example_name);
577
578 results.sort_unstable_by_key(|(example, _)| example.repetition);
579 let mut example_cumulative_tool_metrics = ToolMetrics::default();
580
581 let mut table_rows = String::new();
582
583 for (example, result) in results.iter() {
584 match result {
585 Err(err) => {
586 display_error_row(&mut table_rows, example.repetition, err.to_string())?;
587 error_count += 1;
588 programmatic_scores.push(0.0);
589 diff_scores.push(0.0);
590 thread_scores.push(0.0);
591 }
592 Ok((run_output, judge_output)) => {
593 cumulative_tool_metrics.merge(&run_output.tool_metrics);
594 example_cumulative_tool_metrics.merge(&run_output.tool_metrics);
595
596 if run_output.programmatic_assertions.total_count() > 0 {
597 for assertion in &run_output.programmatic_assertions.ran {
598 assertions::display_table_row(
599 &mut table_rows,
600 example.repetition,
601 assertion,
602 )?;
603 }
604
605 programmatic_scores
606 .push(run_output.programmatic_assertions.passed_percentage())
607 }
608
609 if !judge_output.diff.is_empty() {
610 diff_scores.push(judge_output.diff.passed_percentage());
611
612 for assertion in &judge_output.diff.ran {
613 assertions::display_table_row(
614 &mut table_rows,
615 example.repetition,
616 assertion,
617 )?;
618 }
619 }
620
621 if !judge_output.thread.is_empty() {
622 thread_scores.push(judge_output.thread.passed_percentage());
623
624 for assertion in &judge_output.thread.ran {
625 assertions::display_table_row(
626 &mut table_rows,
627 example.repetition,
628 assertion,
629 )?;
630 }
631 }
632 }
633 }
634 }
635
636 let mut all_asserts = Vec::new();
637
638 if !table_rows.is_empty() {
639 assertions::print_table_header();
640 print!("{}", table_rows);
641
642 assertions::print_table_divider();
643
644 for (example, result) in results.iter() {
645 if let Ok((run_output, judge_output)) = result {
646 let asserts = [
647 run_output.programmatic_assertions.clone(),
648 judge_output.diff.clone(),
649 judge_output.thread.clone(),
650 ];
651 all_asserts.extend_from_slice(&asserts);
652 assertions::print_table_round_summary(
653 &example.repetition.to_string(),
654 asserts.iter(),
655 )
656 } else if let Err(err) = result {
657 let assert = AssertionsReport::error(err.to_string());
658 all_asserts.push(assert.clone());
659 assertions::print_table_round_summary(
660 &example.repetition.to_string(),
661 [assert].iter(),
662 )
663 }
664 }
665
666 assertions::print_table_divider();
667
668 assertions::print_table_round_summary("avg", all_asserts.iter());
669
670 assertions::print_table_footer();
671 }
672
673 if !example_cumulative_tool_metrics.is_empty() {
674 println!("{}", &example_cumulative_tool_metrics);
675 }
676 }
677
678 if results_by_example_name.len() > 1 {
679 print_h1("AGGREGATE");
680
681 if error_count > 0 {
682 println!("\n{error_count} examples failed to run!");
683 }
684
685 let programmatic_score_count = programmatic_scores.len();
686 if programmatic_score_count > 0 {
687 let average_programmatic_score = (programmatic_scores.into_iter().sum::<f32>()
688 / (programmatic_score_count as f32))
689 .floor();
690 println!("Average programmatic score: {average_programmatic_score}%");
691 }
692
693 let diff_score_count = diff_scores.len();
694 if diff_score_count > 0 {
695 let average_diff_score =
696 (diff_scores.into_iter().sum::<f32>() / (diff_score_count as f32)).floor();
697 println!("Average diff score: {average_diff_score}%");
698 }
699
700 let thread_score_count = thread_scores.len();
701
702 if thread_score_count > 0 {
703 let average_thread_score =
704 (thread_scores.into_iter().sum::<f32>() / (thread_score_count as f32)).floor();
705 println!("Average thread score: {average_thread_score}%");
706 }
707
708 println!("");
709
710 print_h2("CUMULATIVE TOOL METRICS");
711 println!("{}", cumulative_tool_metrics);
712 }
713
714 let explorer_output_path = run_dir.join("overview.html");
715 let mut json_paths: Vec<PathBuf> = results_by_example_name
716 .values()
717 .flat_map(|results| {
718 results.iter().map(|(example, _)| {
719 let absolute_path = run_dir.join(example.run_directory.join("last.messages.json"));
720 let cwd = std::env::current_dir().expect("Can't get current dir");
721 pathdiff::diff_paths(&absolute_path, cwd).unwrap_or_else(|| absolute_path.clone())
722 })
723 })
724 .collect::<Vec<_>>();
725 json_paths.sort();
726 if let Err(err) = explorer::generate_explorer_html(&json_paths, &explorer_output_path) {
727 eprintln!("Failed to generate explorer HTML: {}", err);
728 }
729
730 Ok(())
731}