Cargo.lock 🔗
@@ -4983,6 +4983,7 @@ dependencies = [
"language_models",
"languages",
"node_runtime",
+ "pathdiff",
"paths",
"project",
"prompt_store",
Oleksiy Syvokon created
This update generates a single self-contained .html file that shows an
overview of evaluation threads in the browser. It's useful for:
- Quickly reviewing results
- Sharing evaluation runs
- Debugging
- Comparing models (TBD)
Features:
- Export thread JSON from the UI
- Keyboard navigation (j/k or Ctrl + ←/→)
- Toggle between compact and full views
Generating the overview:
- `cargo run -p eval` will write this file in the run dir's root.
- Or you can call `cargo run -p eval --bin explorer` to generate it
without running evals.
Screenshot:

Release Notes:
- N/A
Cargo.lock | 1
crates/eval/Cargo.toml | 7
crates/eval/README.md | 18
crates/eval/docs/explorer.md | 27
crates/eval/src/eval.rs | 327 ++++++-----
crates/eval/src/explorer.html | 1045 +++++++++++++++++++++++++++++++++++++
crates/eval/src/explorer.rs | 75 ++
7 files changed, 1,351 insertions(+), 149 deletions(-)
@@ -4983,6 +4983,7 @@ dependencies = [
"language_models",
"languages",
"node_runtime",
+ "pathdiff",
"paths",
"project",
"prompt_store",
@@ -3,6 +3,7 @@ name = "eval"
version = "0.1.0"
publish.workspace = true
edition.workspace = true
+default-run = "eval"
[dependencies]
agent.workspace = true
@@ -31,6 +32,7 @@ language_model.workspace = true
language_models.workspace = true
languages = { workspace = true, features = ["load-grammars"] }
node_runtime.workspace = true
+pathdiff = "0.2"
paths.workspace = true
project.workspace = true
prompt_store.workspace = true
@@ -48,9 +50,14 @@ unindent.workspace = true
util.workspace = true
uuid = { version = "1.6", features = ["v4"] }
workspace-hack.workspace = true
+
[[bin]]
name = "eval"
path = "src/eval.rs"
+[[bin]]
+name = "explorer"
+path = "src/explorer.rs"
+
[lints]
workspace = true
@@ -5,3 +5,21 @@ This eval assumes the working directory is the root of the repository. Run it wi
```sh
cargo run -p eval
```
+
+## Explorer Tool
+
+The explorer tool generates a self-contained HTML view from one or more thread
+JSON files. It provides a visual interface to explore the agent thread, including
+tool calls and results. See [./docs/explorer.md](./docs/explorer.md) for more details.
+
+### Usage
+
+```sh
+cargo run -p eval --bin explorer -- --input <path-to-json-files> --output <output-html-path>
+```
+
+Example:
+
+```sh
+cargo run -p eval --bin explorer -- --input ./runs/2025-04-23_15-53-30/fastmcp_bugifx/*/last.messages.json --output /tmp/explorer.html
+```
@@ -0,0 +1,27 @@
+# Explorer
+
+Threads Explorer is a single self-contained HTML file that gives an overview of
+evaluation runs, while allowing for some interactivity.
+
+When you open a file, it gives you a _thread overview_, which looks like this:
+
+| Turn | Text | Tool | Result |
+| ---- | ------------------------------------ | -------------------------------------------- | --------------------------------------------- |
+| 1 | [User]: | | |
+| | Fix the bug: kwargs not passed... | | |
+| 2 | I'll help you fix that bug. | **list_directory**(path="fastmcp") | `fastmcp/src [...]` |
+| | | | |
+| 3 | Let's examine the code. | **read_file**(path="fastmcp/main.py", [...]) | `def run_application(app, \*\*kwargs): [...]` |
+| 4 | I found the issue. | **edit_file**(path="fastmcp/core.py", [...]) | `Made edit to fastmcp/core.py` |
+| 5 | Let's check if there are any errors. | **diagnostics**() | `No errors found` |
+
+### Implementation details
+
+`src/explorer.html` contains the template. You can open this template in a
+browser as is, and it will show some dummy values. But the main use is to set
+the `threadsData` variable with real data, which then will be used instead of
+the dummy values.
+
+`src/explorer.rs` takes one or more JSON files as generated by `cargo run -p
+eval`, and outputs an HTML file for rendering these threads. Refer to the dummy
+data in `explorer.html` for a sample format.
@@ -1,6 +1,7 @@
mod assertions;
mod example;
mod examples;
+mod explorer;
mod ids;
mod instance;
mod tool_metrics;
@@ -305,155 +306,11 @@ fn main() {
}))
.await;
- print_h1("EVAL RESULTS");
-
- let mut diff_scores = Vec::new();
- let mut thread_scores = Vec::new();
- let mut programmatic_scores = Vec::new();
- let mut error_count = 0;
-
- for (example_name, results) in results_by_example_name.borrow_mut().iter_mut() {
- print_h2(&example_name);
-
- results.sort_unstable_by_key(|(example, _)| example.repetition);
- let mut example_cumulative_tool_metrics = ToolMetrics::default();
-
- let mut table_rows = String::new();
-
- for (example, result) in results.iter() {
- match result {
- Err(err) => {
- display_error_row(
- &mut table_rows,
- example.repetition,
- err.to_string(),
- )?;
- error_count += 1;
- }
- Ok((run_output, judge_output)) => {
- cumulative_tool_metrics.merge(&run_output.tool_metrics);
- example_cumulative_tool_metrics.merge(&run_output.tool_metrics);
-
- if !run_output.programmatic_assertions.total_count() > 0 {
- for assertion in &run_output.programmatic_assertions.ran {
- assertions::display_table_row(
- &mut table_rows,
- example.repetition,
- assertion,
- )?;
- }
-
- programmatic_scores
- .push(run_output.programmatic_assertions.passed_percentage())
- }
-
- if !judge_output.diff.is_empty() {
- diff_scores.push(judge_output.diff.passed_percentage());
-
- for assertion in &judge_output.diff.ran {
- assertions::display_table_row(
- &mut table_rows,
- example.repetition,
- assertion,
- )?;
- }
- }
-
- if !judge_output.thread.is_empty() {
- thread_scores.push(judge_output.thread.passed_percentage());
-
- for assertion in &judge_output.thread.ran {
- assertions::display_table_row(
- &mut table_rows,
- example.repetition,
- assertion,
- )?;
- }
- }
- }
- }
- }
-
- if !table_rows.is_empty() {
- assertions::print_table_header();
- print!("{}", table_rows);
-
- assertions::print_table_divider();
-
- for (example, result) in results.iter() {
- if let Ok((run_output, judge_output)) = result {
- assertions::print_table_round_summary(
- &example.repetition.to_string(),
- [
- &run_output.programmatic_assertions,
- &judge_output.diff,
- &judge_output.thread,
- ]
- .into_iter(),
- )
- }
- }
-
- assertions::print_table_divider();
-
- assertions::print_table_round_summary(
- "avg",
- results.iter().flat_map(|(_, result)| {
- result.iter().flat_map(|(run_output, judge_output)| {
- [
- &run_output.programmatic_assertions,
- &judge_output.diff,
- &judge_output.thread,
- ]
- .into_iter()
- })
- }),
- );
-
- assertions::print_table_footer();
- }
-
- if !example_cumulative_tool_metrics.is_empty() {
- println!("{}", &example_cumulative_tool_metrics);
- }
- }
-
- if results_by_example_name.borrow().len() > 1 {
- print_h1("AGGREGATE");
-
- if error_count > 0 {
- println!("\n{error_count} examples failed to run!");
- }
-
- let programmatic_score_count = programmatic_scores.len();
- if programmatic_score_count > 0 {
- let average_programmatic_score = (programmatic_scores.into_iter().sum::<f32>()
- / (programmatic_score_count as f32))
- .floor();
- println!("Average programmatic score: {average_programmatic_score}%");
- }
-
- let diff_score_count = diff_scores.len();
- if diff_score_count > 0 {
- let average_diff_score =
- (diff_scores.into_iter().sum::<f32>() / (diff_score_count as f32)).floor();
- println!("Average diff score: {average_diff_score}%");
- }
-
- let thread_score_count = thread_scores.len();
-
- if thread_score_count > 0 {
- let average_thread_score = (thread_scores.into_iter().sum::<f32>()
- / (thread_score_count as f32))
- .floor();
- println!("Average thread score: {average_thread_score}%");
- }
-
- println!("");
-
- print_h2("CUMULATIVE TOOL METRICS");
- println!("{}", cumulative_tool_metrics);
- }
+ print_report(
+ &mut results_by_example_name.borrow_mut(),
+ &mut cumulative_tool_metrics,
+ &run_dir,
+ )?;
app_state.client.telemetry().flush_events().await;
@@ -670,3 +527,175 @@ fn print_h2(header: &str) {
println!("{:^HEADER_WIDTH$}", header);
println!("{:-^HEADER_WIDTH$}\n", "");
}
+
+/// Prints the evaluation report for a finished run and writes the HTML overview.
+///
+/// For every example this prints a table with the programmatic, diff and thread
+/// assertions of each repetition (or an error row when the repetition failed),
+/// followed by per-round and average summaries and the example's tool metrics.
+/// When more than one example was run, an aggregate section with the average
+/// scores and the cumulative tool metrics is printed as well.
+///
+/// Finally, `overview.html` is generated in `run_dir` from the
+/// `last.messages.json` file of every repetition. A failure to generate it is
+/// reported on stderr but does not fail the report.
+///
+/// Each example's results are sorted in place by repetition, and the tool
+/// metrics of every successful repetition are merged into
+/// `cumulative_tool_metrics`.
+///
+/// # Errors
+///
+/// Returns an error if formatting a table row fails.
+fn print_report(
+    results_by_example_name: &mut HashMap<
+        String,
+        Vec<(ExampleInstance, anyhow::Result<(RunOutput, JudgeOutput)>)>,
+    >,
+    cumulative_tool_metrics: &mut ToolMetrics,
+    run_dir: &Path,
+) -> anyhow::Result<()> {
+    print_h1("EVAL RESULTS");
+
+    let mut diff_scores = Vec::new();
+    let mut thread_scores = Vec::new();
+    let mut programmatic_scores = Vec::new();
+    let mut error_count = 0;
+
+    for (example_name, results) in results_by_example_name.iter_mut() {
+        print_h2(example_name);
+
+        results.sort_unstable_by_key(|(example, _)| example.repetition);
+        let mut example_cumulative_tool_metrics = ToolMetrics::default();
+
+        let mut table_rows = String::new();
+
+        for (example, result) in results.iter() {
+            match result {
+                Err(err) => {
+                    display_error_row(&mut table_rows, example.repetition, err.to_string())?;
+                    error_count += 1;
+                }
+                Ok((run_output, judge_output)) => {
+                    cumulative_tool_metrics.merge(&run_output.tool_metrics);
+                    example_cumulative_tool_metrics.merge(&run_output.tool_metrics);
+
+                    // Only score repetitions that actually ran programmatic
+                    // assertions. (The previous `!count > 0` applied a bitwise
+                    // NOT to the count, which made the guard effectively
+                    // always true.)
+                    if run_output.programmatic_assertions.total_count() > 0 {
+                        for assertion in &run_output.programmatic_assertions.ran {
+                            assertions::display_table_row(
+                                &mut table_rows,
+                                example.repetition,
+                                assertion,
+                            )?;
+                        }
+
+                        programmatic_scores
+                            .push(run_output.programmatic_assertions.passed_percentage())
+                    }
+
+                    if !judge_output.diff.is_empty() {
+                        diff_scores.push(judge_output.diff.passed_percentage());
+
+                        for assertion in &judge_output.diff.ran {
+                            assertions::display_table_row(
+                                &mut table_rows,
+                                example.repetition,
+                                assertion,
+                            )?;
+                        }
+                    }
+
+                    if !judge_output.thread.is_empty() {
+                        thread_scores.push(judge_output.thread.passed_percentage());
+
+                        for assertion in &judge_output.thread.ran {
+                            assertions::display_table_row(
+                                &mut table_rows,
+                                example.repetition,
+                                assertion,
+                            )?;
+                        }
+                    }
+                }
+            }
+        }
+
+        if !table_rows.is_empty() {
+            assertions::print_table_header();
+            print!("{}", table_rows);
+
+            assertions::print_table_divider();
+
+            // One summary line per successful repetition.
+            for (example, result) in results.iter() {
+                if let Ok((run_output, judge_output)) = result {
+                    assertions::print_table_round_summary(
+                        &example.repetition.to_string(),
+                        [
+                            &run_output.programmatic_assertions,
+                            &judge_output.diff,
+                            &judge_output.thread,
+                        ]
+                        .into_iter(),
+                    )
+                }
+            }
+
+            assertions::print_table_divider();
+
+            // Summary across all successful repetitions of this example.
+            assertions::print_table_round_summary(
+                "avg",
+                results.iter().flat_map(|(_, result)| {
+                    result.iter().flat_map(|(run_output, judge_output)| {
+                        [
+                            &run_output.programmatic_assertions,
+                            &judge_output.diff,
+                            &judge_output.thread,
+                        ]
+                        .into_iter()
+                    })
+                }),
+            );
+
+            assertions::print_table_footer();
+        }
+
+        if !example_cumulative_tool_metrics.is_empty() {
+            println!("{}", &example_cumulative_tool_metrics);
+        }
+    }
+
+    if results_by_example_name.len() > 1 {
+        print_h1("AGGREGATE");
+
+        if error_count > 0 {
+            println!("\n{error_count} examples failed to run!");
+        }
+
+        print_average_score("programmatic", &programmatic_scores);
+        print_average_score("diff", &diff_scores);
+        print_average_score("thread", &thread_scores);
+
+        println!();
+
+        print_h2("CUMULATIVE TOOL METRICS");
+        println!("{}", cumulative_tool_metrics);
+    }
+
+    let explorer_output_path = run_dir.join("overview.html");
+    // Prefer paths relative to the run directory; fall back to the joined
+    // path when no relative path can be computed.
+    let mut json_paths: Vec<PathBuf> = results_by_example_name
+        .values()
+        .flat_map(|results| {
+            results.iter().map(|(example, _)| {
+                let absolute_path = example.run_directory.join("last.messages.json");
+                pathdiff::diff_paths(&absolute_path, run_dir).unwrap_or(absolute_path)
+            })
+        })
+        .collect();
+    json_paths.sort();
+    // The overview is a convenience artifact: report a failure, don't abort.
+    if let Err(err) = explorer::generate_explorer_html(&json_paths, &explorer_output_path) {
+        eprintln!("Failed to generate explorer HTML: {}", err);
+    }
+
+    Ok(())
+}
+
+/// Prints the floored average of `scores` as "Average <label> score: N%".
+/// Prints nothing when `scores` is empty.
+fn print_average_score(label: &str, scores: &[f32]) {
+    if scores.is_empty() {
+        return;
+    }
+    let average = (scores.iter().sum::<f32>() / (scores.len() as f32)).floor();
+    println!("Average {label} score: {average}%");
+}
@@ -0,0 +1,1045 @@
+<!doctype html>
+<html lang="en">
+ <head>
+ <meta charset="UTF-8" />
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+ <title>Eval Explorer</title>
+ <style>
+ :root {
+ /* Light theme (default) */
+ --bg-color: #ffffff;
+ --text-color: #333333;
+ --header-bg: #f8f8f8;
+ --border-color: #eaeaea;
+ --code-bg: #f5f5f5;
+ --link-color: #0066cc;
+ --button-bg: #f5f5f5;
+ --button-border: #ddd;
+ --button-active-bg: #0066cc;
+ --button-active-color: white;
+ --button-active-border: #0055aa;
+ --preview-bg: #f5f5f5;
+ --table-line: #f0f0f0;
+ }
+
+ /* Dark theme */
+ [data-theme="dark"] {
+ --bg-color: #1e1e1e;
+ --text-color: #e0e0e0;
+ --header-bg: #2d2d2d;
+ --border-color: #444444;
+ --code-bg: #2a2a2a;
+ --link-color: #4da6ff;
+ --button-bg: #333333;
+ --button-border: #555555;
+ --button-active-bg: #0066cc;
+ --button-active-color: white;
+ --button-active-border: #0055aa;
+ --preview-bg: #2a2a2a;
+ --table-line: #333333;
+ }
+
+ /* Apply theme variables */
+ body {
+ font-family: monospace;
+ line-height: 1.6;
+ margin: 0;
+ padding: 20px;
+ color: var(--text-color);
+ max-width: 1200px;
+ margin: 0 auto;
+ background-color: var(--bg-color);
+ }
+ h1 {
+ margin-bottom: 20px;
+ border-bottom: 1px solid var(--border-color);
+ padding-bottom: 10px;
+ font-family: monospace;
+ }
+ table {
+ width: 100%;
+ border-collapse: collapse;
+ margin-bottom: 20px;
+ table-layout: fixed; /* Ensure fixed width columns */
+ }
+ th,
+ td {
+ padding: 10px;
+ text-align: left;
+ border-bottom: 1px dotted var(--border-color);
+ vertical-align: top;
+ word-wrap: break-word; /* Ensure long content wraps */
+ overflow-wrap: break-word;
+ }
+ th {
+ background-color: var(--header-bg);
+ font-weight: 600;
+ }
+ .collapsible {
+ cursor: pointer;
+ color: var(--link-color);
+ text-decoration: underline;
+ }
+ .hidden {
+ display: none;
+ }
+ .tool-name {
+ font-weight: bold;
+ }
+ .tool-params {
+ padding-left: 20px;
+ color: #666;
+ }
+ pre {
+ background-color: var(--code-bg);
+ padding: 10px;
+ border-radius: 5px;
+ overflow-x: auto;
+ max-height: 200px;
+ margin: 10px 0;
+ font-family: monospace;
+ width: 100%;
+ box-sizing: border-box;
+ white-space: pre-wrap; /* Ensure text wraps */
+ color: var(--text-color);
+ }
+ code {
+ font-family: monospace;
+ }
+
+ /* Column sizing */
+ .turn-column {
+ width: 3%;
+ max-width: 3%;
+ }
+ .text-column {
+ width: 22%;
+ max-width: 22%;
+ }
+ .tool-column {
+ width: 38%;
+ max-width: 38%;
+ }
+ .result-column {
+ width: 37%;
+ max-width: 37%;
+ overflow-x: auto;
+ }
+
+ /* Content formatting */
+ .text-content {
+ font-family:
+ system-ui,
+ -apple-system,
+ BlinkMacSystemFont,
+ "Segoe UI",
+ Roboto,
+ Oxygen,
+ Ubuntu,
+ Cantarell,
+ "Open Sans",
+ "Helvetica Neue",
+ sans-serif;
+ font-size: 0.7rem;
+ }
+ .action-container .action-preview,
+ .action-container .action-full {
+ margin-bottom: 5px;
+ }
+ .preview-content {
+ white-space: pre-wrap;
+ margin-bottom: 5px;
+ background-color: var(--preview-bg);
+ padding: 10px;
+ border-radius: 5px;
+ font-family: monospace;
+ width: 100%;
+ box-sizing: border-box;
+ overflow-wrap: break-word;
+ color: var(--text-color);
+ }
+ .show-more {
+ color: var(--link-color);
+ cursor: pointer;
+ text-decoration: none;
+ display: block;
+ margin-top: 5px;
+ }
+ .more-inline {
+ color: var(--link-color);
+ cursor: pointer;
+ text-decoration: none;
+ display: inline;
+ margin-left: 5px;
+ }
+
+ /* Compact mode styles */
+ .compact-mode td {
+ padding: 5px; /* Reduced padding in compact mode */
+ }
+
+ .compact-mode .preview-content {
+ padding: 2px;
+ margin-bottom: 2px;
+ }
+
+ .compact-mode pre {
+ padding: 5px;
+ margin: 5px 0;
+ white-space: pre; /* Don't wrap code in compact mode */
+ overflow-x: auto; /* Add horizontal scrollbar */
+ }
+
+ .compact-mode .result-column pre,
+ .compact-mode .result-column .preview-content {
+ max-width: 100%;
+ overflow-x: auto;
+ white-space: pre;
+ }
+
+ /* Make action containers more compact */
+ .compact-mode .action-container {
+ margin-bottom: 2px;
+ }
+
+ /* Reduce space between turns */
+ .compact-mode tr {
+ border-bottom: 1px solid var(--table-line);
+ }
+
+ /* Tool params more compact */
+ .compact-mode .tool-params {
+ padding-left: 10px;
+ margin-top: 2px;
+ }
+
+ hr {
+ margin: 10px 0;
+ border: 0;
+ height: 1px;
+ background-color: var(--border-color);
+ }
+
+ /* View switcher */
+ .view-switcher {
+ display: flex;
+ gap: 10px;
+ margin-bottom: 20px;
+ align-items: center;
+ }
+
+ .view-button {
+ background-color: var(--button-bg);
+ border: 1px solid var(--button-border);
+ border-radius: 4px;
+ padding: 5px 15px;
+ cursor: pointer;
+ font-family: monospace;
+ font-size: 0.9rem;
+ transition: all 0.2s ease;
+ color: var(--text-color);
+ }
+
+ .view-button:hover {
+ background-color: var(--button-border);
+ }
+
+ .view-button.active {
+ background-color: var(--button-active-bg);
+ color: var(--button-active-color);
+ border-color: var(--button-active-border);
+ }
+
+ /* Navigation bar styles */
+ .thread-navigation {
+ display: flex;
+ align-items: center;
+ margin-bottom: 20px;
+ padding: 10px 0;
+ border-bottom: 1px solid var(--border-color);
+ }
+
+ .nav-button {
+ background-color: var(--button-bg);
+ border: 1px solid var(--button-border);
+ border-radius: 4px;
+ padding: 5px 15px;
+ cursor: pointer;
+ font-family: monospace;
+ font-size: 0.9rem;
+ transition: all 0.2s ease;
+ color: var(--text-color);
+ }
+
+ .nav-button:hover:not(:disabled) {
+ background-color: var(--button-border);
+ }
+
+ .nav-button:disabled {
+ opacity: 0.5;
+ cursor: not-allowed;
+ }
+
+ .thread-indicator {
+ margin: 0 15px;
+ font-size: 1rem;
+ flex-grow: 1;
+ text-align: center;
+ }
+
+ #thread-id {
+ font-weight: bold;
+ }
+
+ /* Theme switcher */
+ .theme-switcher {
+ margin-left: auto;
+ display: flex;
+ align-items: center;
+ }
+
+ .theme-button {
+ background-color: var(--button-bg);
+ border: 1px solid var(--button-border);
+ border-radius: 4px;
+ padding: 5px 10px;
+ cursor: pointer;
+ font-size: 0.9rem;
+ transition: all 0.2s ease;
+ color: var(--text-color);
+ display: flex;
+ align-items: center;
+ }
+
+ .theme-button:hover {
+ background-color: var(--button-border);
+ }
+
+ .theme-icon {
+ margin-right: 5px;
+ font-size: 1rem;
+ }
+ </style>
+ </head>
+ <body>
+ <h1 id="current-filename">Thread Explorer</h1>
+ <div class="view-switcher">
+ <button
+ id="full-view"
+ class="view-button active"
+ onclick="switchView('full')"
+ >
+ Full View
+ </button>
+ <button
+ id="compact-view"
+ class="view-button"
+ onclick="switchView('compact')"
+ >
+ Compact View
+ </button>
+ <button
+ id="export-button"
+ class="view-button"
+ onclick="exportThreadAsJson()"
+ title="Export current thread as JSON"
+ >
+ Export
+ </button>
+ <div class="theme-switcher">
+ <button
+ id="theme-toggle"
+ class="theme-button"
+ onclick="toggleTheme()"
+ >
+ <span id="theme-icon" class="theme-icon">☀️</span>
+ <span id="theme-text">Light</span>
+ </button>
+ </div>
+ </div>
+ <div class="thread-navigation">
+ <button
+ id="prev-thread"
+ class="nav-button"
+ onclick="previousThread()"
+ title="Previous thread (Ctrl+←, k, or h)"
+ disabled
+ >
+ ← Previous
+ </button>
+ <div class="thread-indicator">
+ Thread <span id="current-thread-index">1</span> of
+ <span id="total-threads">1</span>:
+ <span id="thread-id">Default Thread</span>
+ </div>
+ <button
+ id="next-thread"
+ class="nav-button"
+ onclick="nextThread()"
+ title="Next thread (Ctrl+→, j, or l)"
+ disabled
+ >
+ Next →
+ </button>
+ </div>
+ <table id="thread-table">
+ <thead>
+ <tr>
+ <th class="turn-column">Turn</th>
+ <th class="text-column">Text</th>
+ <th class="tool-column">Tool</th>
+ <th class="result-column">Result</th>
+ </tr>
+ </thead>
+ <tbody id="thread-body">
+ <!-- Content will be filled dynamically -->
+ </tbody>
+ </table>
+
+ <script>
+ // View mode - 'full' or 'compact'
+ let viewMode = "full";
+
+ // Theme mode - 'light', 'dark', or 'system'
+ let themeMode = localStorage.getItem("theme") || "system";
+
+ // Function to apply theme
+ function applyTheme(theme) {
+ const themeIcon = document.getElementById("theme-icon");
+ const themeText = document.getElementById("theme-text");
+
+ if (theme === "dark") {
+ document.documentElement.setAttribute("data-theme", "dark");
+ themeIcon.textContent = "🌙";
+ themeText.textContent = "Dark";
+ } else {
+ document.documentElement.removeAttribute("data-theme");
+ themeIcon.textContent = "☀️";
+ themeText.textContent = "Light";
+ }
+ }
+
+ // Function to toggle between light and dark themes
+ function toggleTheme() {
+ // If currently system or light, switch to dark
+ if (themeMode === "system") {
+ const systemDark = window.matchMedia(
+ "(prefers-color-scheme: dark)",
+ ).matches;
+ themeMode = systemDark ? "light" : "dark";
+ } else {
+ themeMode = themeMode === "light" ? "dark" : "light";
+ }
+
+ // Save preference
+ localStorage.setItem("theme", themeMode);
+
+ // Apply theme
+ applyTheme(themeMode);
+ }
+
+ // Initialize theme based on system or saved preference
+ function initTheme() {
+ if (themeMode === "system") {
+ // Use system preference
+ const systemDark = window.matchMedia(
+ "(prefers-color-scheme: dark)",
+ ).matches;
+ applyTheme(systemDark ? "dark" : "light");
+
+ // Listen for system theme changes
+ window
+ .matchMedia("(prefers-color-scheme: dark)")
+ .addEventListener("change", (e) => {
+ if (themeMode === "system") {
+ applyTheme(e.matches ? "dark" : "light");
+ }
+ });
+ } else {
+ // Use saved preference
+ applyTheme(themeMode);
+ }
+ }
+
+ // Function to switch between view modes
+ function switchView(mode) {
+ viewMode = mode;
+
+ // Update button states
+ document
+ .getElementById("full-view")
+ .classList.toggle("active", mode === "full");
+ document
+ .getElementById("compact-view")
+ .classList.toggle("active", mode === "compact");
+
+ // Add or remove compact-mode class on the body
+ document.body.classList.toggle(
+ "compact-mode",
+ mode === "compact",
+ );
+
+ // Re-render the thread with the new view mode
+ renderThread();
+ }
+
+ // Function to export the current thread as a JSON file
+ function exportThreadAsJson() {
+ // Clone the thread to avoid modifying the original
+ const threadToExport = JSON.parse(JSON.stringify(thread));
+
+ // Create a Blob with the JSON data
+ const blob = new Blob(
+ [JSON.stringify(threadToExport, null, 2)],
+ { type: "application/json" }
+ );
+
+ // Create a download link
+ const url = URL.createObjectURL(blob);
+ const a = document.createElement("a");
+ a.href = url;
+
+ // Generate filename based on thread ID or index
+ const filename = threadToExport.thread_id ||
+ threadToExport.filename ||
+ `thread-${currentThreadIndex + 1}.json`;
+ a.download = filename.endsWith(".json") ? filename : `${filename}.json`;
+
+ // Trigger the download
+ document.body.appendChild(a);
+ a.click();
+
+ // Clean up
+ setTimeout(() => {
+ document.body.removeChild(a);
+ URL.revokeObjectURL(url);
+ }, 0);
+ }
+ // Default dummy thread data for preview purposes
+ let dummyThread = {
+ messages: [
+ {
+ role: "system",
+ content: [{ Text: "System prompt..." }],
+ },
+ {
+ role: "user",
+ content: [
+ { Text: "Fix the bug: kwargs not passed..." },
+ ],
+ },
+ {
+ role: "assistant",
+ content: [
+ { Text: "I'll help you fix that bug." },
+ {
+ ToolUse: {
+ name: "list_directory",
+ input: { path: "fastmcp" },
+ is_input_complete: true,
+ },
+ },
+ ],
+ },
+ {
+ role: "user",
+ content: [
+ {
+ ToolResult: {
+ tool_name: "list_directory",
+ is_error: false,
+ content:
+ "fastmcp/src\nfastmcp/tests\nfastmcp/README.md\nfastmcp/pyproject.toml\nfastmcp/.gitignore\nfastmcp/setup.py\nfastmcp/examples\nfastmcp/LICENSE",
+ },
+ },
+ ],
+ },
+ {
+ role: "assistant",
+ content: [
+ { Text: "Let's examine the code." },
+ {
+ ToolUse: {
+ name: "read_file",
+ input: {
+ path: "fastmcp/main.py",
+ start_line: 253,
+ end_line: 360,
+ },
+ is_input_complete: true,
+ },
+ },
+ ],
+ },
+ {
+ role: "user",
+ content: [
+ {
+ ToolResult: {
+ tool_name: "read_file",
+ is_error: false,
+ content:
+ "def run_application(app, **kwargs):\n return anyio.run(app, **kwargs)\n\nasync def start_server():\n # More code...\n # Multiple lines of code that would be displayed\n # when clicking on the show more link\n app = create_app()\n await run_app(app)\n\ndef main():\n # Initialize everything\n anyio.run(start_server)\n # Even more code here\n # that would be shown when the user\n # expands the content",
+ },
+ },
+ ],
+ },
+ {
+ role: "assistant",
+ content: [
+ { Text: "I found the issue." },
+ {
+ ToolUse: {
+ name: "edit_file",
+ input: {
+ path: "fastmcp/core.py",
+ old_string:
+ "def start_server(app):\n anyio.run(app)",
+ new_string:
+ "def start_server(app, **kwargs):\n anyio.run(app, **kwargs)",
+ display_description:
+ "Fix kwargs passing to anyio.run",
+ },
+ is_input_complete: true,
+ },
+ },
+ ],
+ },
+ {
+ role: "user",
+ content: [
+ {
+ ToolResult: {
+ tool_name: "edit_file",
+ is_error: false,
+ content: "Made edit to fastmcp/core.py",
+ },
+ },
+ ],
+ },
+ {
+ role: "assistant",
+ content: [
+ { Text: "Let's check if there are any errors." },
+ {
+ ToolUse: {
+ name: "diagnostics",
+ input: {},
+ is_input_complete: true,
+ },
+ },
+ ],
+ },
+ {
+ role: "user",
+ content: [
+ {
+ ToolResult: {
+ tool_name: "diagnostics",
+ is_error: false,
+ content: "No errors found",
+ },
+ },
+ ],
+ },
+ ],
+ };
+
+ // The actual thread data will be injected here when opened by eval
+ let threadsData = window.threadsData || { threads: [dummyThread] };
+
+ // Initialize thread variables
+ let threads = threadsData.threads;
+ let currentThreadIndex = 0;
+ let thread = threads[currentThreadIndex];
+
+ // Function to navigate to the previous thread
+ function previousThread() {
+ if (currentThreadIndex > 0) {
+ currentThreadIndex--;
+ switchToThread(currentThreadIndex);
+ }
+ }
+
+ // Function to navigate to the next thread
+ function nextThread() {
+ if (currentThreadIndex < threads.length - 1) {
+ currentThreadIndex++;
+ switchToThread(currentThreadIndex);
+ }
+ }
+
+ // Function to switch to a specific thread by index
+ function switchToThread(index) {
+ if (index >= 0 && index < threads.length) {
+ currentThreadIndex = index;
+ thread = threads[currentThreadIndex];
+ updateNavigationButtons();
+ renderThread();
+ }
+ }
+
+ // Function to update the navigation buttons state
+ function updateNavigationButtons() {
+ document.getElementById("prev-thread").disabled =
+ currentThreadIndex <= 0;
+ document.getElementById("next-thread").disabled =
+ currentThreadIndex >= threads.length - 1;
+ document.getElementById("current-thread-index").textContent =
+ currentThreadIndex + 1;
+ document.getElementById("total-threads").textContent =
+ threads.length;
+ }
+
+ function renderThread() {
+ const tbody = document.getElementById("thread-body");
+ tbody.innerHTML = ""; // Clear existing content
+
+ // Set thread name if available
+ const threadId =
+ thread.thread_id || `Thread ${currentThreadIndex + 1}`;
+ document.getElementById("thread-id").textContent = threadId;
+
+ // Set filename in the header if available
+ const filename =
+ thread.filename || `Thread ${currentThreadIndex + 1}`;
+ document.getElementById("current-filename").textContent =
+ filename;
+
+ // Skip system message
+ const nonSystemMessages = thread.messages.filter(
+ (msg) => msg.role !== "system",
+ );
+
+ let turnNumber = 0;
+ processMessages(nonSystemMessages, tbody, turnNumber);
+ }
+
+ function processMessages(messages, tbody) {
+ let turnNumber = 0;
+
+ for (let i = 0; i < messages.length; i++) {
+ const msg = messages[i];
+
+ if (isUserQuery(msg)) {
+ // User message starts a new turn
+ turnNumber++;
+ renderUserMessage(msg, turnNumber, tbody);
+ } else if (msg.role === "assistant") {
+ // Each assistant message is one turn
+ turnNumber++;
+
+ // Collect all text content and tool uses for this turn
+ let assistantText = "";
+ let toolUses = [];
+
+ // First, collect all text content
+ for (const content of msg.content) {
+ if (content.hasOwnProperty("Text")) {
+ if (assistantText) {
+ assistantText +=
+ "<br><br>" +
+ formatContent(content.Text);
+ } else {
+ assistantText = formatContent(content.Text);
+ }
+ } else if (content.hasOwnProperty("ToolUse")) {
+ toolUses.push(content.ToolUse);
+ }
+ }
+
+ // Create a single row for this turn with text and tools
+ const row = document.createElement("tr");
+ row.id = `assistant-turn-${turnNumber}`;
+
+ // Start with the turn number and assistant text
+ row.innerHTML = `
+ <td class="text-content">${turnNumber}</td>
+ <td class="text-content"><!--Assistant: <br/ -->${assistantText}</td>
+ <td id="tools-${turnNumber}"></td>
+ <td id="results-${turnNumber}"></td>
+ `;
+
+ tbody.appendChild(row);
+
+ // Add all tool calls to the tools cell
+ const toolsCell = document.getElementById(
+ `tools-${turnNumber}`,
+ );
+ const resultsCell = document.getElementById(
+ `results-${turnNumber}`,
+ );
+
+ // Process all tools and their results
+ for (let j = 0; j < toolUses.length; j++) {
+ const toolUse = toolUses[j];
+ const toolCall = formatToolCall(
+ toolUse.name,
+ toolUse.input,
+ );
+
+ // Add the tool call to the tools cell
+ if (j > 0) toolsCell.innerHTML += "<hr>";
+ toolsCell.innerHTML += toolCall;
+
+ // Look for corresponding tool result
+ if (
+ hasMatchingToolResult(messages, i, toolUse.name)
+ ) {
+ const resultMsg = messages[i + 1];
+ const toolResult = findToolResult(
+ resultMsg,
+ toolUse.name,
+ );
+
+ if (toolResult) {
+ // Add the result to the results cell
+ if (j > 0) resultsCell.innerHTML += "<hr>";
+
+ // Create a container for the result
+ const resultDiv =
+ document.createElement("div");
+ resultDiv.className = "tool-result";
+
+ // Format and display the tool result
+ formatToolResultInline(
+ toolResult.content,
+ resultDiv,
+ );
+ resultsCell.appendChild(resultDiv);
+
+ // Skip the result message in the next iteration
+ if (j === toolUses.length - 1) {
+ i++;
+ }
+ }
+ }
+ }
+ } else if (
+ msg.role === "user" &&
+ msg.content.some((c) => c.hasOwnProperty("ToolResult"))
+ ) {
+ // Skip tool result messages as they are handled with their corresponding tool use
+ continue;
+ }
+ }
+ }
+
+ function isUserQuery(message) {
+ return (
+ message.role === "user" &&
+ !message.content.some((c) => c.hasOwnProperty("ToolResult"))
+ );
+ }
+
+ function renderUserMessage(message, turnNumber, tbody) {
+ const row = document.createElement("tr");
+ row.innerHTML = `
+ <td>${turnNumber}</td>
+ <td class="text-content"><b>[User]:</b><br/> ${formatContent(message.content[0].Text)}</td>
+ <td></td>
+ <td></td>
+ `;
+ tbody.appendChild(row);
+ }
+
+ function hasMatchingToolResult(messages, currentIndex, toolName) {
+ return (
+ currentIndex + 1 < messages.length &&
+ messages[currentIndex + 1].role === "user" &&
+ messages[currentIndex + 1].content.some(
+ (c) =>
+ c.hasOwnProperty("ToolResult") &&
+ c.ToolResult.tool_name === toolName,
+ )
+ );
+ }
+
+ function findToolResult(resultMessage, toolName) {
+ const toolResultContent = resultMessage.content.find(
+ (c) =>
+ c.hasOwnProperty("ToolResult") &&
+ c.ToolResult.tool_name === toolName,
+ );
+
+ return toolResultContent ? toolResultContent.ToolResult : null;
+ }
+ function formatToolCall(name, input) {
+ // In compact mode, format tool calls on a single line
+ if (viewMode === "compact") {
+ const params = [];
+ const fullParams = [];
+
+ // Process all parameters
+ for (const [key, value] of Object.entries(input)) {
+ if (value !== null && value !== undefined) {
+ // Store full parameter for expanded view
+ let fullValue =
+ typeof value === "string"
+ ? `"${value}"`
+ : value;
+ fullParams.push([key, fullValue]);
+
+ // Abbreviated value for compact view
+ let displayValue = fullValue;
+ if (
+ typeof value === "string" &&
+ value.length > 30
+ ) {
+ displayValue = `"${value.substring(0, 30)}..."`;
+ }
+ params.push(`${key}=${displayValue}`);
+ }
+ }
+
+ const paramString = params.join(", ");
+ const fullLine = `<span class="tool-name">${name}</span>(${paramString})`;
+
+ // If the line is too long, add a [more] link
+ if (fullLine.length > 80 || params.length > 1) {
+ // Create a container with the compact and full views
+ const compactView = `<span class="tool-name">${name}</span>(${params[0]}, <span class="more-inline" onclick="toggleActionVisibility(this)">[...]</span>)`;
+
+ // For the full view, use the original untruncated values
+ let result = `<span class="tool-name">${name}</span>(`;
+ const formattedParams = fullParams
+ .map(
+ (p) =>
+ ` ${p[0]}=${p[1]}`,
+ )
+ .join(",<br/>");
+ const fullView = `${result}<br/>${formattedParams}<br/>)`;
+
+ return `<div class="action-container">
+ <div class="action-preview">${compactView}</div>
+ <div class="action-full hidden">${fullView}</div>
+ </div>`;
+ }
+
+ return fullLine;
+ }
+
+ // Regular (full) view formatting with multiple lines
+ let result = `<span class="tool-name">${name}</span>(`;
+ const params = [];
+ for (const [key, value] of Object.entries(input)) {
+ if (value !== null && value !== undefined) {
+ // Format different types of values
+ let formattedValue =
+ typeof value === "string" ? `"${value}"` : value;
+ params.push([key, formattedValue]);
+ }
+ }
+
+ if (params.length === 0) {
+ return `${result})`;
+ } else if (params.length === 1) {
+ // For single parameter, just show the value without the parameter name
+ return `${result}${params[0][1]})`;
+ } else {
+ // Format parameters
+ const formattedParams = params
+ .map((p) => ` ${p[0]}=${p[1]}`)
+ .join(",<br/>");
+ return `${result}<br/>${formattedParams}<br/>)`;
+ }
+ }
+
+ function toggleActionVisibility(element, remainingLines) {
+ const container = element.closest(".action-container");
+ const preview = container.querySelector(".action-preview");
+ const full = container.querySelector(".action-full");
+
+ // Once expanded, keep it expanded
+ full.classList.remove("hidden");
+ preview.classList.add("hidden");
+ }
+
+ function formatToolResultInline(content, targetElement) {
+ // Count lines
+ const lines = content.split("\n");
+
+ // In compact mode, show only 1 line with [more] link
+ if (viewMode === "compact" && lines.length > 1) {
+ // Create container
+ const container = document.createElement("div");
+
+ // Preview content
+ const previewDiv = document.createElement("div");
+ previewDiv.className = "preview-content";
+
+ // Add the first line of content plus [more] link
+ const previewContent = lines[0];
+ previewDiv.innerHTML =
+ escapeHtml(previewContent) +
+ ` <span class="more-inline" onclick="toggleResultVisibility(this)">[...]</span>`;
+
+ // Full content (initially hidden)
+ const contentDiv = document.createElement("pre");
+ contentDiv.className = "hidden";
+ contentDiv.innerHTML = escapeHtml(content);
+
+ container.appendChild(previewDiv);
+ container.appendChild(contentDiv);
+ targetElement.appendChild(container);
+ } else {
+ // For full view or short results, display everything
+ const preElement = document.createElement("pre");
+ preElement.textContent = content;
+ targetElement.appendChild(preElement);
+ }
+ }
+
+ function toggleResultVisibility(element, remainingLines) {
+ const container = element.parentElement.parentElement;
+ const preview = container.querySelector(".preview-content");
+ const full = container.querySelector("pre");
+
+ // Once expanded, keep it expanded
+ full.classList.remove("hidden");
+ preview.classList.add("hidden");
+ }
+
+ function formatContent(text) {
+ return escapeHtml(text);
+ }
+
+ function escapeHtml(text) {
+ const div = document.createElement("div");
+ div.textContent = text;
+ return div.innerHTML;
+ }
+
+ // Keyboard navigation handler
+ document.addEventListener("keydown", function (event) {
+ // previous thread
+ if (
+ (event.ctrlKey && event.key === "ArrowLeft") ||
+ event.key === "h" ||
+ event.key === "k"
+ ) {
+ if (!document.getElementById("prev-thread").disabled) {
+ previousThread();
+ }
+ }
+ // next thread
+ else if (
+ (event.ctrlKey && event.key === "ArrowRight") ||
+ event.key === "j" ||
+ event.key === "l"
+ ) {
+ if (!document.getElementById("next-thread").disabled) {
+ nextThread();
+ }
+ }
+ });
+
+ // Initialize the page
+ document.addEventListener("DOMContentLoaded", function () {
+ initTheme();
+ updateNavigationButtons();
+ renderThread();
+ });
+ </script>
+ </body>
+</html>
@@ -0,0 +1,75 @@
+use anyhow::{Context, Result, anyhow};
+use clap::Parser;
+use serde_json::{Value, json};
+use std::fs;
+use std::path::PathBuf;
+
+#[derive(Parser, Debug)] // `Parser` supplies `Args::parse()`, which `main` calls
+#[clap(about = "Generate HTML explorer from JSON thread files")]
+struct Args { // Command-line arguments; the `///` comments on the fields are doc comments, keep them user-facing
+ /// Paths to JSON files containing thread data
+ #[clap(long, required = true, num_args = 1..)] // `--input`: mandatory, accepts one or more values
+ input: Vec<PathBuf>,
+
+ /// Path where the HTML explorer file will be written
+ #[clap(long)] // `--output`
+ output: PathBuf,
+}
+
+/// Builds the explorer page from thread JSON files and writes it to `output`.
+///
+/// Every file in `inputs` is read and parsed as JSON, and its path is stored under
+/// the `"filename"` key (shown in the thread heading). The threads are then injected
+/// into the `src/explorer.html` template, which is looked up relative to
+/// `CARGO_MANIFEST_DIR`, and the result is written to `output`. Missing parent
+/// directories of `output` are created.
+///
+/// Returns the generated HTML.
+///
+/// # Errors
+///
+/// Returns an error if the output directory cannot be created, the template or an
+/// input file cannot be read, an input is not valid JSON or its top-level value
+/// cannot hold a `"filename"` key, the template lacks the injection marker, or the
+/// output file cannot be written.
+pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result<String> {
+    if let Some(parent) = output.parent() {
+        // `create_dir_all` succeeds when the directory already exists, so a separate
+        // `exists()` check would only add a window for a race.
+        fs::create_dir_all(parent).with_context(|| {
+            format!("Failed to create output directory: {}", parent.display())
+        })?;
+    }
+
+    let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/explorer.html");
+    let template = fs::read_to_string(&template_path).with_context(|| {
+        format!(
+            "Template file not found or couldn't be read: {}",
+            template_path.display()
+        )
+    })?;
+
+    let threads = inputs
+        .iter()
+        .map(|input_path| {
+            let mut thread_data: Value = fs::read_to_string(input_path)
+                .with_context(|| format!("Failed to read file: {}", input_path.display()))?
+                .parse::<Value>()
+                .with_context(|| format!("Failed to parse JSON: {}", input_path.display()))?;
+            // Indexing a JSON array, string, number or bool by key panics, so reject
+            // those up front. `null` is allowed because indexing turns it into an object.
+            if !(thread_data.is_object() || thread_data.is_null()) {
+                return Err(anyhow!(
+                    "Expected a JSON object at the top level of {}",
+                    input_path.display()
+                ));
+            }
+            // This will be shown in a thread heading. The lossy conversion avoids the
+            // panic that serializing a non-UTF-8 path through `json!` would cause.
+            thread_data["filename"] = json!(input_path.to_string_lossy());
+            Ok(thread_data)
+        })
+        .collect::<Result<Vec<_>>>()?;
+
+    let all_threads = json!({ "threads": threads });
+    let html_content = inject_thread_data(template, all_threads)?;
+    fs::write(output, &html_content)
+        .with_context(|| format!("Failed to write output: {}", output.display()))?;
+
+    println!("Saved {} thread(s) to {}", threads.len(), output.display());
+    Ok(html_content)
+}
+
+/// Replaces the template's placeholder `threadsData` declaration with one that
+/// holds `threads_data` serialized as pretty-printed JSON.
+///
+/// Only the first occurrence of the placeholder statement is replaced.
+///
+/// # Errors
+///
+/// Returns an error if the placeholder statement is not present in `template` or
+/// if `threads_data` cannot be serialized.
+fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
+    let injection_marker = "let threadsData = window.threadsData || { threads: [dummyThread] };";
+    if !template.contains(injection_marker) {
+        return Err(anyhow!(
+            "Could not find the thread injection point in the template"
+        ));
+    }
+
+    // The JSON ends up inside a <script> element, where a literal "</script>" (or
+    // "<!--") in thread data would end or corrupt the script. In JSON, '<' can only
+    // occur inside string literals, so rewriting it as the escape \u003c changes
+    // nothing about the decoded data.
+    let threads_json = serde_json::to_string_pretty(&threads_data)
+        .context("Failed to serialize threads data to JSON")?
+        .replace('<', "\\u003c");
+    let script_injection = format!("let threadsData = {};", threads_json);
+
+    Ok(template.replacen(injection_marker, &script_injection, 1))
+}
+
+/// Entry point of the standalone binary: parses the command line and writes the
+/// explorer page, discarding the returned HTML. Not compiled for tests or doctests.
+// NOTE(review): `dead_code` is presumably allowed because this file is also
+// compiled somewhere `main` is never called - confirm.
+#[cfg(not(any(test, doctest)))]
+#[allow(dead_code)]
+fn main() -> Result<()> {
+    let Args { input, output } = Args::parse();
+    generate_explorer_html(&input, &output)?;
+    Ok(())
+}