@@ -12,8 +12,10 @@ This eval tests a fix for a destructive behavior of the `edit_file` tool.
Previously, it would rewrite existing files too aggressively, which often
resulted in content loss.
-Pass rate before the fix: 10%
-Pass rate after the fix: 100%
+Model | Pass rate
+----------------|----------
+Sonnet 3.7 | 100%
+Gemini 2.5 Pro | 80%
*/
#[async_trait(?Send)]
@@ -38,7 +40,9 @@ impl Example for FileOverwriteExample {
let input = tool_use.parse_input::<EditFileToolInput>()?;
match input.mode {
EditFileMode::Edit => false,
- EditFileMode::Create | EditFileMode::Overwrite => true,
+ EditFileMode::Create | EditFileMode::Overwrite => {
+ input.path.ends_with("src/language_model_selector.rs")
+ }
}
} else {
false
@@ -2,22 +2,65 @@ use anyhow::{Context as _, Result};
use clap::Parser;
use serde_json::{Value, json};
use std::fs;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
+// Command-line arguments, parsed through clap's derive API.
+// `--input` is marked required and accepts one or more values (`num_args = 1..`);
+// `--output` is the single path the generated HTML is written to.
+// NOTE: the `///` comments on the fields below are picked up by clap as help text,
+// so editing them changes what `--help` prints.
#[derive(Parser, Debug)]
#[clap(about = "Generate HTML explorer from JSON thread files")]
struct Args {
- /// Paths to JSON files containing thread data
+ /// Paths to JSON files or directories. If a directory is provided,
+ /// it will be searched for 'last.messages.json' files up to 2 levels deep.
#[clap(long, required = true, num_args = 1..)]
input: Vec<PathBuf>,
- /// Path where the HTML explorer file will be written
+ /// Path where the output HTML file will be written
#[clap(long)]
output: PathBuf,
}
-pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result<String> {
- if let Some(parent) = output.parent() {
+/// Walks `dir_path` and appends every regular file whose name equals
+/// `target_filename` to `found_files`.
+///
+/// `current_depth` is the depth of `dir_path` itself (callers start at 0).
+/// A directory is only read while `current_depth <= max_depth`; deeper
+/// directories are silently ignored.
+///
+/// # Errors
+///
+/// Returns an error, annotated with the offending directory, if a directory
+/// or one of its entries cannot be read.
+#[allow(dead_code)]
+fn find_target_files_recursive(
+    dir_path: &Path,
+    target_filename: &str,
+    current_depth: u8,
+    max_depth: u8,
+    found_files: &mut Vec<PathBuf>,
+) -> Result<()> {
+    // Depth guard: stop descending once we are past the allowed level.
+    if current_depth > max_depth {
+        return Ok(());
+    }
+
+    let entries = fs::read_dir(dir_path)
+        .with_context(|| format!("Failed to read directory: {}", dir_path.display()))?;
+
+    for entry in entries {
+        let candidate = entry
+            .with_context(|| {
+                format!("Failed to read directory entry in: {}", dir_path.display())
+            })?
+            .path();
+
+        if candidate.is_dir() {
+            // Sub-directory: search it one level deeper.
+            find_target_files_recursive(
+                &candidate,
+                target_filename,
+                current_depth + 1,
+                max_depth,
+                found_files,
+            )?;
+            continue;
+        }
+
+        // Names that are not valid UTF-8 yield `None` here and never match.
+        if candidate.is_file()
+            && candidate.file_name().and_then(|name| name.to_str()) == Some(target_filename)
+        {
+            found_files.push(candidate);
+        }
+    }
+    Ok(())
+}
+
+pub fn generate_explorer_html(input_paths: &[PathBuf], output_path: &PathBuf) -> Result<String> {
+ if let Some(parent) = output_path.parent() {
if !parent.exists() {
fs::create_dir_all(parent).context(format!(
"Failed to create output directory: {}",
@@ -27,41 +70,67 @@ pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result<St
}
let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/explorer.html");
- let template = fs::read_to_string(&template_path).context(format!(
+ let template_content = fs::read_to_string(&template_path).context(format!(
"Template file not found or couldn't be read: {}",
template_path.display()
))?;
- let threads = inputs
+ if input_paths.is_empty() {
+ println!(
+ "No input JSON files found to process. Explorer will be generated with template defaults or empty data."
+ );
+ }
+
+ let threads = input_paths
.iter()
.map(|input_path| {
- let mut thread_data: Value = fs::read_to_string(input_path)
- .context(format!("Failed to read file: {}", input_path.display()))?
+ let file_content = fs::read_to_string(input_path)
+ .context(format!("Failed to read file: {}", input_path.display()))?;
+ let mut thread_data: Value = file_content
.parse::<Value>()
- .context(format!("Failed to parse JSON: {}", input_path.display()))?;
- thread_data["filename"] = json!(input_path); // This will be shown in a thread heading
+ .context(format!("Failed to parse JSON from file: {}", input_path.display()))?;
+
+ if let Some(obj) = thread_data.as_object_mut() {
+ obj.insert("filename".to_string(), json!(input_path.display().to_string()));
+ } else {
+ eprintln!("Warning: JSON data in {} is not a root object. Wrapping it to include filename.", input_path.display());
+ thread_data = json!({
+ "original_data": thread_data,
+ "filename": input_path.display().to_string()
+ });
+ }
Ok(thread_data)
})
.collect::<Result<Vec<_>>>()?;
- let all_threads = json!({ "threads": threads });
- let html_content = inject_thread_data(template, all_threads)?;
- fs::write(&output, &html_content)
- .context(format!("Failed to write output: {}", output.display()))?;
+ let all_threads_data = json!({ "threads": threads });
+ let html_content = inject_thread_data(template_content, all_threads_data)?;
+ fs::write(&output_path, &html_content)
+ .context(format!("Failed to write output: {}", output_path.display()))?;
- println!("Saved {} thread(s) to {}", threads.len(), output.display());
+ println!(
+ "Saved data from {} resolved file(s) ({} threads) to {}",
+ input_paths.len(),
+ threads.len(),
+ output_path.display()
+ );
Ok(html_content)
}
fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
let injection_marker = "let threadsData = window.threadsData || { threads: [dummyThread] };";
- template
- .find(injection_marker)
- .context("Could not find the thread injection point in the template")?;
+ if !template.contains(injection_marker) {
+ anyhow::bail!(
+ "Could not find the thread injection point in the template. Expected: '{}'",
+ injection_marker
+ );
+ }
- let threads_json = serde_json::to_string_pretty(&threads_data)
- .context("Failed to serialize threads data to JSON")?;
- let script_injection = format!("let threadsData = {};", threads_json);
+ let threads_json_string = serde_json::to_string_pretty(&threads_data)
+ .context("Failed to serialize threads data to JSON")?
+ .replace("</script>", r"<\/script>");
+
+ let script_injection = format!("let threadsData = {};", threads_json_string);
let final_html = template.replacen(injection_marker, &script_injection, 1);
Ok(final_html)
@@ -71,5 +140,45 @@ fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
+/// Command-line entry point: expands each `--input` argument into concrete
+/// JSON files and renders them into the HTML explorer at `--output`.
+///
+/// * A directory argument is searched for `last.messages.json` files, at most
+///   `MAX_SEARCH_DEPTH` levels below it.
+/// * A file argument is used as-is.
+/// * A path that does not exist is reported on stderr and skipped.
+///
+/// # Errors
+///
+/// Fails if a directory search hits an unreadable directory, or if
+/// `generate_explorer_html` fails.
#[allow(dead_code)]
fn main() -> Result<()> {
let args = Args::parse();
- generate_explorer_html(&args.input, &args.output).map(|_| ())
+
+    const DEFAULT_FILENAME: &str = "last.messages.json";
+    const MAX_SEARCH_DEPTH: u8 = 2;
+
+    let mut resolved_input_files: Vec<PathBuf> = Vec::new();
+
+    for input_path_arg in &args.input {
+        if !input_path_arg.exists() {
+            eprintln!(
+                "Warning: Input path {} does not exist. Skipping.",
+                input_path_arg.display()
+            );
+            continue;
+        }
+
+        if input_path_arg.is_dir() {
+            find_target_files_recursive(
+                input_path_arg,
+                DEFAULT_FILENAME,
+                0, // starting depth
+                MAX_SEARCH_DEPTH,
+                &mut resolved_input_files,
+            )
+            .with_context(|| {
+                format!(
+                    "Error searching for '{}' files in directory: {}",
+                    DEFAULT_FILENAME,
+                    input_path_arg.display()
+                )
+            })?;
+        } else if input_path_arg.is_file() {
+            resolved_input_files.push(input_path_arg.clone());
+        }
+    }
+
+    // The same file can be reached through several arguments (e.g. a directory
+    // and a file inside it); sort so that `dedup` removes every duplicate.
+    resolved_input_files.sort_unstable();
+    resolved_input_files.dedup();
+
+    // Report the "nothing to do" case only when no file was resolved, instead of
+    // printing the message on every run.
+    if resolved_input_files.is_empty() {
+        println!("No input paths provided/found.");
+    }
+
+    generate_explorer_html(&resolved_input_files, &args.output).map(|_| ())
}