Add `ep --failed=skip` to exclude errored examples from output (#46453)

Oleksiy Syvokon created

Release Notes:

- N/A

Change summary

crates/edit_prediction_cli/src/main.rs     | 61 ++++++++++++++++++-----
crates/edit_prediction_cli/src/progress.rs | 10 +++
2 files changed, 54 insertions(+), 17 deletions(-)

Detailed changes

crates/edit_prediction_cli/src/main.rs 🔗

@@ -35,7 +35,7 @@ use crate::distill::run_distill;
 use crate::example::{Example, group_examples_by_repo, read_example_files};
 use crate::format_prompt::run_format_prompt;
 use crate::load_project::run_load_project;
-use crate::paths::FAILED_EXAMPLES_DIR;
+use crate::paths::{FAILED_EXAMPLES_DIR, RUN_DIR};
 use crate::predict::run_prediction;
 use crate::progress::Progress;
 use crate::retrieve_context::run_context_retrieval;
@@ -69,6 +69,21 @@ struct EpArgs {
     in_place: bool,
     #[arg(long, short, global = true)]
     failfast: bool,
+    /// How to handle failed examples in output: keep them or skip them.
+    /// Failed examples are always logged to the run's failed directory.
+    #[arg(long, global = true, default_value = "keep")]
+    failed: FailedHandling,
+}
+
+/// Controls whether failed examples are included in the main output.
+/// Failed examples are always logged to the run's failed/ directory regardless of this setting.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, ValueEnum)]
+pub enum FailedHandling {
+    /// Include failed examples in the main output (default)
+    #[default]
+    Keep,
+    /// Exclude failed examples from the main output
+    Skip,
 }
 
 const INPUTS_HELP: &str = r#"
@@ -189,7 +204,7 @@ impl Display for Command {
 
 #[derive(Debug, Args, Clone)]
 struct FormatPromptArgs {
-    #[clap(long)]
+    #[clap(long, short('p'))]
     prompt_format: PromptFormat,
 }
 
@@ -533,7 +548,7 @@ fn main() {
                             }
                             .await;
 
-                            if let Err(error) = result {
+                            let failed = if let Err(error) = result {
                                 handle_error(
                                     error,
                                     &args,
@@ -543,18 +558,25 @@ fn main() {
                                     example,
                                 )
                                 .await;
-                            }
-
-                            if let Some(ref mut sender) = output_sender.clone() {
-                                let line = serde_json::to_string(example).unwrap();
-                                sender
-                                    .send(line)
-                                    .await
-                                    .expect("Failed to send to output writer");
-                            } else if args.output.is_none() && !matches!(command, Command::Eval(_))
-                            {
-                                let line = serde_json::to_string(example).unwrap();
-                                println!("{}", line);
+                                true
+                            } else {
+                                false
+                            };
+
+                            let should_write = !failed || args.failed == FailedHandling::Keep;
+                            if should_write {
+                                if let Some(ref mut sender) = output_sender.clone() {
+                                    let line = serde_json::to_string(example).unwrap();
+                                    sender
+                                        .send(line)
+                                        .await
+                                        .expect("Failed to send to output writer");
+                                } else if args.output.is_none()
+                                    && !matches!(command, Command::Eval(_))
+                                {
+                                    let line = serde_json::to_string(example).unwrap();
+                                    println!("{}", line);
+                                }
                             }
                         }
                     });
@@ -615,6 +637,15 @@ async fn handle_error(
         .await
         .unwrap();
 
+    let failed_jsonl_path = RUN_DIR.join("failed.jsonl");
+    let mut file = OpenOptions::new()
+        .create(true)
+        .append(true)
+        .open(&failed_jsonl_path)
+        .expect("Failed to open failed.jsonl");
+    writeln!(file, "{}", serde_json::to_string(example).unwrap())
+        .expect("Failed to write to failed.jsonl");
+
     let cursor_path = example
         .repo_name()
         .unwrap()

crates/edit_prediction_cli/src/progress.rs 🔗

@@ -6,6 +6,8 @@ use std::{
     time::{Duration, Instant},
 };
 
+use crate::paths::RUN_DIR;
+
 use log::{Level, Log, Metadata, Record};
 
 pub struct Progress {
@@ -413,9 +415,13 @@ impl Progress {
             } else {
                 0.0
             };
+            let failed_jsonl_path = RUN_DIR.join("failed.jsonl");
             eprintln!(
-                "\n{} of {} examples failed ({:.1}%)",
-                inner.failed_examples, total_examples, percentage
+                "\n{} of {} examples failed ({:.1}%)\nFailed examples: {}",
+                inner.failed_examples,
+                total_examples,
+                percentage,
+                failed_jsonl_path.display()
             );
         }
     }