main.rs

  1mod anthropic_client;
  2mod distill;
  3mod example;
  4mod filter_languages;
  5mod format_prompt;
  6mod git;
  7mod headless;
  8mod load_project;
  9mod metrics;
 10mod parse_output;
 11mod paths;
 12mod predict;
 13mod progress;
 14mod pull_examples;
 15mod reorder_patch;
 16mod retrieve_context;
 17mod score;
 18mod split_commit;
 19mod split_dataset;
 20mod synthesize;
 21use clap::{Args, CommandFactory, Parser, Subcommand, ValueEnum};
 22use collections::HashSet;
 23use edit_prediction::EditPredictionStore;
 24use futures::channel::mpsc;
 25use futures::{SinkExt as _, StreamExt as _};
 26use gpui::{AppContext as _, Application, BackgroundExecutor, Task};
 27use zeta_prompt::ZetaVersion;
 28
 29use reqwest_client::ReqwestClient;
 30use serde::{Deserialize, Deserializer, Serialize, Serializer};
 31use std::fmt::Display;
 32use std::fs::{File, OpenOptions};
 33use std::hash::{Hash, Hasher};
 34use std::io::{BufRead, BufReader, BufWriter, Write};
 35use std::sync::Mutex;
 36use std::{path::PathBuf, sync::Arc};
 37
 38use crate::distill::run_distill;
 39use crate::example::{Example, group_examples_by_repo, read_example_files};
 40use crate::filter_languages::{FilterLanguagesArgs, run_filter_languages};
 41use crate::format_prompt::run_format_prompt;
 42use crate::load_project::run_load_project;
 43use crate::paths::{FAILED_EXAMPLES_DIR, RUN_DIR};
 44use crate::predict::run_prediction;
 45use crate::progress::Progress;
 46use crate::retrieve_context::run_context_retrieval;
 47use crate::score::run_scoring;
 48use crate::split_commit::SplitCommitArgs;
 49use crate::split_dataset::SplitArgs;
 50use crate::synthesize::{SynthesizeConfig, run_synthesize};
 51
 52#[derive(Parser, Debug)]
 53#[command(name = "ep")]
 54struct EpArgs {
 55    #[arg(long, default_value_t = false)]
 56    printenv: bool,
 57    #[clap(long, default_value_t = 10, global = true)]
 58    max_parallelism: usize,
 59    #[clap(long, global = true)]
 60    limit: Option<usize>,
 61    #[clap(long, global = true)]
 62    offset: Option<usize>,
 63    /// Filter examples by name
 64    #[clap(long, global = true)]
 65    name: Option<String>,
 66    /// Filter examples by repository
 67    #[clap(long, global = true)]
 68    repo: Option<String>,
 69    #[command(subcommand)]
 70    command: Option<Command>,
 71    #[clap(global = true, help = INPUTS_HELP)]
 72    inputs: Vec<PathBuf>,
 73    #[arg(long, short, global = true)]
 74    output: Option<PathBuf>,
 75    #[arg(long, short, global = true)]
 76    in_place: bool,
 77    #[arg(long, short, global = true)]
 78    failfast: bool,
 79    /// How to handle failed examples in output: keep them or skip them.
 80    /// Failed examples are always logged to the run's failed directory.
 81    #[arg(long, global = true, default_value = "keep")]
 82    failed: FailedHandling,
 83}
 84
 85/// Controls whether failed examples are included in the main output.
 86/// Failed examples are always logged to the run's failed/ directory regardless of this setting.
 87#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, ValueEnum)]
 88pub enum FailedHandling {
 89    /// Include failed examples in the main output (default)
 90    #[default]
 91    Keep,
 92    /// Exclude failed examples from the main output
 93    Skip,
 94    /// Skip writing files
 95    SkipNoFiles,
 96}
 97
 98const INPUTS_HELP: &str = r#"
 99Inputs can be file paths or special specifiers:
100
101  path
102      Path to an example(s) file (.md, .json, or .jsonl)
103
104  captured-after:{timestamp}
105      Fetch captured examples from Snowflake after the given RFC3339 timestamp.
106
107      You can specify this multiple times and mix it with file inputs.
108
109      Required environment variables to connect to Snowflake:
110          EP_SNOWFLAKE_API_KEY
111          EP_SNOWFLAKE_BASE_URL
112
113      Optional:
114          EP_SNOWFLAKE_ROLE
115
116Examples:
117
118  # Predict from a file
119  ep predict examples.jsonl
120
121  # Predict from captured examples after a timestamp
122  ep predict captured-after:2025-01-01T00:00:00Z
123
124  # Mix file inputs and captured-after in the same invocation
125  ep predict examples.jsonl captured-after:2025-01-01T00:00:00Z
126"#;
127
128#[derive(Subcommand, Debug, Clone)]
129enum Command {
130    /// Parse markdown examples and output a combined .jsonl file
131    ParseExample,
132    /// Create git worktrees for each example and load file contents
133    LoadProject,
134    /// Retrieve context for input examples.
135    Context,
136    /// Generate a prompt string for a specific model
137    FormatPrompt(FormatPromptArgs),
138    /// Runs edit prediction
139    Predict(PredictArgs),
140    /// Parse model outputs (actual_output) into unified diffs (actual_patch).
141    /// Requires format-prompt to have been run first. Uses provider from prompt.
142    ParseOutput,
143    /// Computes a score based on actual and expected patches
144    Score(PredictArgs),
145    /// Prepares a distillation dataset by copying expected outputs to
146    /// predicted outputs and removing actual outputs and prompts.
147    Distill,
148    /// Print aggregated scores
149    Eval(PredictArgs),
150    /// Generate eval examples by analyzing git commits from a repository
151    Synthesize(SynthesizeArgs),
152    /// Remove git repositories and worktrees
153    Clean,
154    /// Generate an evaluation example by splitting a chronologically-ordered commit
155    SplitCommit(SplitCommitArgs),
156    /// Split a JSONL dataset into multiple files (stratified by repository_url if present)
157    Split(SplitArgs),
158    /// Filter a JSONL dataset by programming language (based on cursor_path extension)
159    FilterLanguages(FilterLanguagesArgs),
160}
161
162impl Display for Command {
163    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164        match self {
165            Command::ParseExample => write!(f, "parse-example"),
166            Command::LoadProject => write!(f, "load-project"),
167            Command::Context => write!(f, "context"),
168            Command::FormatPrompt(args) => {
169                write!(f, "format-prompt --provider={}", args.provider)
170            }
171            Command::Predict(args) => match &args.provider {
172                Some(provider) => write!(f, "predict --provider={}", provider),
173                None => write!(f, "predict"),
174            },
175            Command::ParseOutput => write!(f, "parse-output"),
176            Command::Score(args) => match &args.provider {
177                Some(provider) => write!(f, "score --provider={}", provider),
178                None => write!(f, "score"),
179            },
180            Command::Distill => write!(f, "distill"),
181            Command::Eval(args) => match &args.provider {
182                Some(provider) => write!(f, "eval --provider={}", provider),
183                None => write!(f, "eval"),
184            },
185            Command::Synthesize(args) => {
186                write!(f, "synthesize --repos {}", args.repos.join(" "))
187            }
188            Command::Clean => write!(f, "clean"),
189            Command::SplitCommit(_) => write!(f, "split-commit"),
190            Command::Split(_) => write!(f, "split"),
191            Command::FilterLanguages(_) => write!(f, "filter-languages"),
192        }
193    }
194}
195
196#[derive(Debug, Args, Clone)]
197struct FormatPromptArgs {
198    #[clap(long, short('p'), default_value_t = PredictionProvider::default())]
199    provider: PredictionProvider,
200}
201
202#[derive(Debug, Args, Clone)]
203struct PredictArgs {
204    #[clap(long, short('p'))]
205    provider: Option<PredictionProvider>,
206    #[clap(long, default_value_t = 1)]
207    repetitions: usize,
208}
209
210#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
211enum PredictionProvider {
212    Sweep,
213    Mercury,
214    Zeta1,
215    Zeta2(ZetaVersion),
216    Teacher(ZetaVersion),
217    TeacherNonBatching(ZetaVersion),
218}
219
220impl Default for PredictionProvider {
221    fn default() -> Self {
222        PredictionProvider::Zeta2(ZetaVersion::default())
223    }
224}
225
226impl std::fmt::Display for PredictionProvider {
227    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
228        match self {
229            PredictionProvider::Sweep => write!(f, "sweep"),
230            PredictionProvider::Mercury => write!(f, "mercury"),
231            PredictionProvider::Zeta1 => write!(f, "zeta1"),
232            PredictionProvider::Zeta2(version) => write!(f, "zeta2:{version}"),
233            PredictionProvider::Teacher(version) => write!(f, "teacher:{version}"),
234            PredictionProvider::TeacherNonBatching(version) => {
235                write!(f, "teacher-non-batching:{version}")
236            }
237        }
238    }
239}
240
241impl std::str::FromStr for PredictionProvider {
242    type Err = anyhow::Error;
243
244    fn from_str(mut s: &str) -> Result<Self, Self::Err> {
245        let mut version = ZetaVersion::default();
246        if let Some((first, second)) = s.split_once(':') {
247            version = ZetaVersion::parse(second)?;
248            s = first;
249        }
250
251        let s_lower = s.to_lowercase();
252        match s_lower.as_str() {
253            "sweep" => Ok(PredictionProvider::Sweep),
254            "mercury" => Ok(PredictionProvider::Mercury),
255            "zeta1" => Ok(PredictionProvider::Zeta1),
256            "zeta2" => Ok(PredictionProvider::Zeta2(version)),
257            "teacher" => Ok(PredictionProvider::Teacher(version)),
258            "teacher-non-batching" | "teacher_non_batching" | "teachernonbatching" => {
259                Ok(PredictionProvider::TeacherNonBatching(version))
260            }
261            _ => {
262                anyhow::bail!(
263                    "unknown provider `{s}`. Valid options: sweep, mercury, zeta1, zeta2, zeta2:<version>, teacher, teacher-non-batching\n\
264                 For zeta2, you can optionally specify a version like `zeta2:ordered` or `zeta2:V0113_Ordered`.\n\
265                 Available zeta versions:\n{}",
266                    ZetaVersion::options_as_string()
267                )
268            }
269        }
270    }
271}
272
273impl Serialize for PredictionProvider {
274    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
275    where
276        S: Serializer,
277    {
278        serializer.serialize_str(&self.to_string())
279    }
280}
281
282impl<'de> Deserialize<'de> for PredictionProvider {
283    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
284    where
285        D: Deserializer<'de>,
286    {
287        let s = String::deserialize(deserializer)?;
288        s.parse().map_err(serde::de::Error::custom)
289    }
290}
291
292#[derive(Debug, Args, Clone)]
293struct SynthesizeArgs {
294    /// Repository URLs (git@github.com:owner/repo or https://...)
295    #[clap(long, required = true, num_args = 1..)]
296    repos: Vec<String>,
297
298    /// Number of examples to generate per repository
299    #[clap(long, default_value_t = 5)]
300    count: usize,
301
302    /// Maximum commits to scan per repository before giving up
303    #[clap(long, default_value_t = 100)]
304    max_commits: usize,
305
306    /// Ignore state file and reprocess all commits
307    #[clap(long)]
308    fresh: bool,
309}
310
311impl EpArgs {
312    fn output_path(&self) -> Option<PathBuf> {
313        if self.in_place {
314            if self.inputs.len() == 1 {
315                self.inputs.first().cloned()
316            } else {
317                panic!("--in-place requires exactly one input file")
318            }
319        } else {
320            self.output.clone()
321        }
322    }
323}
324
325async fn load_examples(
326    http_client: Arc<dyn http_client::HttpClient>,
327    args: &EpArgs,
328    output_path: Option<&PathBuf>,
329    background_executor: BackgroundExecutor,
330) -> anyhow::Result<Vec<Example>> {
331    let mut captured_after_timestamps = Vec::new();
332    let mut file_inputs = Vec::new();
333
334    for input in &args.inputs {
335        let input_string = input.to_string_lossy();
336        if let Some(timestamp) = pull_examples::parse_captured_after_input(input_string.as_ref()) {
337            captured_after_timestamps.push(timestamp.to_string());
338        } else {
339            file_inputs.push(input.clone());
340        }
341    }
342
343    let mut examples = read_example_files(&file_inputs);
344
345    Progress::global().set_total_examples(examples.len());
346
347    let remaining_limit_for_snowflake =
348        args.limit.map(|limit| limit.saturating_sub(examples.len()));
349
350    if let Some(0) = remaining_limit_for_snowflake {
351        log::info!(
352            "skipping captured-after inputs because --limit is already satisfied by example files"
353        );
354    } else if !captured_after_timestamps.is_empty() {
355        captured_after_timestamps.sort();
356
357        let max_rows_per_timestamp = remaining_limit_for_snowflake.unwrap_or(5000);
358
359        let mut captured_examples = pull_examples::fetch_captured_examples_after(
360            http_client,
361            &captured_after_timestamps,
362            max_rows_per_timestamp,
363            background_executor,
364        )
365        .await?;
366        examples.append(&mut captured_examples);
367    }
368
369    crate::example::sort_examples_by_repo_and_rev(&mut examples);
370
371    if let Some(name_filter) = &args.name {
372        examples.retain(|example| example.spec.name.contains(name_filter));
373    }
374    if let Some(repo_filter) = &args.repo {
375        examples.retain(|example| example.spec.repository_url.contains(repo_filter));
376    }
377
378    // Skip resume logic for --in-place since input and output are the same file,
379    // which would incorrectly treat all input examples as already processed.
380    if !args.in_place {
381        if let Some(path) = output_path {
382            resume_from_output(path, &mut examples);
383        }
384    }
385
386    if let Some(offset) = args.offset {
387        examples.splice(0..offset, []);
388    }
389
390    if let Some(limit) = args.limit {
391        examples.truncate(limit);
392    }
393
394    Progress::global().set_total_examples(examples.len());
395
396    Ok(examples)
397}
398
399fn spec_hash(spec: &edit_prediction::example_spec::ExampleSpec) -> u64 {
400    let mut hasher = collections::FxHasher::default();
401    spec.hash(&mut hasher);
402    hasher.finish()
403}
404
405fn resume_from_output(path: &PathBuf, examples: &mut Vec<Example>) {
406    let file = match File::open(path) {
407        Ok(f) => f,
408        Err(_) => return,
409    };
410
411    let input_hashes: HashSet<u64> = examples.iter().map(|e| spec_hash(&e.spec)).collect();
412
413    let reader = BufReader::new(file);
414    let mut kept_lines = Vec::new();
415    let mut kept_hashes = HashSet::default();
416
417    for line in reader.lines() {
418        let line = match line {
419            Ok(l) => l,
420            Err(_) => continue,
421        };
422
423        if let Ok(output_example) = serde_json::from_str::<Example>(&line) {
424            let hash = spec_hash(&output_example.spec);
425            if input_hashes.contains(&hash) && !kept_hashes.contains(&hash) {
426                kept_hashes.insert(hash);
427                kept_lines.push(line);
428            }
429        }
430    }
431
432    let total = examples.len();
433    let already_processed = kept_hashes.len();
434
435    eprintln!(
436        "Resuming: {}/{} examples already processed",
437        already_processed, total
438    );
439
440    let file = OpenOptions::new()
441        .write(true)
442        .truncate(true)
443        .open(path)
444        .expect("Failed to open output file for rewriting");
445    let mut writer = BufWriter::new(file);
446    for line in &kept_lines {
447        writeln!(writer, "{}", line).expect("Failed to write to output file");
448    }
449    writer.flush().expect("Failed to flush output file");
450
451    examples.retain(|e| !kept_hashes.contains(&spec_hash(&e.spec)));
452}
453
454fn main() {
455    let args = EpArgs::parse();
456
457    if args.printenv {
458        ::util::shell_env::print_env();
459        return;
460    }
461
462    let output = args.output_path();
463    let command = match &args.command {
464        Some(cmd) => cmd.clone(),
465        None => {
466            EpArgs::command().print_help().unwrap();
467            return;
468        }
469    };
470
471    match &command {
472        Command::Clean => {
473            std::fs::remove_dir_all(&*paths::DATA_DIR).unwrap();
474            return;
475        }
476        Command::Synthesize(synth_args) => {
477            let Some(output_dir) = args.output else {
478                panic!("output dir is required");
479            };
480            let config = SynthesizeConfig {
481                repo_urls: synth_args.repos.clone(),
482                count: synth_args.count,
483                max_commits: synth_args.max_commits,
484                output_dir,
485                fresh: synth_args.fresh,
486            };
487            smol::block_on(async {
488                if let Err(e) = run_synthesize(config).await {
489                    eprintln!("Error: {:?}", e);
490                    std::process::exit(1);
491                }
492            });
493            return;
494        }
495        Command::SplitCommit(split_commit_args) => {
496            if let Err(error) = split_commit::run_split_commit(
497                split_commit_args,
498                &args.inputs,
499                output.as_ref(),
500                args.failed,
501            ) {
502                eprintln!("{error:#}");
503                std::process::exit(1);
504            }
505            return;
506        }
507        Command::Split(split_args) => {
508            if let Err(error) = split_dataset::run_split(split_args, &args.inputs) {
509                eprintln!("{error:#}");
510                std::process::exit(1);
511            }
512            return;
513        }
514        Command::FilterLanguages(filter_args) => {
515            if let Err(error) =
516                run_filter_languages(filter_args, &args.inputs, args.output.as_ref())
517            {
518                eprintln!("{error:#}");
519                std::process::exit(1);
520            }
521            return;
522        }
523        _ => {}
524    }
525
526    let http_client = Arc::new(ReqwestClient::new());
527    let app = Application::headless().with_http_client(http_client);
528
529    app.run(move |cx| {
530        let app_state = Arc::new(headless::init(cx));
531        EditPredictionStore::global(&app_state.client, &app_state.user_store, cx);
532
533        cx.spawn(async move |cx| {
534            let result = async {
535                let examples = load_examples(
536                    app_state.client.http_client(),
537                    &args,
538                    output.as_ref(),
539                    cx.background_executor().clone(),
540                )
541                .await?;
542
543                match &command {
544                    Command::Predict(args) | Command::Score(args) | Command::Eval(args) => {
545                        predict::sync_batches(args.provider.as_ref()).await?;
546                    }
547                    _ => (),
548                }
549
550                let failfast_on_single_example = examples.len() == 1;
551
552                // For --in-place, write to a temp file and rename at the end to avoid data loss on interruption
553                let in_place_temp_path = if args.in_place {
554                    output.as_ref().map(|path| {
555                        let mut temp_path = path.clone();
556                        temp_path.set_extension("jsonl.tmp");
557                        temp_path
558                    })
559                } else {
560                    None
561                };
562
563                let output_sender: Option<mpsc::UnboundedSender<String>> =
564                    if args.output.is_some() || !matches!(command, Command::Eval(_)) {
565                        let write_path = in_place_temp_path.as_ref().or(output.as_ref());
566                        write_path.map(|path| {
567                            let file = if args.in_place {
568                                // For --in-place, write to temp file (truncate if exists)
569                                OpenOptions::new()
570                                    .create(true)
571                                    .write(true)
572                                    .truncate(true)
573                                    .open(path)
574                                    .expect("Failed to open temp output file")
575                            } else {
576                                // For regular output, append to support resuming
577                                OpenOptions::new()
578                                    .create(true)
579                                    .append(true)
580                                    .open(path)
581                                    .expect("Failed to open output file")
582                            };
583                            let mut writer = BufWriter::new(file);
584                            let (sender, mut receiver) = mpsc::unbounded::<String>();
585                            cx.background_spawn(async move {
586                                while let Some(line) = receiver.next().await {
587                                    writeln!(writer, "{}", line).expect("Failed to write example");
588                                    writer.flush().expect("Failed to flush output");
589                                }
590                            })
591                            .detach();
592                            sender
593                        })
594                    } else {
595                        None
596                    };
597
598                let grouped_examples = Mutex::new(group_examples_by_repo(examples));
599                let finished_examples = Mutex::new(Vec::new());
600
601                let mut tasks = Vec::new();
602                for _ in 0..args.max_parallelism {
603                    tasks.push(async {
604                        loop {
605                            let Some(mut repo_examples) =
606                                grouped_examples.lock().unwrap().pop_front()
607                            else {
608                                break;
609                            };
610                            for example in &mut repo_examples {
611                                let example_progress =
612                                    Progress::global().start_group(&example.spec.name);
613
614                                let result = async {
615                                    match &command {
616                                        Command::ParseExample => {}
617                                        Command::LoadProject => {
618                                            run_load_project(
619                                                example,
620                                                app_state.clone(),
621                                                &example_progress,
622                                                cx.clone(),
623                                            )
624                                            .await?;
625                                        }
626                                        Command::Context => {
627                                            run_context_retrieval(
628                                                example,
629                                                app_state.clone(),
630                                                &example_progress,
631                                                cx.clone(),
632                                            )
633                                            .await?;
634                                        }
635                                        Command::FormatPrompt(args) => {
636                                            run_format_prompt(
637                                                example,
638                                                args,
639                                                app_state.clone(),
640                                                &example_progress,
641                                                cx.clone(),
642                                            )
643                                            .await?;
644                                        }
645                                        Command::Predict(args) => {
646                                            run_prediction(
647                                                example,
648                                                args,
649                                                app_state.clone(),
650                                                &example_progress,
651                                                cx.clone(),
652                                            )
653                                            .await?;
654                                        }
655                                        Command::ParseOutput => {
656                                            parse_output::run_parse_output(example)?;
657                                        }
658                                        Command::Distill => {
659                                            run_distill(example).await?;
660                                        }
661                                        Command::Score(args) | Command::Eval(args) => {
662                                            run_scoring(
663                                                example,
664                                                &args,
665                                                app_state.clone(),
666                                                &example_progress,
667                                                cx.clone(),
668                                            )
669                                            .await?;
670                                        }
671                                        Command::Clean
672                                        | Command::Synthesize(_)
673                                        | Command::SplitCommit(_)
674                                        | Command::Split(_)
675                                        | Command::FilterLanguages(_) => {
676                                            unreachable!()
677                                        }
678                                    }
679                                    anyhow::Ok(())
680                                }
681                                .await;
682
683                                let failed = if let Err(error) = result {
684                                    handle_error(
685                                        error,
686                                        &args,
687                                        &command,
688                                        &app_state,
689                                        failfast_on_single_example,
690                                        &example,
691                                    )
692                                    .await;
693                                    true
694                                } else {
695                                    false
696                                };
697
698                                let should_write = !failed || args.failed == FailedHandling::Keep;
699                                if should_write {
700                                    if let Some(ref mut sender) = output_sender.clone() {
701                                        let line = serde_json::to_string(&example).unwrap();
702                                        sender
703                                            .send(line)
704                                            .await
705                                            .expect("Failed to send to output writer");
706                                    } else if args.output.is_none()
707                                        && !matches!(command, Command::Eval(_))
708                                    {
709                                        let line = serde_json::to_string(&example).unwrap();
710                                        println!("{}", line);
711                                    }
712                                }
713                            }
714
715                            let repo_url = &repo_examples.first().unwrap().spec.repository_url;
716                            let project = repo_examples
717                                .iter()
718                                .find_map(|e| e.state.as_ref().map(|s| s.project.clone()))
719                                .or_else(|| app_state.project_cache.get(repo_url));
720
721                            if let Some(project) = project {
722                                let mut cx = cx.clone();
723
724                                let shutdown_task: Task<()> =
725                                    project.update(&mut cx, |project, cx| {
726                                        let lsp_store = project.lsp_store();
727                                        lsp_store.update(cx, |lsp_store, cx| {
728                                            lsp_store.shutdown_all_language_servers(cx)
729                                        })
730                                    });
731
732                                shutdown_task.await;
733
734                                if let Some(ep_store) =
735                                    cx.update(|cx| EditPredictionStore::try_global(cx))
736                                {
737                                    ep_store.update(&mut cx, |store, _| {
738                                        store.remove_project(&project);
739                                    });
740                                }
741                            }
742
743                            app_state.project_cache.remove(repo_url);
744                            for example in &mut repo_examples {
745                                example.state.take();
746                            }
747                            finished_examples
748                                .lock()
749                                .unwrap()
750                                .extend_from_slice(&repo_examples);
751                        }
752                    });
753                }
754                futures::future::join_all(tasks).await;
755
756                Progress::global().finalize();
757
758                match &command {
759                    Command::Predict(args) | Command::Score(args) | Command::Eval(args) => {
760                        predict::sync_batches(args.provider.as_ref()).await?;
761                    }
762                    _ => (),
763                }
764
765                match &command {
766                    Command::Eval(_) => score::print_report(&finished_examples.lock().unwrap()),
767                    _ => (),
768                };
769
770                // For --in-place, atomically rename temp file to original
771                if let (Some(temp_path), Some(final_path)) = (&in_place_temp_path, &output) {
772                    std::fs::rename(temp_path, final_path)
773                        .expect("Failed to rename temp file to final output");
774                }
775
776                anyhow::Ok(())
777            }
778            .await;
779
780            if let Err(e) = result {
781                panic!("Fatal error: {:?}", e);
782            }
783
784            let _ = cx.update(|cx| cx.quit());
785        })
786        .detach();
787    });
788}
789
790async fn handle_error(
791    error: anyhow::Error,
792    args: &EpArgs,
793    command: &Command,
794    app_state: &Arc<headless::EpAppState>,
795    failfast_on_single_example: bool,
796    example: &Example,
797) {
798    Progress::global().increment_failed();
799
800    let msg;
801    if !matches!(args.failed, FailedHandling::SkipNoFiles) {
802        let example_name = example.spec.filename();
803
804        let failed_example_path = FAILED_EXAMPLES_DIR.join(format!("{}.json", example_name));
805        app_state
806            .fs
807            .write(
808                &failed_example_path,
809                &serde_json::to_vec_pretty(&example).unwrap(),
810            )
811            .await
812            .unwrap();
813        let err_path = FAILED_EXAMPLES_DIR.join(format!("{}_err.txt", example_name));
814        app_state
815            .fs
816            .write(&err_path, format!("{error:?}").as_bytes())
817            .await
818            .unwrap();
819
820        let failed_jsonl_path = RUN_DIR.join("failed.jsonl");
821        let mut file = OpenOptions::new()
822            .create(true)
823            .append(true)
824            .open(&failed_jsonl_path)
825            .expect("Failed to open failed.jsonl");
826        writeln!(file, "{}", serde_json::to_string(example).unwrap())
827            .expect("Failed to write to failed.jsonl");
828
829        let cursor_path = example
830            .repo_name()
831            .unwrap()
832            .worktree_path()
833            .join(&example.spec.cursor_path);
834        msg = format!(
835            indoc::indoc! {"
836                While processing \"{}\":
837
838                \x1b[31m{:?}\x1b[0m
839
840                Example:        \x1b[36m{}\x1b[0m
841                Error file:     \x1b[36m{}\x1b[0m
842                Cursor file:    \x1b[36m{}\x1b[0m
843                Re-run:         cargo run -p edit_prediction_cli -- {} \x1b[36m{}\x1b[0m
844            "},
845            example.spec.name,
846            error,
847            failed_example_path.display(),
848            err_path.display(),
849            cursor_path.display(),
850            command,
851            failed_example_path.display(),
852        );
853    } else {
854        msg = format!(
855            indoc::indoc! {"
856            While processing \"{}\":
857
858                \x1b[31m{:?}\x1b[0m
859            "},
860            example.spec.name, error
861        );
862    }
863
864    if args.failfast || failfast_on_single_example {
865        Progress::global().finalize();
866        panic!("{}", msg);
867    } else {
868        log::error!("{}", msg);
869    }
870}