evaluate.rs

  1use std::{
  2    collections::{BTreeSet, HashMap},
  3    io::{IsTerminal, Write},
  4    sync::Arc,
  5};
  6
  7use anyhow::Result;
  8use collections::HashSet;
  9use gpui::{AsyncApp, Entity};
 10use project::Project;
 11use sweep_ai::SweepAi;
 12use util::ResultExt as _;
 13use zeta2::{Zeta, udiff::DiffLine};
 14
 15use crate::{
 16    EvaluateArguments, PredictionOptions, PredictionProvider,
 17    example::{Example, NamedExample},
 18    headless::ZetaCliAppState,
 19    paths::print_run_data_dir,
 20    predict::{PredictionDetails, perform_predict, setup_sweep, setup_zeta},
 21};
 22
/// Raw per-run data kept for post-hoc analysis (used to group identical
/// predictions across repetitions in `write_bucketed_analysis`).
#[derive(Debug)]
pub(crate) struct ExecutionData {
    // Zero-padded repetition index (e.g. "003"), or the example name when
    // the example ran only once.
    execution_id: String,
    // The predicted diff produced by the provider.
    diff: String,
    // Contents of the run's `prediction_response.md`; empty when the file
    // is missing or unreadable.
    reasoning: String,
}
 29
/// Entry point for the `evaluate` subcommand: loads each example, runs the
/// predictor `args.repetitions` times per example, prints aggregated scores,
/// and writes report files into the run directory.
pub async fn run_evaluate(
    args: EvaluateArguments,
    app_state: &Arc<ZetaCliAppState>,
    cx: &mut AsyncApp,
) {
    if args.example_paths.is_empty() {
        eprintln!("No examples provided");
        return;
    }

    let all_tasks = args.example_paths.into_iter().map(|path| {
        let options = args.options.clone();
        let app_state = app_state.clone();
        let example = NamedExample::load(&path).expect("Failed to load example");

        cx.spawn(async move |cx| {
            let project = example.setup_project(&app_state, cx).await.unwrap();

            // One (zeta, sweep) provider pair per repetition so repetitions
            // don't share predictor state. Sweep is only set up when it is
            // the selected provider.
            let providers = (0..args.repetitions)
                .map(|_| {
                    (
                        setup_zeta(&project, &app_state, cx).unwrap(),
                        if matches!(args.options.provider, PredictionProvider::Sweep) {
                            Some(setup_sweep(&project, cx).unwrap())
                        } else {
                            None
                        },
                    )
                })
                .collect::<Vec<_>>();

            // Replay the example's recorded edit history before predicting.
            let _edited_buffers = example.apply_edit_history(&project, cx).await.unwrap();

            let tasks =
                providers
                    .into_iter()
                    .enumerate()
                    .map(move |(repetition_ix, (zeta, sweep))| {
                        // Results are only tagged with a repetition index when
                        // there is more than one repetition.
                        let repetition_ix = (args.repetitions > 1).then(|| repetition_ix as u16);
                        let example = example.clone();
                        let project = project.clone();
                        let options = options.clone();

                        cx.spawn(async move |cx| {
                            let name = example.name.clone();
                            run_evaluate_one(
                                example,
                                repetition_ix,
                                project,
                                zeta,
                                sweep,
                                options,
                                !args.skip_prediction,
                                cx,
                            )
                            .await
                            // Attach example name and repetition index so the
                            // failure can be reported after the task is gone.
                            .map_err(|err| (err, name, repetition_ix))
                        })
                    });
            futures::future::join_all(tasks).await
        })
    });
    let all_results = futures::future::join_all(all_tasks).await;

    write_aggregated_scores(&mut std::io::stdout(), &all_results).unwrap();
    if let Some(mut output_file) =
        std::fs::File::create(crate::paths::RUN_DIR.join("aggregated_results.md")).log_err()
    {
        write_aggregated_scores(&mut output_file, &all_results).log_err();
    };

    // Bucketing predictions only makes sense when the same example ran more
    // than once.
    if args.repetitions > 1 {
        if let Err(e) = write_bucketed_analysis(&all_results) {
            eprintln!("Failed to write bucketed analysis: {:?}", e);
        }
    }

    print_run_data_dir(args.repetitions == 1, std::io::stdout().is_terminal());
}
109
110fn write_aggregated_scores(
111    w: &mut impl std::io::Write,
112    all_results: &Vec<
113        Vec<Result<(EvaluationResult, ExecutionData), (anyhow::Error, String, Option<u16>)>>,
114    >,
115) -> Result<()> {
116    let mut successful = Vec::new();
117    let mut failed_count = 0;
118
119    for result in all_results.iter().flatten() {
120        match result {
121            Ok((eval_result, _execution_data)) => successful.push(eval_result),
122            Err((err, name, repetition_ix)) => {
123                if failed_count == 0 {
124                    writeln!(w, "## Errors\n")?;
125                }
126
127                failed_count += 1;
128                writeln!(w, "{}", fmt_evaluation_error(err, name, repetition_ix))?;
129            }
130        }
131    }
132
133    if successful.len() > 1 {
134        let mut edit_predictions = successful
135            .iter()
136            .filter_map(|r| r.edit_prediction.as_ref())
137            .peekable();
138        let has_edit_predictions = edit_predictions.peek().is_some();
139        let aggregated_result = EvaluationResult {
140            context: Scores::aggregate(successful.iter().map(|r| &r.context)),
141            edit_prediction: has_edit_predictions.then(|| Scores::aggregate(edit_predictions)),
142            prompt_len: successful.iter().map(|r| r.prompt_len).sum::<usize>() / successful.len(),
143            generated_len: successful.iter().map(|r| r.generated_len).sum::<usize>()
144                / successful.len(),
145            context_lines_found_in_context: successful
146                .iter()
147                .map(|r| r.context_lines_found_in_context)
148                .sum::<usize>()
149                / successful.len(),
150            context_lines_in_expected_patch: successful
151                .iter()
152                .map(|r| r.context_lines_in_expected_patch)
153                .sum::<usize>()
154                / successful.len(),
155        };
156
157        writeln!(w, "\n{}", "-".repeat(80))?;
158        writeln!(w, "\n## TOTAL SCORES")?;
159        writeln!(w, "{:#}", aggregated_result)?;
160    }
161
162    if successful.len() + failed_count > 1 {
163        writeln!(
164            w,
165            "\nCongratulations! {}/{} ({:.2}%) of runs weren't outright failures 🎉",
166            successful.len(),
167            successful.len() + failed_count,
168            (successful.len() as f64 / (successful.len() + failed_count) as f64) * 100.0
169        )?;
170    }
171
172    Ok(())
173}
174
/// Evaluates a single example once: runs the prediction, scores it against
/// the expected context and patch, writes `results.md` into the run's
/// example directory, and returns both the scores and the raw execution
/// data needed for bucketed analysis.
pub async fn run_evaluate_one(
    example: NamedExample,
    repetition_ix: Option<u16>,
    project: Entity<Project>,
    zeta: Entity<Zeta>,
    sweep: Option<Entity<SweepAi>>,
    prediction_options: PredictionOptions,
    predict: bool,
    cx: &mut AsyncApp,
) -> Result<(EvaluationResult, ExecutionData)> {
    let predict_result = perform_predict(
        example.clone(),
        project,
        zeta,
        sweep,
        repetition_ix,
        prediction_options,
        cx,
    )
    .await?;

    let evaluation_result = evaluate(&example.example, &predict_result, predict);

    // Print to stdout only for single runs; concurrent repetitions would
    // interleave their output.
    if repetition_ix.is_none() {
        write_eval_result(
            &example,
            &predict_result,
            &evaluation_result,
            &mut std::io::stdout(),
            std::io::stdout().is_terminal(),
            predict,
        )?;
    }

    // Always persist the (color-free) report next to the prediction data.
    if let Some(mut results_file) =
        std::fs::File::create(predict_result.run_example_dir.join("results.md")).log_err()
    {
        write_eval_result(
            &example,
            &predict_result,
            &evaluation_result,
            &mut results_file,
            false,
            predict,
        )
        .log_err();
    }

    let execution_data = ExecutionData {
        // Zero-padded repetition index, or the example name for single runs.
        execution_id: if let Some(rep_ix) = repetition_ix {
            format!("{:03}", rep_ix)
        } else {
            example.name.clone()
        },
        diff: predict_result.diff.clone(),
        // Model response text; empty if the file is missing or unreadable.
        reasoning: std::fs::read_to_string(
            predict_result
                .run_example_dir
                .join("prediction_response.md"),
        )
        .unwrap_or_default(),
    };

    anyhow::Ok((evaluation_result, execution_data))
}
240
241fn write_eval_result(
242    example: &NamedExample,
243    predictions: &PredictionDetails,
244    evaluation_result: &EvaluationResult,
245    out: &mut impl Write,
246    use_color: bool,
247    predict: bool,
248) -> Result<()> {
249    if predict {
250        writeln!(
251            out,
252            "## Expected edit prediction:\n\n```diff\n{}\n```\n",
253            compare_diffs(
254                &example.example.expected_patch,
255                &predictions.diff,
256                use_color
257            )
258        )?;
259        writeln!(
260            out,
261            "## Actual edit prediction:\n\n```diff\n{}\n```\n",
262            compare_diffs(
263                &predictions.diff,
264                &example.example.expected_patch,
265                use_color
266            )
267        )?;
268    }
269
270    writeln!(out, "{:#}", evaluation_result)?;
271
272    anyhow::Ok(())
273}
274
/// Scores for one evaluation run, or an aggregate over several runs.
#[derive(Debug, Default)]
pub struct EvaluationResult {
    /// Edit-prediction scores; `None` when prediction was skipped and only
    /// context retrieval was evaluated.
    pub edit_prediction: Option<Scores>,
    /// Context-retrieval scores.
    pub context: Scores,
    /// Prompt length, copied from the prediction details.
    pub prompt_len: usize,
    /// Generated output length, copied from the prediction details.
    pub generated_len: usize,
    /// Number of unchanged (context) lines in the expected patch.
    pub context_lines_in_expected_patch: usize,
    /// How many of those expected context lines also appeared in the
    /// retrieved excerpts.
    pub context_lines_found_in_context: usize,
}
284
/// Precision/recall bookkeeping for a comparison between an expected and an
/// actual set of lines.
#[derive(Default, Debug)]
pub struct Scores {
    pub true_positives: usize,
    pub false_positives: usize,
    pub false_negatives: usize,
}

impl Scores {
    /// Compares `actual` against `expected`: overlap counts as true
    /// positives, extra actual entries as false positives, and missed
    /// expected entries as false negatives.
    pub fn new(expected: &HashSet<String>, actual: &HashSet<String>) -> Scores {
        Scores {
            true_positives: expected.intersection(actual).count(),
            false_positives: actual.difference(expected).count(),
            false_negatives: expected.difference(actual).count(),
        }
    }

    /// Renders the counts and derived metrics as a markdown-friendly text
    /// table.
    pub fn to_markdown(&self) -> String {
        format!(
            "
Precision       : {:.4}
Recall          : {:.4}
F1 Score        : {:.4}
True Positives  : {}
False Positives : {}
False Negatives : {}",
            self.precision(),
            self.recall(),
            self.f1_score(),
            self.true_positives,
            self.false_positives,
            self.false_negatives
        )
    }

    /// Sums the raw counts of every score in `scores` into a single total.
    pub fn aggregate<'a>(scores: impl Iterator<Item = &'a Scores>) -> Scores {
        scores.fold(Scores::default(), |mut total, score| {
            total.true_positives += score.true_positives;
            total.false_positives += score.false_positives;
            total.false_negatives += score.false_negatives;
            total
        })
    }

    /// TP / (TP + FP); 0.0 when nothing was predicted.
    pub fn precision(&self) -> f64 {
        Self::ratio(
            self.true_positives,
            self.true_positives + self.false_positives,
        )
    }

    /// TP / (TP + FN); 0.0 when nothing was expected.
    pub fn recall(&self) -> f64 {
        Self::ratio(
            self.true_positives,
            self.true_positives + self.false_negatives,
        )
    }

    /// Harmonic mean of precision and recall; 0.0 when both are zero.
    pub fn f1_score(&self) -> f64 {
        let precision = self.precision();
        let recall = self.recall();
        if precision + recall == 0.0 {
            0.0
        } else {
            2.0 * precision * recall / (precision + recall)
        }
    }

    // Division that treats 0/0 as 0.0 instead of NaN.
    fn ratio(numerator: usize, denominator: usize) -> f64 {
        if denominator == 0 {
            0.0
        } else {
            numerator as f64 / denominator as f64
        }
    }
}
367
368impl std::fmt::Display for EvaluationResult {
369    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
370        if f.alternate() {
371            self.fmt_table(f)
372        } else {
373            self.fmt_markdown(f)
374        }
375    }
376}
377
378impl EvaluationResult {
379    fn fmt_markdown(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
380        write!(
381            f,
382            r#"
383### Context Scores
384{}
385"#,
386            self.context.to_markdown(),
387        )?;
388        if let Some(prediction) = &self.edit_prediction {
389            write!(
390                f,
391                r#"
392                ### Edit Prediction Scores
393                {}"#,
394                prediction.to_markdown()
395            )?;
396        }
397        Ok(())
398    }
399
400    fn fmt_table(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
401        writeln!(f, "### Scores\n")?;
402        writeln!(
403            f,
404            "                   Prompt  Generated RetrievedContext PatchContext     TP     FP     FN     Precision   Recall     F1"
405        )?;
406        writeln!(
407            f,
408            "─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────"
409        )?;
410        writeln!(
411            f,
412            "Context Retrieval  {:<7} {:<9} {:<16} {:<16} {:<6} {:<6} {:<6} {:>10.2} {:>7.2} {:>7.2}",
413            "",
414            "",
415            "",
416            "",
417            self.context.true_positives,
418            self.context.false_positives,
419            self.context.false_negatives,
420            self.context.precision() * 100.0,
421            self.context.recall() * 100.0,
422            self.context.f1_score() * 100.0
423        )?;
424        if let Some(edit_prediction) = &self.edit_prediction {
425            writeln!(
426                f,
427                "Edit Prediction    {:<7} {:<9} {:<16} {:<16} {:<6} {:<6} {:<6} {:>10.2} {:>7.2} {:>7.2}",
428                self.prompt_len,
429                self.generated_len,
430                self.context_lines_found_in_context,
431                self.context_lines_in_expected_patch,
432                edit_prediction.true_positives,
433                edit_prediction.false_positives,
434                edit_prediction.false_negatives,
435                edit_prediction.precision() * 100.0,
436                edit_prediction.recall() * 100.0,
437                edit_prediction.f1_score() * 100.0
438            )?;
439        }
440        Ok(())
441    }
442}
443
/// Scores a prediction against an example's expectations.
///
/// Context retrieval is scored per expected-context entry: each entry may
/// list several acceptable alternatives, and the alternative with the best
/// recall is the one counted. Edit-prediction scores (only when `predict` is
/// true) compare the added/removed lines of the expected and actual patch.
fn evaluate(example: &Example, preds: &PredictionDetails, predict: bool) -> EvaluationResult {
    let mut eval_result = EvaluationResult {
        prompt_len: preds.prompt_len,
        generated_len: preds.generated_len,
        ..Default::default()
    };

    // Lines are keyed as "path: line" so identical text in different files
    // doesn't collide.
    let actual_context_lines: HashSet<_> = preds
        .excerpts
        .iter()
        .flat_map(|excerpt| {
            excerpt
                .text
                .lines()
                .map(|line| format!("{}: {line}", excerpt.path.display()))
        })
        .collect();

    // Start from every retrieved line; anything matched by *any* alternative
    // of *any* entry is removed below, leaving the true false positives.
    let mut false_positive_lines = actual_context_lines.clone();

    for entry in &example.expected_context {
        let mut best_alternative_score: Option<Scores> = None;

        for alternative in &entry.alternatives {
            let expected: HashSet<_> = alternative
                .excerpts
                .iter()
                .flat_map(|excerpt| {
                    excerpt
                        .text
                        .lines()
                        .map(|line| format!("{}: {line}", excerpt.path.display()))
                })
                .collect();

            let scores = Scores::new(&expected, &actual_context_lines);

            // A retrieved line matching any alternative is not a false
            // positive, even if that alternative doesn't end up best-scoring.
            false_positive_lines.retain(|line| !expected.contains(line));

            // Keep the alternative with the highest recall for this entry.
            if best_alternative_score
                .as_ref()
                .is_none_or(|best| scores.recall() > best.recall())
            {
                best_alternative_score = Some(scores);
            }
        }

        // Entries with no alternatives contribute zero counts.
        let best_alternative = best_alternative_score.unwrap_or_default();
        eval_result.context.false_negatives += best_alternative.false_negatives;
        eval_result.context.true_positives += best_alternative.true_positives;
    }

    eval_result.context.false_positives = false_positive_lines.len();

    if predict {
        // todo: alternatives for patches
        let expected_patch = example
            .expected_patch
            .lines()
            .map(DiffLine::parse)
            .collect::<Vec<_>>();
        // Only the +/- lines of the patch count toward prediction scores.
        let expected_patch_lines = expected_patch
            .iter()
            .filter(|line| matches!(line, DiffLine::Addition(_) | DiffLine::Deletion(_)))
            .map(|line| line.to_string())
            .collect();
        // Unchanged (context) lines of the expected patch, used to measure
        // how much of the patch's surroundings the retrieval actually found.
        let expected_context_lines = expected_patch
            .iter()
            .filter_map(|line| {
                if let DiffLine::Context(str) = line {
                    Some(String::from(*str))
                } else {
                    None
                }
            })
            .collect::<BTreeSet<_>>();
        // NOTE: unlike the scoring above, this comparison ignores file paths
        // and matches on line text alone.
        let actual_context_lines = preds
            .excerpts
            .iter()
            .flat_map(|excerpt| excerpt.text.lines().map(ToOwned::to_owned))
            .collect::<BTreeSet<_>>();

        let matched = expected_context_lines
            .intersection(&actual_context_lines)
            .count();

        let actual_patch_lines = preds
            .diff
            .lines()
            .map(DiffLine::parse)
            .filter(|line| matches!(line, DiffLine::Addition(_) | DiffLine::Deletion(_)))
            .map(|line| line.to_string())
            .collect();

        eval_result.edit_prediction = Some(Scores::new(&expected_patch_lines, &actual_patch_lines));
        eval_result.context_lines_in_expected_patch = expected_context_lines.len();
        eval_result.context_lines_found_in_context = matched;
    }

    eval_result
}
545
546/// Return annotated `patch_a` so that:
547/// Additions and deletions that are not present in `patch_b` will be highlighted in red.
548/// Additions and deletions that are present in `patch_b` will be highlighted in green.
549pub fn compare_diffs(patch_a: &str, patch_b: &str, use_color: bool) -> String {
550    let green = if use_color { "\x1b[32m✓ " } else { "" };
551    let red = if use_color { "\x1b[31m✗ " } else { "" };
552    let neutral = if use_color { "  " } else { "" };
553    let reset = if use_color { "\x1b[0m" } else { "" };
554    let lines_a = patch_a.lines().map(DiffLine::parse);
555    let lines_b: Vec<_> = patch_b.lines().map(DiffLine::parse).collect();
556
557    let annotated = lines_a
558        .map(|line| match line {
559            DiffLine::Addition(_) | DiffLine::Deletion(_) => {
560                if lines_b.contains(&line) {
561                    format!("{green}{line}{reset}")
562                } else {
563                    format!("{red}{line}{reset}")
564                }
565            }
566            _ => format!("{neutral}{line}{reset}"),
567        })
568        .collect::<Vec<String>>();
569
570    annotated.join("\n")
571}
572
/// Groups repeated runs by their predicted diff and writes
/// `bucketed_analysis.md` into the run directory: a summary, then one
/// section per distinct prediction (correct buckets first) including the
/// executions and model reasoning behind each, plus sections for empty
/// predictions and outright errors.
fn write_bucketed_analysis(
    all_results: &Vec<
        Vec<Result<(EvaluationResult, ExecutionData), (anyhow::Error, String, Option<u16>)>>,
    >,
) -> Result<()> {
    // One bucket per distinct predicted diff text.
    #[derive(Debug)]
    struct EditBucket {
        diff: String,
        is_correct: bool,
        execution_indices: Vec<String>,
        reasoning_samples: Vec<String>,
    }

    let mut total_executions = 0;
    let mut empty_predictions = Vec::new();
    let mut errors = Vec::new();

    let mut buckets: HashMap<String, EditBucket> = HashMap::new();

    for result in all_results.iter().flatten() {
        total_executions += 1;

        let (evaluation_result, execution_data) = match result {
            Ok((eval_result, execution_data)) => {
                // Empty diffs are reported in their own section, not bucketed.
                if execution_data.diff.is_empty() {
                    empty_predictions.push(execution_data);
                    continue;
                }
                (eval_result, execution_data)
            }
            Err(err) => {
                errors.push(err);
                continue;
            }
        };

        buckets
            .entry(execution_data.diff.clone())
            .and_modify(|bucket| {
                bucket
                    .execution_indices
                    .push(execution_data.execution_id.clone());
                bucket
                    .reasoning_samples
                    .push(execution_data.reasoning.clone());
            })
            .or_insert_with(|| EditBucket {
                diff: execution_data.diff.clone(),
                // "Correct" means the prediction matched the expected patch
                // exactly: no missing or spurious edit lines, and at least
                // one true positive (i.e. not an empty match).
                is_correct: {
                    evaluation_result
                        .edit_prediction
                        .as_ref()
                        .map_or(false, |edit_prediction| {
                            edit_prediction.false_positives == 0
                                && edit_prediction.false_negatives == 0
                                && edit_prediction.true_positives > 0
                        })
                },
                execution_indices: vec![execution_data.execution_id.clone()],
                reasoning_samples: vec![execution_data.reasoning.clone()],
            });
    }

    // Correct buckets first, then larger buckets before smaller ones.
    let mut sorted_buckets = buckets.into_values().collect::<Vec<_>>();
    sorted_buckets.sort_by(|a, b| match (a.is_correct, b.is_correct) {
        (true, false) => std::cmp::Ordering::Less,
        (false, true) => std::cmp::Ordering::Greater,
        _ => b.execution_indices.len().cmp(&a.execution_indices.len()),
    });

    let output_path = crate::paths::RUN_DIR.join("bucketed_analysis.md");
    let mut output = std::fs::File::create(&output_path)?;

    writeln!(output, "# Bucketed Edit Analysis\n")?;

    writeln!(output, "## Summary\n")?;
    writeln!(output, "- **Total executions**: {}", total_executions)?;

    let correct_count: usize = sorted_buckets
        .iter()
        .filter(|b| b.is_correct)
        .map(|b| b.execution_indices.len())
        .sum();

    let incorrect_count: usize = sorted_buckets
        .iter()
        .filter(|b| !b.is_correct)
        .map(|b| b.execution_indices.len())
        .sum();

    writeln!(
        output,
        "- **Correct predictions**: {} ({:.1}%)",
        correct_count,
        (correct_count as f64 / total_executions as f64) * 100.0
    )?;

    writeln!(
        output,
        "- **Incorrect predictions**: {} ({:.1}%)",
        incorrect_count,
        (incorrect_count as f64 / total_executions as f64) * 100.0
    )?;

    writeln!(
        output,
        "- **No Predictions**: {} ({:.1}%)",
        empty_predictions.len(),
        (empty_predictions.len() as f64 / total_executions as f64) * 100.0
    )?;

    let unique_incorrect = sorted_buckets.iter().filter(|b| !b.is_correct).count();
    writeln!(
        output,
        "- **Unique incorrect edit patterns**: {}\n",
        unique_incorrect
    )?;

    writeln!(output, "---\n")?;

    // All correct buckets share one header; `idx == 0` emits it lazily so
    // the section is omitted when there are no correct predictions.
    for (idx, bucket) in sorted_buckets.iter().filter(|b| b.is_correct).enumerate() {
        if idx == 0 {
            writeln!(
                output,
                "## Correct Predictions ({} occurrences)\n",
                bucket.execution_indices.len()
            )?;
        }

        writeln!(output, "**Predicted Edit:**\n")?;
        writeln!(output, "```diff")?;
        writeln!(output, "{}", bucket.diff)?;
        writeln!(output, "```\n")?;

        writeln!(
            output,
            "**Executions:** {}\n",
            bucket.execution_indices.join(", ")
        )?;
        writeln!(output, "---\n")?;
    }

    // Incorrect buckets each get a numbered section including the model's
    // reasoning for every execution in the bucket.
    for (idx, bucket) in sorted_buckets.iter().filter(|b| !b.is_correct).enumerate() {
        writeln!(
            output,
            "## Incorrect Prediction #{} ({} occurrences)\n",
            idx + 1,
            bucket.execution_indices.len()
        )?;

        writeln!(output, "**Predicted Edit:**\n")?;
        writeln!(output, "```diff")?;
        writeln!(output, "{}", bucket.diff)?;
        writeln!(output, "```\n")?;

        writeln!(
            output,
            "**Executions:** {}\n",
            bucket.execution_indices.join(", ")
        )?;

        for (exec_id, reasoning) in bucket
            .execution_indices
            .iter()
            .zip(bucket.reasoning_samples.iter())
        {
            writeln!(output, "{}", fmt_execution(exec_id, reasoning))?;
        }

        writeln!(output, "\n---\n")?;
    }

    if !empty_predictions.is_empty() {
        writeln!(
            output,
            "## No Predictions ({} occurrences)\n",
            empty_predictions.len()
        )?;

        for execution_data in &empty_predictions {
            writeln!(
                output,
                "{}",
                fmt_execution(&execution_data.execution_id, &execution_data.reasoning)
            )?;
        }
        writeln!(output, "\n---\n")?;
    }

    if !errors.is_empty() {
        writeln!(output, "## Errors ({} occurrences)\n", errors.len())?;

        for (err, name, repetition_ix) in &errors {
            writeln!(output, "{}", fmt_evaluation_error(err, name, repetition_ix))?;
        }
        writeln!(output, "\n---\n")?;
    }

    // Renders one execution's reasoning as an indented subsection pointing
    // at the on-disk prediction_response.md.
    fn fmt_execution(exec_id: &str, reasoning: &str) -> String {
        let exec_content = format!(
            "\n### Execution {} `{}/{}/prediction_response.md`{}",
            exec_id,
            crate::paths::RUN_DIR.display(),
            exec_id,
            indent_text(&format!("\n\n```\n{}\n```\n", reasoning,), 2)
        );
        indent_text(&exec_content, 2)
    }

    // Indents every line after the first by `spaces` (the first line keeps
    // its position; callers start their text with a newline).
    fn indent_text(text: &str, spaces: usize) -> String {
        let indent = " ".repeat(spaces);
        text.lines()
            .collect::<Vec<_>>()
            .join(&format!("\n{}", indent))
    }

    Ok(())
}
791
792fn fmt_evaluation_error(err: &anyhow::Error, name: &str, repetition_ix: &Option<u16>) -> String {
793    let err = format!("{err:?}")
794        .replace("<edits", "```xml\n<edits")
795        .replace("</edits>", "</edits>\n```");
796    format!(
797        "### ERROR {name}{}\n\n{err}\n",
798        repetition_ix
799            .map(|ix| format!(" [RUN {ix:03}]"))
800            .unwrap_or_default()
801    )
802}