main.rs

  1//! Perf profiler for Zed tests. Outputs timings of tests marked with the `#[perf]`
  2//! attribute to stdout in Markdown. See the documentation of `util_macros::perf`
  3//! for usage details on the actual attribute.
  4//!
  5//! # Setup
  6//! Make sure `hyperfine` is installed and in the shell path.
  7//!
  8//! # Usage
  9//! Calling this tool rebuilds the targeted crate(s) with some cfg flags set for the
 10//! perf proc macro *and* enables optimisations (`release-fast` profile), so expect
 11//! it to take a little while.
 12//!
 13//! To test an individual crate, run:
 14//! ```sh
 15//! cargo perf-test -p $CRATE
 16//! ```
 17//!
 18//! To test everything (which will be **VERY SLOW**), run:
 19//! ```sh
 20//! cargo perf-test --workspace
 21//! ```
 22//!
 23//! Some command-line parameters are also recognised by this profiler. To filter
 24//! out all tests below a certain importance (e.g. `important`), run:
 25//! ```sh
 26//! cargo perf-test $WHATEVER -- --important
 27//! ```
 28//!
 29//! Similarly, to skip outputting progress to the command line, pass `-- --quiet`.
 30//! These flags can be combined.
 31//!
 32//! ## Comparing runs
 33//! Passing `--json=ident` will save per-crate run files in `.perf-runs`, e.g.
 34//! `cargo perf-test -p gpui -- --json=blah` will result in `.perf-runs/blah.gpui.json`
 35//! being created (unless no tests were run). These results can be automatically
 36//! compared. To do so, run `cargo perf-compare new-ident old-ident`.
 37//!
 38//! To save the markdown output to a file instead, run `cargo perf-compare --save=$FILE
 39//! new-ident old-ident`.
 40//!
 41//! NB: All files matching `.perf-runs/ident.*.json` will be considered when
 42//! doing this comparison, so ensure there aren't leftover files in your `.perf-runs`
 43//! directory that might match that!
 44//!
 45//! # Notes
 46//! This should probably not be called manually unless you're working on the profiler
 47//! itself; use the `cargo perf-test` alias (after building this crate) instead.
 48
 49use zed_perf::{FailKind, Importance, Output, TestMdata, Timings, consts};
 50
 51use std::{
 52    fs::OpenOptions,
 53    io::Write,
 54    num::NonZero,
 55    path::{Path, PathBuf},
 56    process::{Command, Stdio},
 57    sync::atomic::{AtomicBool, Ordering},
 58    time::{Duration, Instant},
 59};
 60
 61/// How many iterations to attempt the first time a test is run.
 62const DEFAULT_ITER_COUNT: NonZero<usize> = NonZero::new(3).unwrap();
 63/// Multiplier for the iteration count when a test doesn't pass the noise cutoff.
 64const ITER_COUNT_MUL: NonZero<usize> = NonZero::new(4).unwrap();
 65
 66/// Do we keep stderr empty while running the tests?
 67static QUIET: AtomicBool = AtomicBool::new(false);
 68
 69/// Report a failure into the output and skip an iteration.
 70macro_rules! fail {
 71    ($output:ident, $name:expr, $kind:expr) => {{
 72        $output.failure($name, None, None, $kind);
 73        continue;
 74    }};
 75    ($output:ident, $name:expr, $mdata:expr, $kind:expr) => {{
 76        $output.failure($name, Some($mdata), None, $kind);
 77        continue;
 78    }};
 79    ($output:ident, $name:expr, $mdata:expr, $count:expr, $kind:expr) => {{
 80        $output.failure($name, Some($mdata), Some($count), $kind);
 81        continue;
 82    }};
 83}
 84
 85/// How does this perf run return its output?
 86enum OutputKind<'a> {
 87    /// Print markdown to the terminal.
 88    Markdown,
 89    /// Save JSON to a file.
 90    Json(&'a Path),
 91}
 92
 93impl OutputKind<'_> {
 94    /// Logs the output of a run as per the `OutputKind`.
 95    fn log(&self, output: &Output, t_bin: &str) {
 96        match self {
 97            OutputKind::Markdown => println!("{output}"),
 98            OutputKind::Json(ident) => {
 99                // We're going to be in tooling/perf/$whatever.
100                let wspace_dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap())
101                    .join("..")
102                    .join("..");
103                let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
104                std::fs::create_dir_all(&runs_dir).unwrap();
105                assert!(
106                    !ident.to_string_lossy().is_empty(),
107                    "FATAL: Empty filename specified!"
108                );
109                // Get the test binary's crate's name; a path like
110                // target/release-fast/deps/gpui-061ff76c9b7af5d7
111                // would be reduced to just "gpui".
112                let test_bin_stripped = Path::new(t_bin)
113                    .file_name()
114                    .unwrap()
115                    .to_str()
116                    .unwrap()
117                    .rsplit_once('-')
118                    .unwrap()
119                    .0;
120                let mut file_path = runs_dir.join(ident);
121                file_path
122                    .as_mut_os_string()
123                    .push(format!(".{test_bin_stripped}.json"));
124                let mut out_file = OpenOptions::new()
125                    .write(true)
126                    .create(true)
127                    .truncate(true)
128                    .open(&file_path)
129                    .unwrap();
130                out_file
131                    .write_all(&serde_json::to_vec(&output).unwrap())
132                    .unwrap();
133                if !QUIET.load(Ordering::Relaxed) {
134                    eprintln!("JSON output written to {}", file_path.display());
135                }
136            }
137        }
138    }
139}
140
141/// Runs a given metadata-returning function from a test handler, parsing its
142/// output into a `TestMdata`.
143fn parse_mdata(t_bin: &str, mdata_fn: &str) -> Result<TestMdata, FailKind> {
144    let mut cmd = Command::new(t_bin);
145    cmd.args([mdata_fn, "--exact", "--nocapture"]);
146    let out = cmd
147        .output()
148        .expect("FATAL: Could not run test binary {t_bin}");
149    assert!(out.status.success());
150    let stdout = String::from_utf8_lossy(&out.stdout);
151    let mut version = None;
152    let mut iterations = None;
153    let mut importance = Importance::default();
154    let mut weight = consts::WEIGHT_DEFAULT;
155    for line in stdout
156        .lines()
157        .filter_map(|l| l.strip_prefix(consts::MDATA_LINE_PREF))
158    {
159        let mut items = line.split_whitespace();
160        // For v0, we know the ident always comes first, then one field.
161        match items.next().ok_or(FailKind::BadMetadata)? {
162            consts::VERSION_LINE_NAME => {
163                let v = items
164                    .next()
165                    .ok_or(FailKind::BadMetadata)?
166                    .parse::<u32>()
167                    .map_err(|_| FailKind::BadMetadata)?;
168                if v > consts::MDATA_VER {
169                    return Err(FailKind::VersionMismatch);
170                }
171                version = Some(v);
172            }
173            consts::ITER_COUNT_LINE_NAME => {
174                // This should never be zero!
175                iterations = Some(
176                    items
177                        .next()
178                        .ok_or(FailKind::BadMetadata)?
179                        .parse::<usize>()
180                        .map_err(|_| FailKind::BadMetadata)?
181                        .try_into()
182                        .map_err(|_| FailKind::BadMetadata)?,
183                );
184            }
185            consts::IMPORTANCE_LINE_NAME => {
186                importance = match items.next().ok_or(FailKind::BadMetadata)? {
187                    "critical" => Importance::Critical,
188                    "important" => Importance::Important,
189                    "average" => Importance::Average,
190                    "iffy" => Importance::Iffy,
191                    "fluff" => Importance::Fluff,
192                    _ => return Err(FailKind::BadMetadata),
193                };
194            }
195            consts::WEIGHT_LINE_NAME => {
196                weight = items
197                    .next()
198                    .ok_or(FailKind::BadMetadata)?
199                    .parse::<u8>()
200                    .map_err(|_| FailKind::BadMetadata)?;
201            }
202            _ => unreachable!(),
203        }
204    }
205
206    Ok(TestMdata {
207        version: version.ok_or(FailKind::BadMetadata)?,
208        // Iterations may be determined by us and thus left unspecified.
209        iterations,
210        // In principle this should always be set, but just for the sake of
211        // stability allow the potentially-breaking change of not reporting the
212        // importance without erroring. Maybe we want to change this.
213        importance,
214        // Same with weight.
215        weight,
216    })
217}
218
219/// Compares the perf results of two profiles as per the arguments passed in.
220fn compare_profiles(args: &[String]) {
221    let mut save_to = None;
222    let mut ident_idx = 0;
223    args.first().inspect(|a| {
224        if a.starts_with("--save") {
225            save_to = Some(
226                a.strip_prefix("--save=")
227                    .expect("FATAL: save param formatted incorrectly"),
228            );
229        }
230        ident_idx = 1;
231    });
232    let ident_new = args
233        .get(ident_idx)
234        .expect("FATAL: missing identifier for new run");
235    let ident_old = args
236        .get(ident_idx + 1)
237        .expect("FATAL: missing identifier for old run");
238    let wspace_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
239    let runs_dir = PathBuf::from(&wspace_dir)
240        .join("..")
241        .join("..")
242        .join(consts::RUNS_DIR);
243
244    // Use the blank outputs initially, so we can merge into these with prefixes.
245    let mut outputs_new = Output::blank();
246    let mut outputs_old = Output::blank();
247
248    for e in runs_dir.read_dir().unwrap() {
249        let Ok(entry) = e else {
250            continue;
251        };
252        let Ok(metadata) = entry.metadata() else {
253            continue;
254        };
255        if metadata.is_file() {
256            let Ok(name) = entry.file_name().into_string() else {
257                continue;
258            };
259
260            // A little helper to avoid code duplication. Reads the `output` from
261            // a json file, then merges it into what we have so far.
262            let read_into = |output: &mut Output| {
263                let mut elems = name.split('.').skip(1);
264                let prefix = elems.next().unwrap();
265                assert_eq!("json", elems.next().unwrap());
266                assert!(elems.next().is_none());
267                let handle = OpenOptions::new().read(true).open(entry.path()).unwrap();
268                let o_other: Output = serde_json::from_reader(handle).unwrap();
269                output.merge(o_other, prefix);
270            };
271
272            if name.starts_with(ident_old) {
273                read_into(&mut outputs_old);
274            } else if name.starts_with(ident_new) {
275                read_into(&mut outputs_new);
276            }
277        }
278    }
279
280    let res = outputs_new.compare_perf(outputs_old);
281    if let Some(filename) = save_to {
282        let mut file = std::fs::OpenOptions::new()
283            .create(true)
284            .write(true)
285            .truncate(true)
286            .open(filename)
287            .expect("FATAL: couldn't save run results to file");
288        file.write_all(format!("{res}").as_bytes()).unwrap();
289    } else {
290        println!("{res}");
291    }
292}
293
294/// Runs a test binary, filtering out tests which aren't marked for perf triage
295/// and giving back the list of tests we care about.
296///
297/// The output of this is an iterator over `test_fn_name, test_mdata_name`.
298fn get_tests(t_bin: &str) -> impl ExactSizeIterator<Item = (String, String)> {
299    let mut cmd = Command::new(t_bin);
300    // --format=json is nightly-only :(
301    cmd.args(["--list", "--format=terse"]);
302    let out = cmd
303        .output()
304        .expect("FATAL: Could not run test binary {t_bin}");
305    assert!(
306        out.status.success(),
307        "FATAL: Cannot do perf check - test binary {t_bin} returned an error"
308    );
309    if !QUIET.load(Ordering::Relaxed) {
310        eprintln!("Test binary ran successfully; starting profile...");
311    }
312    // Parse the test harness output to look for tests we care about.
313    let stdout = String::from_utf8_lossy(&out.stdout);
314    let mut test_list: Vec<_> = stdout
315        .lines()
316        .filter_map(|line| {
317            // This should split only in two; e.g.,
318            // "app::test::test_arena: test" => "app::test::test_arena:", "test"
319            let line: Vec<_> = line.split_whitespace().collect();
320            match line[..] {
321                // Final byte of t_name is ":", which we need to ignore.
322                [t_name, kind] => (kind == "test").then(|| &t_name[..t_name.len() - 1]),
323                _ => None,
324            }
325        })
326        // Exclude tests that aren't marked for perf triage based on suffix.
327        .filter(|t_name| {
328            t_name.ends_with(consts::SUF_NORMAL) || t_name.ends_with(consts::SUF_MDATA)
329        })
330        .collect();
331
332    // Pulling itertools just for .dedup() would be quite a big dependency that's
333    // not used elsewhere, so do this on a vec instead.
334    test_list.sort_unstable();
335    test_list.dedup();
336
337    // Tests should come in pairs with their mdata fn!
338    assert!(
339        test_list.len().is_multiple_of(2),
340        "Malformed tests in test binary {t_bin}"
341    );
342
343    let out = test_list
344        .chunks_exact_mut(2)
345        .map(|pair| {
346            // Be resilient against changes to these constants.
347            if consts::SUF_NORMAL < consts::SUF_MDATA {
348                (pair[0].to_owned(), pair[1].to_owned())
349            } else {
350                (pair[1].to_owned(), pair[0].to_owned())
351            }
352        })
353        .collect::<Vec<_>>();
354    out.into_iter()
355}
356
357/// Runs the specified test `count` times, returning the time taken if the test
358/// succeeded.
359#[inline]
360fn spawn_and_iterate(t_bin: &str, t_name: &str, count: NonZero<usize>) -> Option<Duration> {
361    let mut cmd = Command::new(t_bin);
362    cmd.args([t_name, "--exact"]);
363    cmd.env(consts::ITER_ENV_VAR, format!("{count}"));
364    // Don't let the child muck up our stdin/out/err.
365    cmd.stdin(Stdio::null());
366    cmd.stdout(Stdio::null());
367    cmd.stderr(Stdio::null());
368    let pre = Instant::now();
369    // Discard the output beyond ensuring success.
370    let out = cmd.spawn().unwrap().wait();
371    let post = Instant::now();
372    out.iter().find_map(|s| s.success().then_some(post - pre))
373}
374
375/// Triage a test to determine the correct number of iterations that it should run.
376/// Specifically, repeatedly runs the given test until its execution time exceeds
377/// `thresh`, calling `step(iterations)` after every failed run to determine the new
378/// iteration count. Returns `None` if the test errored or `step` returned `None`,
379/// else `Some(iterations)`.
380///
381/// # Panics
382/// This will panic if `step(usize)` is not monotonically increasing, or if the test
383/// binary is invalid.
384fn triage_test(
385    t_bin: &str,
386    t_name: &str,
387    thresh: Duration,
388    mut step: impl FnMut(NonZero<usize>) -> Option<NonZero<usize>>,
389) -> Option<NonZero<usize>> {
390    let mut iter_count = DEFAULT_ITER_COUNT;
391    // It's possible that the first loop of a test might be an outlier (e.g. it's
392    // doing some caching), in which case we want to skip it.
393    let duration_once = spawn_and_iterate(t_bin, t_name, NonZero::new(1).unwrap())?;
394    loop {
395        let duration = spawn_and_iterate(t_bin, t_name, iter_count)?;
396        if duration.saturating_sub(duration_once) > thresh {
397            break Some(iter_count);
398        }
399        let new = step(iter_count)?;
400        assert!(
401            new > iter_count,
402            "FATAL: step must be monotonically increasing"
403        );
404        iter_count = new;
405    }
406}
407
408/// Profiles a given test with hyperfine, returning the mean and standard deviation
409/// for its runtime. If the test errors, returns `None` instead.
410fn hyp_profile(t_bin: &str, t_name: &str, iterations: NonZero<usize>) -> Option<Timings> {
411    let mut perf_cmd = Command::new("hyperfine");
412    // Warm up the cache and print markdown output to stdout, which we parse.
413    perf_cmd.args([
414        "--style",
415        "none",
416        "--warmup",
417        "1",
418        "--export-markdown",
419        "-",
420        // Parse json instead...
421        "--time-unit",
422        "millisecond",
423        &format!("{t_bin} --exact {t_name}"),
424    ]);
425    perf_cmd.env(consts::ITER_ENV_VAR, format!("{iterations}"));
426    let p_out = perf_cmd.output().unwrap();
427    if !p_out.status.success() {
428        return None;
429    }
430
431    let cmd_output = String::from_utf8_lossy(&p_out.stdout);
432    // Can't use .last() since we have a trailing newline. Sigh.
433    let results_line = cmd_output.lines().nth(3).unwrap();
434    // Grab the values out of the pretty-print.
435    // TODO: Parse json instead.
436    let mut res_iter = results_line.split_whitespace();
437    // Durations are given in milliseconds, so account for that.
438    let mean = Duration::from_secs_f64(res_iter.nth(5).unwrap().parse::<f64>().unwrap() / 1000.);
439    let stddev = Duration::from_secs_f64(res_iter.nth(1).unwrap().parse::<f64>().unwrap() / 1000.);
440
441    Some(Timings { mean, stddev })
442}
443
444fn main() {
445    let args = std::env::args().collect::<Vec<_>>();
446    // We get passed the test we need to run as the 1st argument after our own name.
447    let t_bin = args
448        .get(1)
449        .expect("FATAL: No test binary or command; this shouldn't be manually invoked!");
450
451    // We're being asked to compare two results, not run the profiler.
452    if t_bin == "compare" {
453        compare_profiles(&args[2..]);
454        return;
455    }
456
457    // Minimum test importance we care about this run.
458    let mut thresh = Importance::Iffy;
459    // Where to print the output of this run.
460    let mut out_kind = OutputKind::Markdown;
461
462    for arg in args.iter().skip(2) {
463        match arg.as_str() {
464            "--critical" => thresh = Importance::Critical,
465            "--important" => thresh = Importance::Important,
466            "--average" => thresh = Importance::Average,
467            "--iffy" => thresh = Importance::Iffy,
468            "--fluff" => thresh = Importance::Fluff,
469            "--quiet" => QUIET.store(true, Ordering::Relaxed),
470            s if s.starts_with("--json") => {
471                out_kind = OutputKind::Json(Path::new(
472                    s.strip_prefix("--json=")
473                        .expect("FATAL: Invalid json parameter; pass --json=ident"),
474                ));
475            }
476            _ => (),
477        }
478    }
479    if !QUIET.load(Ordering::Relaxed) {
480        eprintln!("Starting perf check");
481    }
482
483    let mut output = Output::default();
484
485    // Spawn and profile an instance of each perf-sensitive test, via hyperfine.
486    // Each test is a pair of (test, metadata-returning-fn), so grab both. We also
487    // know the list is sorted.
488    let i = get_tests(t_bin);
489    let len = i.len();
490    for (idx, (ref t_name, ref t_mdata)) in i.enumerate() {
491        if !QUIET.load(Ordering::Relaxed) {
492            eprint!("\rProfiling test {}/{}", idx + 1, len);
493        }
494        // Pretty-printable stripped name for the test.
495        let t_name_pretty = t_name.replace(consts::SUF_NORMAL, "");
496
497        // Get the metadata this test reports for us.
498        let t_mdata = match parse_mdata(t_bin, t_mdata) {
499            Ok(mdata) => mdata,
500            Err(err) => fail!(output, t_name_pretty, err),
501        };
502
503        if t_mdata.importance < thresh {
504            fail!(output, t_name_pretty, t_mdata, FailKind::Skipped);
505        }
506
507        // Time test execution to see how many iterations we need to do in order
508        // to account for random noise. This is skipped for tests with fixed
509        // iteration counts.
510        let final_iter_count = t_mdata.iterations.or_else(|| {
511            triage_test(t_bin, t_name, consts::NOISE_CUTOFF, |c| {
512                if let Some(c) = c.checked_mul(ITER_COUNT_MUL) {
513                    Some(c)
514                } else {
515                    // This should almost never happen, but maybe..?
516                    eprintln!(
517                        "WARNING: Ran nearly usize::MAX iterations of test {t_name_pretty}; skipping"
518                    );
519                    None
520                }
521            })
522        });
523
524        // Don't profile failing tests.
525        let Some(final_iter_count) = final_iter_count else {
526            fail!(output, t_name_pretty, t_mdata, FailKind::Triage);
527        };
528
529        // Now profile!
530        if let Some(timings) = hyp_profile(t_bin, t_name, final_iter_count) {
531            output.success(t_name_pretty, t_mdata, final_iter_count, timings);
532        } else {
533            fail!(
534                output,
535                t_name_pretty,
536                t_mdata,
537                final_iter_count,
538                FailKind::Profile
539            );
540        }
541    }
542    if !QUIET.load(Ordering::Relaxed) {
543        if output.is_empty() {
544            eprintln!("Nothing to do.");
545        } else {
546            // If stdout and stderr are on the same terminal, move us after the
547            // output from above.
548            eprintln!();
549        }
550    }
551
552    // No need making an empty json file on every empty test bin.
553    if output.is_empty() {
554        return;
555    }
556
557    out_kind.log(&output, t_bin);
558}