//! Perf profiler for Zed tests. Outputs timings of tests marked with the `#[perf]`
//! attribute to stdout in Markdown. See the documentation of `util_macros::perf`
//! for usage details on the actual attribute.
//!
//! # Setup
//! Make sure `hyperfine` is installed and in the shell path.
//!
//! # Usage
//! Calling this tool rebuilds the targeted crate(s) with some cfg flags set for the
//! perf proc macro *and* enables optimisations (`release-fast` profile), so expect
//! it to take a little while.
//!
//! To test an individual crate, run:
//! ```sh
//! cargo perf-test -p $CRATE
//! ```
//!
//! To test everything (which will be **VERY SLOW**), run:
//! ```sh
//! cargo perf-test --workspace
//! ```
//!
//! Some command-line parameters are also recognised by this profiler. To filter
//! out all tests below a certain importance (e.g. `important`), run:
//! ```sh
//! cargo perf-test $WHATEVER -- --important
//! ```
//!
//! Similarly, to suppress progress output on the command line, pass `-- --quiet`.
//! These flags can be combined.
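//!
//! For instance, to filter by importance and stay quiet in one invocation
//! (crate name illustrative):
//! ```sh
//! cargo perf-test -p gpui -- --important --quiet
//! ```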
//!
//! ## Comparing runs
//! Passing `--json=ident` will save per-crate run files in `.perf-runs`, e.g.
//! `cargo perf-test -p gpui -- --json=blah` will result in `.perf-runs/blah.gpui.json`
//! being created (unless no tests were run). These results can be automatically
//! compared. To do so, run `cargo perf-compare new-ident old-ident`.
//!
//! To save the markdown output to a file instead, run `cargo perf-compare --save=$FILE
//! new-ident old-ident`.
//!
//! NB: All files matching `.perf-runs/ident.*.json` will be considered when
//! doing this comparison, so make sure there are no leftover files in your
//! `.perf-runs` directory that also match that pattern!
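//!
//! For example, assuming two runs were recorded (typically from different
//! checkouts of the code), a comparison might look like:
//! ```sh
//! cargo perf-test -p gpui -- --json=new
//! # ...switch to the baseline revision...
//! cargo perf-test -p gpui -- --json=old
//! cargo perf-compare new old
//! ```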
//!
//! # Notes
//! This should probably not be called manually unless you're working on the profiler
//! itself; use the `cargo perf-test` alias (after building this crate) instead.

mod implementation;

use implementation::{FailKind, Importance, Output, TestMdata, Timings, consts};

use std::{
    fs::OpenOptions,
    io::{Read, Write},
    num::NonZero,
    path::{Path, PathBuf},
    process::{Command, Stdio},
    sync::atomic::{AtomicBool, Ordering},
    time::{Duration, Instant},
};

/// How many iterations to attempt the first time a test is run.
const DEFAULT_ITER_COUNT: NonZero<usize> = NonZero::new(3).unwrap();
/// Multiplier for the iteration count when a test doesn't pass the noise cutoff.
const ITER_COUNT_MUL: NonZero<usize> = NonZero::new(4).unwrap();
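// With these defaults, triage starts at 3 iterations and grows the count
// geometrically (3, 12, 48, ...) until a run exceeds the noise cutoff; see
// `triage_test` and its use in `main`.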

/// Do we keep stderr empty while running the tests?
static QUIET: AtomicBool = AtomicBool::new(false);

/// Report a failure into the output and skip an iteration.
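///
/// NB: each arm expands to a `continue`, so this may only be invoked inside a
/// loop (as it is in the per-test loop in `main`).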
macro_rules! fail {
    ($output:ident, $name:expr, $kind:expr) => {{
        $output.failure($name, None, None, $kind);
        continue;
    }};
    ($output:ident, $name:expr, $mdata:expr, $kind:expr) => {{
        $output.failure($name, Some($mdata), None, $kind);
        continue;
    }};
    ($output:ident, $name:expr, $mdata:expr, $count:expr, $kind:expr) => {{
        $output.failure($name, Some($mdata), Some($count), $kind);
        continue;
    }};
}

/// How does this perf run return its output?
enum OutputKind<'a> {
    /// Print markdown to the terminal.
    Markdown,
    /// Save JSON to a file.
    Json(&'a Path),
}

impl OutputKind<'_> {
    /// Logs the output of a run as per the `OutputKind`.
    fn log(&self, output: &Output, t_bin: &str) {
        match self {
            OutputKind::Markdown => println!("{output}"),
            OutputKind::Json(ident) => {
                // We're going to be in tooling/perf/$whatever.
                let wspace_dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap())
                    .join("..")
                    .join("..");
                let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
                std::fs::create_dir_all(&runs_dir).unwrap();
                assert!(
                    !ident.to_string_lossy().is_empty(),
                    "FATAL: Empty filename specified!"
                );
                // Get the test binary's crate's name; a path like
                // target/release-fast/deps/gpui-061ff76c9b7af5d7
                // would be reduced to just "gpui".
                let test_bin_stripped = Path::new(t_bin)
                    .file_name()
                    .unwrap()
                    .to_str()
                    .unwrap()
                    .rsplit_once('-')
                    .unwrap()
                    .0;
                let mut file_path = runs_dir.join(ident);
                file_path
                    .as_mut_os_string()
                    .push(format!(".{test_bin_stripped}.json"));
                let mut out_file = OpenOptions::new()
                    .write(true)
                    .create(true)
                    .truncate(true)
                    .open(&file_path)
                    .unwrap();
                out_file
                    .write_all(&serde_json::to_vec(&output).unwrap())
                    .unwrap();
                if !QUIET.load(Ordering::Relaxed) {
                    eprintln!("JSON output written to {}", file_path.display());
                }
            }
        }
    }
}

/// Runs a given metadata-returning function from a test binary, parsing its
/// output into a `TestMdata`.
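///
/// The expected stdout is a sequence of lines, each starting with
/// `consts::MDATA_LINE_PREF`, followed by a field name and its value; roughly
/// (schematic only; the concrete prefix and field-name strings live in `consts`,
/// and the weight value here is arbitrary):
/// ```text
/// <MDATA_LINE_PREF> <VERSION_LINE_NAME> 0
/// <MDATA_LINE_PREF> <IMPORTANCE_LINE_NAME> critical
/// <MDATA_LINE_PREF> <WEIGHT_LINE_NAME> 50
/// ```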
fn parse_mdata(t_bin: &str, mdata_fn: &str) -> Result<TestMdata, FailKind> {
    let mut cmd = Command::new(t_bin);
    cmd.args([mdata_fn, "--exact", "--nocapture"]);
    let out = cmd
        .output()
        .unwrap_or_else(|e| panic!("FATAL: Could not run test binary {t_bin}: {e}"));
    assert!(out.status.success());
    let stdout = String::from_utf8_lossy(&out.stdout);
    let mut version = None;
    let mut iterations = None;
    let mut importance = Importance::default();
    let mut weight = consts::WEIGHT_DEFAULT;
    for line in stdout
        .lines()
        .filter_map(|l| l.strip_prefix(consts::MDATA_LINE_PREF))
    {
        let mut items = line.split_whitespace();
        // For v0, we know the ident always comes first, then one field.
        match items.next().ok_or(FailKind::BadMetadata)? {
            consts::VERSION_LINE_NAME => {
                let v = items
                    .next()
                    .ok_or(FailKind::BadMetadata)?
                    .parse::<u32>()
                    .map_err(|_| FailKind::BadMetadata)?;
                if v > consts::MDATA_VER {
                    return Err(FailKind::VersionMismatch);
                }
                version = Some(v);
            }
            consts::ITER_COUNT_LINE_NAME => {
                // This should never be zero!
                iterations = Some(
                    items
                        .next()
                        .ok_or(FailKind::BadMetadata)?
                        .parse::<usize>()
                        .map_err(|_| FailKind::BadMetadata)?
                        .try_into()
                        .map_err(|_| FailKind::BadMetadata)?,
                );
            }
            consts::IMPORTANCE_LINE_NAME => {
                importance = match items.next().ok_or(FailKind::BadMetadata)? {
                    "critical" => Importance::Critical,
                    "important" => Importance::Important,
                    "average" => Importance::Average,
                    "iffy" => Importance::Iffy,
                    "fluff" => Importance::Fluff,
                    _ => return Err(FailKind::BadMetadata),
                };
            }
            consts::WEIGHT_LINE_NAME => {
                weight = items
                    .next()
                    .ok_or(FailKind::BadMetadata)?
                    .parse::<u8>()
                    .map_err(|_| FailKind::BadMetadata)?;
            }
            _ => unreachable!(),
        }
    }

    Ok(TestMdata {
        version: version.ok_or(FailKind::BadMetadata)?,
        // Iterations may be determined by us and thus left unspecified.
        iterations,
        // In principle this should always be set, but just for the sake of
        // stability allow the potentially-breaking change of not reporting the
        // importance without erroring. Maybe we want to change this.
        importance,
        // Same with weight.
        weight,
    })
}

/// Compares the perf results of two profiles as per the arguments passed in.
fn compare_profiles(args: &[String]) {
    let mut save_to = None;
    let mut ident_idx = 0;
    args.first().inspect(|a| {
        if a.starts_with("--save") {
            save_to = Some(
                a.strip_prefix("--save=")
                    .expect("FATAL: save param formatted incorrectly"),
            );
            ident_idx = 1;
        }
    });
    let ident_new = args
        .get(ident_idx)
        .expect("FATAL: missing identifier for new run");
    let ident_old = args
        .get(ident_idx + 1)
        .expect("FATAL: missing identifier for old run");
    let wspace_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
    let runs_dir = PathBuf::from(&wspace_dir)
        .join("..")
        .join("..")
        .join(consts::RUNS_DIR);

    // Use the blank outputs initially, so we can merge into these with prefixes.
    let mut outputs_new = Output::blank();
    let mut outputs_old = Output::blank();

    for e in runs_dir.read_dir().unwrap() {
        let Ok(entry) = e else {
            continue;
        };
        let Ok(metadata) = entry.metadata() else {
            continue;
        };
        if metadata.is_file() {
            let Ok(name) = entry.file_name().into_string() else {
                continue;
            };

            // A little helper to avoid code duplication. Reads the `output` from
            // a json file, then merges it into what we have so far.
            let read_into = |output: &mut Output| {
                let mut elems = name.split('.').skip(1);
                let prefix = elems.next().unwrap();
                assert_eq!("json", elems.next().unwrap());
                assert!(elems.next().is_none());
                let mut buffer = Vec::new();
                let _ = OpenOptions::new()
                    .read(true)
                    .open(entry.path())
                    .unwrap()
                    .read_to_end(&mut buffer)
                    .unwrap();
                let o_other: Output = serde_json::from_slice(&buffer).unwrap();
                output.merge(o_other, prefix);
            };

            if name.starts_with(ident_old) {
                read_into(&mut outputs_old);
            } else if name.starts_with(ident_new) {
                read_into(&mut outputs_new);
            }
        }
    }

    let res = outputs_new.compare_perf(outputs_old);
    if let Some(filename) = save_to {
        let mut file = std::fs::OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(true)
            .open(filename)
            .expect("FATAL: couldn't save run results to file");
        file.write_all(format!("{res}").as_bytes()).unwrap();
    } else {
        println!("{res}");
    }
}

/// Runs a test binary, filtering out tests which aren't marked for perf triage
/// and giving back the list of tests we care about.
///
/// The output of this is an iterator over `test_fn_name, test_mdata_name`.
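///
/// Note that both returned names still carry their perf suffixes
/// (`consts::SUF_NORMAL` and `consts::SUF_MDATA` respectively); `main` strips
/// `SUF_NORMAL` when it wants a pretty-printable test name.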
fn get_tests(t_bin: &str) -> impl ExactSizeIterator<Item = (String, String)> {
    let mut cmd = Command::new(t_bin);
    // --format=json is nightly-only :(
    cmd.args(["--list", "--format=terse"]);
    let out = cmd
        .output()
        .unwrap_or_else(|e| panic!("FATAL: Could not run test binary {t_bin}: {e}"));
    assert!(
        out.status.success(),
        "FATAL: Cannot do perf check - test binary {t_bin} returned an error"
    );
    if !QUIET.load(Ordering::Relaxed) {
        eprintln!("Test binary ran successfully; starting profile...");
    }
    // Parse the test harness output to look for tests we care about.
    let stdout = String::from_utf8_lossy(&out.stdout);
    let mut test_list: Vec<_> = stdout
        .lines()
        .filter_map(|line| {
            // This should split only in two; e.g.,
            // "app::test::test_arena: test" => "app::test::test_arena:", "test"
            let line: Vec<_> = line.split_whitespace().collect();
            match line[..] {
                // Final byte of t_name is ":", which we need to ignore.
                [t_name, kind] => (kind == "test").then(|| &t_name[..t_name.len() - 1]),
                _ => None,
            }
        })
        // Exclude tests that aren't marked for perf triage based on suffix.
        .filter(|t_name| {
            t_name.ends_with(consts::SUF_NORMAL) || t_name.ends_with(consts::SUF_MDATA)
        })
        .collect();

    // Pulling in itertools just for .dedup() would be quite a big dependency that's
    // not used elsewhere, so do this on a vec instead.
    test_list.sort_unstable();
    test_list.dedup();

    // Tests should come in pairs with their mdata fn!
    assert!(
        test_list.len().is_multiple_of(2),
        "Malformed tests in test binary {t_bin}"
    );

    let out = test_list
        .chunks_exact_mut(2)
        .map(|pair| {
            // Be resilient against changes to these constants.
            if consts::SUF_NORMAL < consts::SUF_MDATA {
                (pair[0].to_owned(), pair[1].to_owned())
            } else {
                (pair[1].to_owned(), pair[0].to_owned())
            }
        })
        .collect::<Vec<_>>();
    out.into_iter()
}

/// Runs the specified test `count` times, returning the time taken if the test
/// succeeded.
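///
/// Note that the measured duration includes process startup and harness
/// overhead, which is why `triage_test` subtracts a single-iteration run's
/// duration before comparing against the noise threshold.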
#[inline]
fn spawn_and_iterate(t_bin: &str, t_name: &str, count: NonZero<usize>) -> Option<Duration> {
    let mut cmd = Command::new(t_bin);
    cmd.args([t_name, "--exact"]);
    cmd.env(consts::ITER_ENV_VAR, format!("{count}"));
    // Don't let the child muck up our stdin/out/err.
    cmd.stdin(Stdio::null());
    cmd.stdout(Stdio::null());
    cmd.stderr(Stdio::null());
    let pre = Instant::now();
    // Discard the output beyond ensuring success.
    let out = cmd.spawn().unwrap().wait();
    let post = Instant::now();
    out.iter().find_map(|s| s.success().then_some(post - pre))
}

/// Triage a test to determine the correct number of iterations that it should run.
/// Specifically, repeatedly runs the given test until its execution time exceeds
/// `thresh`, calling `step(iterations)` after every run that falls short of the
/// threshold to determine the new iteration count. Returns `None` if the test
/// errored or `step` returned `None`, else `Some(iterations)`.
///
/// # Panics
/// This will panic if `step` is not monotonically increasing, or if the test
/// binary is invalid.
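///
/// A sketch of how `main` drives this triage (the closure here is a simplified
/// version of the real one, which also logs a warning on overflow):
/// ```ignore
/// let iters = triage_test(t_bin, t_name, consts::NOISE_CUTOFF, |c| {
///     c.checked_mul(ITER_COUNT_MUL)
/// });
/// ```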
fn triage_test(
    t_bin: &str,
    t_name: &str,
    thresh: Duration,
    mut step: impl FnMut(NonZero<usize>) -> Option<NonZero<usize>>,
) -> Option<NonZero<usize>> {
    let mut iter_count = DEFAULT_ITER_COUNT;
    // The first run of a test might be an outlier (e.g. it's doing some caching),
    // so time a single iteration up front and subtract it from later measurements
    // to discount that one-off cost.
    let duration_once = spawn_and_iterate(t_bin, t_name, NonZero::new(1).unwrap())?;
    loop {
        let duration = spawn_and_iterate(t_bin, t_name, iter_count)?;
        if duration.saturating_sub(duration_once) > thresh {
            break Some(iter_count);
        }
        let new = step(iter_count)?;
        assert!(
            new > iter_count,
            "FATAL: step must be monotonically increasing"
        );
        iter_count = new;
    }
}

/// Profiles a given test with hyperfine, returning the mean and standard deviation
/// for its runtime. If the test errors, returns `None` instead.
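///
/// The requested iteration count is forwarded to the test binary via
/// `consts::ITER_ENV_VAR`, so each hyperfine timing run executes the test
/// `iterations` times (after one warm-up run).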
fn hyp_profile(t_bin: &str, t_name: &str, iterations: NonZero<usize>) -> Option<Timings> {
    let mut perf_cmd = Command::new("hyperfine");
    // Warm up the cache and print markdown output to stdout, which we parse.
    perf_cmd.args([
        "--style",
        "none",
        "--warmup",
        "1",
        "--export-markdown",
        "-",
        // Parse json instead...
        "--time-unit",
        "millisecond",
        &format!("{t_bin} --exact {t_name}"),
    ]);
    perf_cmd.env(consts::ITER_ENV_VAR, format!("{iterations}"));
    let p_out = perf_cmd.output().unwrap();
    if !p_out.status.success() {
        return None;
    }

    let cmd_output = String::from_utf8_lossy(&p_out.stdout);
    // Can't use .last() since we have a trailing newline. Sigh.
    let results_line = cmd_output.lines().nth(3).unwrap();
    // Grab the values out of the pretty-print.
    // TODO: Parse json instead.
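    // The exported markdown row is expected to look roughly like this
    // (numbers illustrative):
    //   | `<t_bin> --exact <t_name>` | 12.3 ± 0.4 | 11.9 | 13.1 | 1.00 |
    // After splitting on whitespace, the mean is token index 5 and the stddev
    // follows the "±" separator, hence the `nth(5)` / `nth(1)` below.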
    let mut res_iter = results_line.split_whitespace();
    // Durations are given in milliseconds, so account for that.
    let mean = Duration::from_secs_f64(res_iter.nth(5).unwrap().parse::<f64>().unwrap() / 1000.);
    let stddev = Duration::from_secs_f64(res_iter.nth(1).unwrap().parse::<f64>().unwrap() / 1000.);

    Some(Timings { mean, stddev })
}

fn main() {
    let args = std::env::args().collect::<Vec<_>>();
    // We get passed the test binary we need to run as the 1st argument after our own name.
    let t_bin = args
        .get(1)
        .expect("FATAL: No test binary or command; this shouldn't be manually invoked!");

    // We're being asked to compare two results, not run the profiler.
    if t_bin == "compare" {
        compare_profiles(&args[2..]);
        return;
    }

    // Minimum test importance we care about for this run.
    let mut thresh = Importance::Iffy;
    // Where to print the output of this run.
    let mut out_kind = OutputKind::Markdown;

    for arg in args.iter().skip(2) {
        match arg.as_str() {
            "--critical" => thresh = Importance::Critical,
            "--important" => thresh = Importance::Important,
            "--average" => thresh = Importance::Average,
            "--iffy" => thresh = Importance::Iffy,
            "--fluff" => thresh = Importance::Fluff,
            "--quiet" => QUIET.store(true, Ordering::Relaxed),
            s if s.starts_with("--json") => {
                out_kind = OutputKind::Json(Path::new(
                    s.strip_prefix("--json=")
                        .expect("FATAL: Invalid json parameter; pass --json=ident"),
                ));
            }
            _ => (),
        }
    }
    if !QUIET.load(Ordering::Relaxed) {
        eprintln!("Starting perf check");
    }

    let mut output = Output::default();

    // Spawn and profile an instance of each perf-sensitive test, via hyperfine.
    // Each test is a pair of (test, metadata-returning-fn), so grab both. We also
    // know the list is sorted.
    let i = get_tests(t_bin);
    let len = i.len();
    for (idx, (ref t_name, ref t_mdata)) in i.enumerate() {
        if !QUIET.load(Ordering::Relaxed) {
            eprint!("\rProfiling test {}/{}", idx + 1, len);
        }
        // Pretty-printable stripped name for the test.
        let t_name_pretty = t_name.replace(consts::SUF_NORMAL, "");

        // Get the metadata this test reports for us.
        let t_mdata = match parse_mdata(t_bin, t_mdata) {
            Ok(mdata) => mdata,
            Err(err) => fail!(output, t_name_pretty, err),
        };

        if t_mdata.importance < thresh {
            fail!(output, t_name_pretty, t_mdata, FailKind::Skipped);
        }

        // Time test execution to see how many iterations we need to do in order
        // to account for random noise. This is skipped for tests with fixed
        // iteration counts.
        let final_iter_count = t_mdata.iterations.or_else(|| {
            triage_test(t_bin, t_name, consts::NOISE_CUTOFF, |c| {
                if let Some(c) = c.checked_mul(ITER_COUNT_MUL) {
                    Some(c)
                } else {
                    // This should almost never happen, but just in case...
                    eprintln!(
                        "WARNING: Ran nearly usize::MAX iterations of test {t_name_pretty}; skipping"
                    );
                    None
                }
            })
        });

        // Don't profile failing tests.
        let Some(final_iter_count) = final_iter_count else {
            fail!(output, t_name_pretty, t_mdata, FailKind::Triage);
        };

        // Now profile!
        if let Some(timings) = hyp_profile(t_bin, t_name, final_iter_count) {
            output.success(t_name_pretty, t_mdata, final_iter_count, timings);
        } else {
            fail!(
                output,
                t_name_pretty,
                t_mdata,
                final_iter_count,
                FailKind::Profile
            );
        }
    }
    if !QUIET.load(Ordering::Relaxed) {
        if output.is_empty() {
            eprintln!("Nothing to do.");
        } else {
            // If stdout and stderr are on the same terminal, move us after the
            // output from above.
            eprintln!();
        }
    }

    // No need to make an empty JSON file for every empty test binary.
    if output.is_empty() {
        return;
    }

    out_kind.log(&output, t_bin);
}