1//! Perf profiler for Zed tests. Outputs timings of tests marked with the `#[perf]`
2//! attribute to stdout in Markdown. See the documentation of `util_macros::perf`
3//! for usage details on the actual attribute.
4//!
5//! # Setup
6//! Make sure `hyperfine` is installed and in the shell path.
7//!
8//! # Usage
9//! Calling this tool rebuilds the targeted crate(s) with some cfg flags set for the
10//! perf proc macro *and* enables optimisations (`release-fast` profile), so expect
11//! it to take a little while.
12//!
13//! To test an individual crate, run:
14//! ```sh
15//! cargo perf-test -p $CRATE
16//! ```
17//!
18//! To test everything (which will be **VERY SLOW**), run:
19//! ```sh
20//! cargo perf-test --workspace
21//! ```
22//!
23//! Some command-line parameters are also recognised by this profiler. To filter
24//! out all tests below a certain importance (e.g. `important`), run:
25//! ```sh
26//! cargo perf-test $WHATEVER -- --important
27//! ```
28//!
29//! Similarly, to skip outputting progress to the command line, pass `-- --quiet`.
30//! These flags can be combined.
31//!
32//! ## Comparing runs
33//! Passing `--json=ident` will save per-crate run files in `.perf-runs`, e.g.
34//! `cargo perf-test -p gpui -- --json=blah` will result in `.perf-runs/blah.gpui.json`
35//! being created (unless no tests were run). These results can be automatically
36//! compared. To do so, run `cargo perf-compare new-ident old-ident`.
37//!
38//! NB: All files matching `.perf-runs/ident.*.json` will be considered when
39//! doing this comparison, so ensure there aren't leftover files in your `.perf-runs`
40//! directory that might match that!
41//!
42//! # Notes
43//! This should probably not be called manually unless you're working on the profiler
44//! itself; use the `cargo perf-test` alias (after building this crate) instead.
45
46use perf::{FailKind, Importance, Output, TestMdata, Timings, consts};
47
48use std::{
49 fs::OpenOptions,
50 io::Write,
51 num::NonZero,
52 path::{Path, PathBuf},
53 process::{Command, Stdio},
54 sync::atomic::{AtomicBool, Ordering},
55 time::{Duration, Instant},
56};
57
/// Iteration count used for the first triage run of a test.
const DEFAULT_ITER_COUNT: NonZero<usize> = NonZero::new(3).expect("3 is non-zero");
/// Factor the iteration count is multiplied by whenever a triage run does not
/// pass the noise cutoff.
const ITER_COUNT_MUL: NonZero<usize> = NonZero::new(4).expect("4 is non-zero");

/// Whether progress reporting on stderr is suppressed (set by `--quiet`).
static QUIET: AtomicBool = AtomicBool::new(false);
65
/// Records a failed test in the output and moves on to the next loop iteration.
///
/// Must be invoked inside a loop, since it expands to a `continue`. Accepted
/// forms:
/// - `fail!(output, name, kind)`
/// - `fail!(output, name, mdata, kind)`
/// - `fail!(output, name, mdata, count, kind)`
///
/// Omitted `mdata`/`count` are reported to `output.failure` as `None`.
macro_rules! fail {
    // Internal rule: every public form funnels into this single call site.
    (@emit $output:ident, $name:expr, $mdata:expr, $count:expr, $kind:expr) => {{
        $output.failure($name, $mdata, $count, $kind);
        continue;
    }};
    ($output:ident, $name:expr, $kind:expr) => {
        fail!(@emit $output, $name, None, None, $kind)
    };
    ($output:ident, $name:expr, $mdata:expr, $kind:expr) => {
        fail!(@emit $output, $name, Some($mdata), None, $kind)
    };
    ($output:ident, $name:expr, $mdata:expr, $count:expr, $kind:expr) => {
        fail!(@emit $output, $name, Some($mdata), Some($count), $kind)
    };
}
81
82/// How does this perf run return its output?
83enum OutputKind<'a> {
84 /// Print markdown to the terminal.
85 Markdown,
86 /// Save JSON to a file.
87 Json(&'a Path),
88}
89
90impl OutputKind<'_> {
91 /// Logs the output of a run as per the `OutputKind`.
92 fn log(&self, output: &Output, t_bin: &str) {
93 match self {
94 OutputKind::Markdown => print!("{output}"),
95 OutputKind::Json(ident) => {
96 // We're going to be in tooling/perf/$whatever.
97 let wspace_dir = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap())
98 .join("..")
99 .join("..");
100 let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
101 std::fs::create_dir_all(&runs_dir).unwrap();
102 assert!(
103 !ident.to_string_lossy().is_empty(),
104 "FATAL: Empty filename specified!"
105 );
106 // Get the test binary's crate's name; a path like
107 // target/release-fast/deps/gpui-061ff76c9b7af5d7
108 // would be reduced to just "gpui".
109 let test_bin_stripped = Path::new(t_bin)
110 .file_name()
111 .unwrap()
112 .to_str()
113 .unwrap()
114 .rsplit_once('-')
115 .unwrap()
116 .0;
117 let mut file_path = runs_dir.join(ident);
118 file_path
119 .as_mut_os_string()
120 .push(format!(".{test_bin_stripped}.json"));
121 let mut out_file = OpenOptions::new()
122 .write(true)
123 .create(true)
124 .truncate(true)
125 .open(&file_path)
126 .unwrap();
127 out_file
128 .write_all(&serde_json::to_vec(&output).unwrap())
129 .unwrap();
130 if !QUIET.load(Ordering::Relaxed) {
131 eprintln!("JSON output written to {}", file_path.display());
132 }
133 }
134 }
135 }
136}
137
138/// Runs a given metadata-returning function from a test handler, parsing its
139/// output into a `TestMdata`.
140fn parse_mdata(t_bin: &str, mdata_fn: &str) -> Result<TestMdata, FailKind> {
141 let mut cmd = Command::new(t_bin);
142 cmd.args([mdata_fn, "--exact", "--nocapture"]);
143 let out = cmd
144 .output()
145 .expect("FATAL: Could not run test binary {t_bin}");
146 assert!(out.status.success());
147 let stdout = String::from_utf8_lossy(&out.stdout);
148 let mut version = None;
149 let mut iterations = None;
150 let mut importance = Importance::default();
151 let mut weight = consts::WEIGHT_DEFAULT;
152 for line in stdout
153 .lines()
154 .filter_map(|l| l.strip_prefix(consts::MDATA_LINE_PREF))
155 {
156 let mut items = line.split_whitespace();
157 // For v0, we know the ident always comes first, then one field.
158 match items.next().ok_or(FailKind::BadMetadata)? {
159 consts::VERSION_LINE_NAME => {
160 let v = items
161 .next()
162 .ok_or(FailKind::BadMetadata)?
163 .parse::<u32>()
164 .map_err(|_| FailKind::BadMetadata)?;
165 if v > consts::MDATA_VER {
166 return Err(FailKind::VersionMismatch);
167 }
168 version = Some(v);
169 }
170 consts::ITER_COUNT_LINE_NAME => {
171 // This should never be zero!
172 iterations = Some(
173 items
174 .next()
175 .ok_or(FailKind::BadMetadata)?
176 .parse::<usize>()
177 .map_err(|_| FailKind::BadMetadata)?
178 .try_into()
179 .map_err(|_| FailKind::BadMetadata)?,
180 );
181 }
182 consts::IMPORTANCE_LINE_NAME => {
183 importance = match items.next().ok_or(FailKind::BadMetadata)? {
184 "critical" => Importance::Critical,
185 "important" => Importance::Important,
186 "average" => Importance::Average,
187 "iffy" => Importance::Iffy,
188 "fluff" => Importance::Fluff,
189 _ => return Err(FailKind::BadMetadata),
190 };
191 }
192 consts::WEIGHT_LINE_NAME => {
193 weight = items
194 .next()
195 .ok_or(FailKind::BadMetadata)?
196 .parse::<u8>()
197 .map_err(|_| FailKind::BadMetadata)?;
198 }
199 _ => unreachable!(),
200 }
201 }
202
203 Ok(TestMdata {
204 version: version.ok_or(FailKind::BadMetadata)?,
205 // Iterations may be determined by us and thus left unspecified.
206 iterations,
207 // In principle this should always be set, but just for the sake of
208 // stability allow the potentially-breaking change of not reporting the
209 // importance without erroring. Maybe we want to change this.
210 importance,
211 // Same with weight.
212 weight,
213 })
214}
215
216/// Compares the perf results of two profiles as per the arguments passed in.
217fn compare_profiles(args: &[String]) {
218 let ident_new = args.first().expect("FATAL: missing identifier for new run");
219 let ident_old = args.get(1).expect("FATAL: missing identifier for old run");
220 let wspace_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap();
221 let runs_dir = PathBuf::from(&wspace_dir)
222 .join("..")
223 .join("..")
224 .join(consts::RUNS_DIR);
225
226 // Use the blank outputs initially, so we can merge into these with prefixes.
227 let mut outputs_new = Output::blank();
228 let mut outputs_old = Output::blank();
229
230 for e in runs_dir.read_dir().unwrap() {
231 let Ok(entry) = e else {
232 continue;
233 };
234 let Ok(metadata) = entry.metadata() else {
235 continue;
236 };
237 if metadata.is_file() {
238 let Ok(name) = entry.file_name().into_string() else {
239 continue;
240 };
241
242 // A little helper to avoid code duplication. Reads the `output` from
243 // a json file, then merges it into what we have so far.
244 let read_into = |output: &mut Output| {
245 let mut elems = name.split('.').skip(1);
246 let prefix = elems.next().unwrap();
247 assert_eq!("json", elems.next().unwrap());
248 assert!(elems.next().is_none());
249 let handle = OpenOptions::new().read(true).open(entry.path()).unwrap();
250 let o_other: Output = serde_json::from_reader(handle).unwrap();
251 output.merge(o_other, prefix);
252 };
253
254 if name.starts_with(ident_old) {
255 read_into(&mut outputs_old);
256 } else if name.starts_with(ident_new) {
257 read_into(&mut outputs_new);
258 }
259 }
260 }
261
262 let res = outputs_new.compare_perf(outputs_old);
263 println!("{res}");
264}
265
266/// Runs a test binary, filtering out tests which aren't marked for perf triage
267/// and giving back the list of tests we care about.
268///
269/// The output of this is an iterator over `test_fn_name, test_mdata_name`.
270fn get_tests(t_bin: &str) -> impl ExactSizeIterator<Item = (String, String)> {
271 let mut cmd = Command::new(t_bin);
272 // --format=json is nightly-only :(
273 cmd.args(["--list", "--format=terse"]);
274 let out = cmd
275 .output()
276 .expect("FATAL: Could not run test binary {t_bin}");
277 assert!(
278 out.status.success(),
279 "FATAL: Cannot do perf check - test binary {t_bin} returned an error"
280 );
281 if !QUIET.load(Ordering::Relaxed) {
282 eprintln!("Test binary ran successfully; starting profile...");
283 }
284 // Parse the test harness output to look for tests we care about.
285 let stdout = String::from_utf8_lossy(&out.stdout);
286 let mut test_list: Vec<_> = stdout
287 .lines()
288 .filter_map(|line| {
289 // This should split only in two; e.g.,
290 // "app::test::test_arena: test" => "app::test::test_arena:", "test"
291 let line: Vec<_> = line.split_whitespace().collect();
292 match line[..] {
293 // Final byte of t_name is ":", which we need to ignore.
294 [t_name, kind] => (kind == "test").then(|| &t_name[..t_name.len() - 1]),
295 _ => None,
296 }
297 })
298 // Exclude tests that aren't marked for perf triage based on suffix.
299 .filter(|t_name| {
300 t_name.ends_with(consts::SUF_NORMAL) || t_name.ends_with(consts::SUF_MDATA)
301 })
302 .collect();
303
304 // Pulling itertools just for .dedup() would be quite a big dependency that's
305 // not used elsewhere, so do this on a vec instead.
306 test_list.sort_unstable();
307 test_list.dedup();
308
309 // Tests should come in pairs with their mdata fn!
310 assert!(
311 test_list.len().is_multiple_of(2),
312 "Malformed tests in test binary {t_bin}"
313 );
314
315 let out = test_list
316 .chunks_exact_mut(2)
317 .map(|pair| {
318 // Be resilient against changes to these constants.
319 if consts::SUF_NORMAL < consts::SUF_MDATA {
320 (pair[0].to_owned(), pair[1].to_owned())
321 } else {
322 (pair[1].to_owned(), pair[0].to_owned())
323 }
324 })
325 .collect::<Vec<_>>();
326 out.into_iter()
327}
328
329/// Runs the specified test `count` times, returning the time taken if the test
330/// succeeded.
331#[inline]
332fn spawn_and_iterate(t_bin: &str, t_name: &str, count: NonZero<usize>) -> Option<Duration> {
333 let mut cmd = Command::new(t_bin);
334 cmd.args([t_name, "--exact"]);
335 cmd.env(consts::ITER_ENV_VAR, format!("{count}"));
336 // Don't let the child muck up our stdin/out/err.
337 cmd.stdin(Stdio::null());
338 cmd.stdout(Stdio::null());
339 cmd.stderr(Stdio::null());
340 let pre = Instant::now();
341 // Discard the output beyond ensuring success.
342 let out = cmd.spawn().unwrap().wait();
343 let post = Instant::now();
344 out.iter().find_map(|s| s.success().then_some(post - pre))
345}
346
347/// Triage a test to determine the correct number of iterations that it should run.
348/// Specifically, repeatedly runs the given test until its execution time exceeds
349/// `thresh`, calling `step(iterations)` after every failed run to determine the new
350/// iteration count. Returns `None` if the test errored or `step` returned `None`,
351/// else `Some(iterations)`.
352///
353/// # Panics
354/// This will panic if `step(usize)` is not monotonically increasing, or if the test
355/// binary is invalid.
356fn triage_test(
357 t_bin: &str,
358 t_name: &str,
359 thresh: Duration,
360 mut step: impl FnMut(NonZero<usize>) -> Option<NonZero<usize>>,
361) -> Option<NonZero<usize>> {
362 let mut iter_count = DEFAULT_ITER_COUNT;
363 // It's possible that the first loop of a test might be an outlier (e.g. it's
364 // doing some caching), in which case we want to skip it.
365 let duration_once = spawn_and_iterate(t_bin, t_name, NonZero::new(1).unwrap())?;
366 loop {
367 let duration = spawn_and_iterate(t_bin, t_name, iter_count)?;
368 if duration.saturating_sub(duration_once) > thresh {
369 break Some(iter_count);
370 }
371 let new = step(iter_count)?;
372 assert!(
373 new > iter_count,
374 "FATAL: step must be monotonically increasing"
375 );
376 iter_count = new;
377 }
378}
379
380/// Profiles a given test with hyperfine, returning the mean and standard deviation
381/// for its runtime. If the test errors, returns `None` instead.
382fn hyp_profile(t_bin: &str, t_name: &str, iterations: NonZero<usize>) -> Option<Timings> {
383 let mut perf_cmd = Command::new("hyperfine");
384 // Warm up the cache and print markdown output to stdout, which we parse.
385 perf_cmd.args([
386 "--style",
387 "none",
388 "--warmup",
389 "1",
390 "--export-markdown",
391 "-",
392 // Parse json instead...
393 "--time-unit",
394 "millisecond",
395 &format!("{t_bin} --exact {t_name}"),
396 ]);
397 perf_cmd.env(consts::ITER_ENV_VAR, format!("{iterations}"));
398 let p_out = perf_cmd.output().unwrap();
399 if !p_out.status.success() {
400 return None;
401 }
402
403 let cmd_output = String::from_utf8_lossy(&p_out.stdout);
404 // Can't use .last() since we have a trailing newline. Sigh.
405 let results_line = cmd_output.lines().nth(3).unwrap();
406 // Grab the values out of the pretty-print.
407 // TODO: Parse json instead.
408 let mut res_iter = results_line.split_whitespace();
409 // Durations are given in milliseconds, so account for that.
410 let mean = Duration::from_secs_f64(res_iter.nth(5).unwrap().parse::<f64>().unwrap() / 1000.);
411 let stddev = Duration::from_secs_f64(res_iter.nth(1).unwrap().parse::<f64>().unwrap() / 1000.);
412
413 Some(Timings { mean, stddev })
414}
415
416fn main() {
417 let args = std::env::args().collect::<Vec<_>>();
418 // We get passed the test we need to run as the 1st argument after our own name.
419 let t_bin = args
420 .get(1)
421 .expect("FATAL: No test binary or command; this shouldn't be manually invoked!");
422
423 // We're being asked to compare two results, not run the profiler.
424 if t_bin == "compare" {
425 compare_profiles(&args[2..]);
426 return;
427 }
428
429 // Minimum test importance we care about this run.
430 let mut thresh = Importance::Iffy;
431 // Where to print the output of this run.
432 let mut out_kind = OutputKind::Markdown;
433
434 for arg in args.iter().skip(2) {
435 match arg.as_str() {
436 "--critical" => thresh = Importance::Critical,
437 "--important" => thresh = Importance::Important,
438 "--average" => thresh = Importance::Average,
439 "--iffy" => thresh = Importance::Iffy,
440 "--fluff" => thresh = Importance::Fluff,
441 "--quiet" => QUIET.store(true, Ordering::Relaxed),
442 s if s.starts_with("--json") => {
443 out_kind = OutputKind::Json(Path::new(
444 s.strip_prefix("--json=")
445 .expect("FATAL: Invalid json parameter; pass --json=ident"),
446 ));
447 }
448 _ => (),
449 }
450 }
451 if !QUIET.load(Ordering::Relaxed) {
452 eprintln!("Starting perf check");
453 }
454
455 let mut output = Output::default();
456
457 // Spawn and profile an instance of each perf-sensitive test, via hyperfine.
458 // Each test is a pair of (test, metadata-returning-fn), so grab both. We also
459 // know the list is sorted.
460 let i = get_tests(t_bin);
461 let len = i.len();
462 for (idx, (ref t_name, ref t_mdata)) in i.enumerate() {
463 if !QUIET.load(Ordering::Relaxed) {
464 eprint!("\rProfiling test {}/{}", idx + 1, len);
465 }
466 // Pretty-printable stripped name for the test.
467 let t_name_pretty = t_name.replace(consts::SUF_NORMAL, "");
468
469 // Get the metadata this test reports for us.
470 let t_mdata = match parse_mdata(t_bin, t_mdata) {
471 Ok(mdata) => mdata,
472 Err(err) => fail!(output, t_name_pretty, err),
473 };
474
475 if t_mdata.importance < thresh {
476 fail!(output, t_name_pretty, t_mdata, FailKind::Skipped);
477 }
478
479 // Time test execution to see how many iterations we need to do in order
480 // to account for random noise. This is skipped for tests with fixed
481 // iteration counts.
482 let final_iter_count = t_mdata.iterations.or_else(|| {
483 triage_test(t_bin, t_name, consts::NOISE_CUTOFF, |c| {
484 if let Some(c) = c.checked_mul(ITER_COUNT_MUL) {
485 Some(c)
486 } else {
487 // This should almost never happen, but maybe..?
488 eprintln!(
489 "WARNING: Ran nearly usize::MAX iterations of test {t_name_pretty}; skipping"
490 );
491 None
492 }
493 })
494 });
495
496 // Don't profile failing tests.
497 let Some(final_iter_count) = final_iter_count else {
498 fail!(output, t_name_pretty, t_mdata, FailKind::Triage);
499 };
500
501 // Now profile!
502 if let Some(timings) = hyp_profile(t_bin, t_name, final_iter_count) {
503 output.success(t_name_pretty, t_mdata, final_iter_count, timings);
504 } else {
505 fail!(
506 output,
507 t_name_pretty,
508 t_mdata,
509 final_iter_count,
510 FailKind::Profile
511 );
512 }
513 }
514 if !QUIET.load(Ordering::Relaxed) {
515 if output.is_empty() {
516 eprintln!("Nothing to do.");
517 } else {
518 // If stdout and stderr are on the same terminal, move us after the
519 // output from above.
520 eprintln!();
521 }
522 }
523
524 // No need making an empty json file on every empty test bin.
525 if output.is_empty() {
526 return;
527 }
528
529 out_kind.log(&output, t_bin);
530}