Detailed changes
@@ -5,6 +5,7 @@ rustflags = ["-C", "symbol-mangling-version=v0", "--cfg", "tokio_unstable"]
[alias]
xtask = "run --package xtask --"
perf-test = ["test", "--profile", "release-fast", "--lib", "--bins", "--tests", "--config", "target.'cfg(true)'.runner='target/release/perf'", "--config", "target.'cfg(true)'.rustflags=[\"--cfg\", \"perf_enabled\"]"]
+perf-compare = ["run", "--release", "-p", "perf", "--", "compare"]
[target.x86_64-unknown-linux-gnu]
linker = "clang"
@@ -24,3 +25,4 @@ rustflags = [
[env]
MACOSX_DEPLOYMENT_TARGET = "10.15.7"
+CARGO_WORKSPACE_DIR = { value = "", relative = true }
@@ -20,6 +20,7 @@
.venv
.vscode
.wrangler
+.perf-runs
/assets/*licenses.*
/crates/collab/seed.json
/crates/theme/schemas/theme.json
@@ -12169,6 +12169,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
name = "perf"
version = "0.1.0"
dependencies = [
+ "collections",
+ "serde",
+ "serde_json",
"workspace-hack",
]
@@ -18586,6 +18589,7 @@ dependencies = [
name = "util_macros"
version = "0.1.0"
dependencies = [
+ "perf",
"quote",
"syn 2.0.101",
"workspace-hack",
@@ -356,6 +356,7 @@ outline = { path = "crates/outline" }
outline_panel = { path = "crates/outline_panel" }
panel = { path = "crates/panel" }
paths = { path = "crates/paths" }
+perf = { path = "tooling/perf" }
picker = { path = "crates/picker" }
plugin = { path = "crates/plugin" }
plugin_macros = { path = "crates/plugin_macros" }
@@ -16,6 +16,7 @@ doctest = false
[dependencies]
quote.workspace = true
syn.workspace = true
+perf.workspace = true
workspace-hack.workspace = true
[features]
@@ -1,6 +1,7 @@
#![cfg_attr(not(target_os = "windows"), allow(unused))]
#![allow(clippy::test_attr_in_doctest)]
+use perf::*;
use proc_macro::TokenStream;
use quote::{ToTokens, quote};
use syn::{ItemFn, LitStr, parse_macro_input, parse_quote};
@@ -90,68 +91,81 @@ pub fn line_endings(input: TokenStream) -> TokenStream {
}
/// Inner data for the perf macro.
+#[derive(Default)]
struct PerfArgs {
- /// How many times to loop a test before rerunning the test binary.
- /// If left empty, the test harness will auto-determine this value.
+ /// How many times to loop a test before rerunning the test binary. If left
+ /// empty, the test harness will auto-determine this value.
iterations: Option<syn::Expr>,
+ /// How much this test's results should be weighed when comparing across runs.
+ /// If unspecified, defaults to `WEIGHT_DEFAULT` (50).
+ weight: Option<syn::Expr>,
+ /// How relevant a benchmark is to overall performance. See docs on the enum
+ /// for details. If unspecified, `Average` is selected.
+ importance: Importance,
}
-impl syn::parse::Parse for PerfArgs {
- fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
- if input.is_empty() {
- return Ok(PerfArgs { iterations: None });
- }
-
- let mut iterations = None;
- // In principle we only have one possible argument, but leave this as
- // a loop in case we expand this in the future.
- for meta in
- syn::punctuated::Punctuated::<syn::Meta, syn::Token![,]>::parse_terminated(input)?
- {
- match &meta {
- syn::Meta::NameValue(meta_name_value) => {
- if meta_name_value.path.is_ident("iterations") {
- iterations = Some(meta_name_value.value.clone());
- } else {
- return Err(syn::Error::new_spanned(
- &meta_name_value.path,
- "unexpected argument, expected 'iterations'",
- ));
- }
- }
- _ => {
- return Err(syn::Error::new_spanned(
- meta,
- "expected name-value argument like 'iterations = 1'",
- ));
- }
- }
+#[warn(clippy::all, clippy::pedantic)]
+impl PerfArgs {
+    /// Parses a single attribute argument and records it in this `PerfArgs`.
+ fn parse_into(&mut self, meta: syn::meta::ParseNestedMeta) -> syn::Result<()> {
+ if meta.path.is_ident("iterations") {
+ self.iterations = Some(meta.value()?.parse()?);
+ } else if meta.path.is_ident("weight") {
+ self.weight = Some(meta.value()?.parse()?);
+ } else if meta.path.is_ident("critical") {
+ self.importance = Importance::Critical;
+ } else if meta.path.is_ident("important") {
+ self.importance = Importance::Important;
+ } else if meta.path.is_ident("average") {
+ // This shouldn't be specified manually, but oh well.
+ self.importance = Importance::Average;
+ } else if meta.path.is_ident("iffy") {
+ self.importance = Importance::Iffy;
+ } else if meta.path.is_ident("fluff") {
+ self.importance = Importance::Fluff;
+ } else {
+ return Err(syn::Error::new_spanned(meta.path, "unexpected identifier"));
}
-
- Ok(PerfArgs { iterations })
+ Ok(())
}
}
/// Marks a test as perf-sensitive, to be triaged when checking the performance
/// of a build. This also automatically applies `#[test]`.
///
+/// # Usage
+/// Applying this attribute to a test marks it as average importance by default.
+/// There are five levels of importance (`Critical`, `Important`, `Average`, `Iffy`, `Fluff`);
+/// see the documentation on `Importance` for details. Add the importance as a
+/// parameter to override the default (e.g. `#[perf(important)]`).
+///
+/// Each test also has a weight factor. This is irrelevant on its own, but is considered
+/// when comparing results across different runs. By default, this is set to 50;
+/// pass `weight = n` as a parameter to override this. Note that this value is only
+/// relevant within its importance category.
+///
/// By default, the number of iterations when profiling this test is auto-determined.
-/// If this needs to be overwritten, pass the desired iteration count to the macro
-/// as a parameter (`#[perf(iterations = n)]`). Note that the actual profiler may still
-/// run the test an arbitrary number times; this flag just sets the number of executions
-/// before the process is restarted and global state is reset.
+/// If this needs to be overwritten, pass the desired iteration count as a parameter
+/// (`#[perf(iterations = n)]`). Note that the actual profiler may still run the test
+/// an arbitrary number of times; this flag just sets the number of executions before the
+/// process is restarted and global state is reset.
///
-/// # Usage notes
-/// This should probably not be applied to tests that do any significant fs IO, as
-/// locks on files may not be released in time when repeating a test many times. This
-/// might lead to spurious failures.
+/// This attribute should probably not be applied to tests that do any significant
+/// disk IO, as locks on files may not be released in time when repeating a test many
+/// times. This might lead to spurious failures.
///
/// # Examples
/// ```rust
/// use util_macros::perf;
///
/// #[perf]
-/// fn expensive_computation_test() {
+/// fn generic_test() {
+/// // Test goes here.
+/// }
+///
+/// #[perf(fluff, weight = 30)]
+/// fn cold_path_test() {
/// // Test goes here.
/// }
/// ```
@@ -161,72 +175,108 @@ impl syn::parse::Parse for PerfArgs {
/// ```rust,ignore
/// use util_macros::perf;
///
-/// #[perf(iterations = 1)]
+/// #[perf(iterations = 1, critical)]
/// #[gpui::test]
/// fn oneshot_test(_cx: &mut gpui::TestAppContext) {
/// // Test goes here.
/// }
/// ```
#[proc_macro_attribute]
+#[warn(clippy::all, clippy::pedantic)]
pub fn perf(our_attr: TokenStream, input: TokenStream) -> TokenStream {
- // If any of the below constants are changed, make sure to also update the perf
- // profiler to match!
-
- /// The suffix on tests marked with `#[perf]`.
- const SUF_NORMAL: &str = "__ZED_PERF";
- /// The suffix on tests marked with `#[perf(iterations = n)]`.
- const SUF_FIXED: &str = "__ZED_PERF_FIXEDITER";
- /// The env var in which we pass the iteration count to our tests.
- const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
-
- let iter_count = parse_macro_input!(our_attr as PerfArgs).iterations;
+ let mut args = PerfArgs::default();
+ let parser = syn::meta::parser(|meta| PerfArgs::parse_into(&mut args, meta));
+ parse_macro_input!(our_attr with parser);
let ItemFn {
- mut attrs,
+ attrs: mut attrs_main,
vis,
- mut sig,
+ sig: mut sig_main,
block,
} = parse_macro_input!(input as ItemFn);
- attrs.push(parse_quote!(#[test]));
- attrs.push(parse_quote!(#[allow(non_snake_case)]));
+ attrs_main.push(parse_quote!(#[test]));
+ attrs_main.push(parse_quote!(#[allow(non_snake_case)]));
+
+ let fns = if cfg!(perf_enabled) {
+ #[allow(clippy::wildcard_imports, reason = "We control the other side")]
+ use consts::*;
- let block: Box<syn::Block> = if cfg!(perf_enabled) {
// Make the ident obvious when calling, for the test parser.
- let mut new_ident = sig.ident.to_string();
- if iter_count.is_some() {
- new_ident.push_str(SUF_FIXED);
- } else {
- new_ident.push_str(SUF_NORMAL);
- }
+ // Also set up values for the second metadata-returning "test".
+ let mut new_ident_main = sig_main.ident.to_string();
+ let mut new_ident_meta = new_ident_main.clone();
+ new_ident_main.push_str(SUF_NORMAL);
+ new_ident_meta.push_str(SUF_MDATA);
- let new_ident = syn::Ident::new(&new_ident, sig.ident.span());
- sig.ident = new_ident;
- // If we have a preset iteration count, just use that.
- if let Some(iter_count) = iter_count {
- parse_quote!({
- for _ in 0..#iter_count {
- #block
- }
- })
- } else {
- // Otherwise, the perf harness will pass us the value in an env var.
+ let new_ident_main = syn::Ident::new(&new_ident_main, sig_main.ident.span());
+ sig_main.ident = new_ident_main;
+
+ // We don't want any nonsense if the original test had a weird signature.
+ let new_ident_meta = syn::Ident::new(&new_ident_meta, sig_main.ident.span());
+ let sig_meta = parse_quote!(fn #new_ident_meta());
+ let attrs_meta = parse_quote!(#[test] #[allow(non_snake_case)]);
+
+ // Make the test loop as the harness instructs it to.
+ let block_main = {
+ // The perf harness will pass us the value in an env var. Even if we
+ // have a preset value, just do this to keep the code paths unified.
parse_quote!({
let iter_count = std::env::var(#ITER_ENV_VAR).unwrap().parse::<usize>().unwrap();
for _ in 0..iter_count {
#block
}
})
- }
+ };
+ let importance = format!("{}", args.importance);
+ let block_meta = {
+ // This function's job is to just print some relevant info to stdout,
+ // based on the params this attr is passed. It's not an actual test.
+ // Since we use a custom attr set on our metadata fn, it shouldn't
+ // cause problems with xfail tests.
+ let q_iter = if let Some(iter) = args.iterations {
+ quote! {
+ println!("{} {} {}", #MDATA_LINE_PREF, #ITER_COUNT_LINE_NAME, #iter);
+ }
+ } else {
+ quote! {}
+ };
+ let weight = args
+ .weight
+ .unwrap_or_else(|| parse_quote! { #WEIGHT_DEFAULT });
+ parse_quote!({
+ #q_iter
+ println!("{} {} {}", #MDATA_LINE_PREF, #WEIGHT_LINE_NAME, #weight);
+ println!("{} {} {}", #MDATA_LINE_PREF, #IMPORTANCE_LINE_NAME, #importance);
+ println!("{} {} {}", #MDATA_LINE_PREF, #VERSION_LINE_NAME, #MDATA_VER);
+ })
+ };
+
+ vec![
+ // The real test.
+ ItemFn {
+ attrs: attrs_main,
+ vis: vis.clone(),
+ sig: sig_main,
+ block: block_main,
+ },
+ // The fake test.
+ ItemFn {
+ attrs: attrs_meta,
+ vis,
+ sig: sig_meta,
+ block: block_meta,
+ },
+ ]
} else {
- block
+ vec![ItemFn {
+ attrs: attrs_main,
+ vis,
+ sig: sig_main,
+ block,
+ }]
};
- ItemFn {
- attrs,
- vis,
- sig,
- block,
- }
- .into_token_stream()
- .into()
+ fns.into_iter()
+ .flat_map(|f| TokenStream::from(f.into_token_stream()))
+ .collect()
}
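For readers following the macro logic, here is a rough sketch of what `#[perf]` expands to when the crate is compiled with `--cfg perf_enabled`. The test body, names, and numbers are hypothetical; the suffixes, env var, and metadata line format come from `perf::consts` as used above.

```rust
// A sketch only, assuming `--cfg perf_enabled`; names and numbers are hypothetical.
// Original input:
//
//     #[perf(important, weight = 80)]
//     fn my_test() { do_work(); }
//
fn do_work() {} // stand-in for the original test body

// The real test, renamed with `SUF_NORMAL` and looped as the harness instructs.
#[test]
#[allow(non_snake_case)]
fn my_test__ZED_PERF_FN() {
    let iter_count = std::env::var("ZED_PERF_ITER").unwrap().parse::<usize>().unwrap();
    for _ in 0..iter_count {
        do_work();
    }
}

// The companion "test" that only prints metadata for the profiler to scrape.
#[test]
#[allow(non_snake_case)]
fn my_test__ZED_PERF_MDATA() {
    println!("ZED_MDATA_ weight 80");
    println!("ZED_MDATA_ importance important");
    println!("ZED_MDATA_ version 0");
}
```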
@@ -1,11 +1,30 @@
[package]
name = "perf"
version = "0.1.0"
+description = "A tool for measuring Zed test performance, with too many Clippy lints"
publish.workspace = true
edition.workspace = true
-[lints]
-workspace = true
+[lib]
+
+# Some personal lint preferences :3
+[lints.rust]
+missing_docs = "warn"
+
+[lints.clippy]
+needless_continue = "allow" # For a convenience macro
+all = "warn"
+pedantic = "warn"
+style = "warn"
+missing_docs_in_private_items = "warn"
+as_underscore = "deny"
+allow_attributes_without_reason = "deny"
+let_underscore_must_use = "forbid"
+undocumented_unsafe_blocks = "forbid"
+missing_safety_doc = "forbid"
[dependencies]
+collections.workspace = true
+serde.workspace = true
+serde_json.workspace = true
workspace-hack.workspace = true
@@ -0,0 +1,413 @@
+//! Some constants and datatypes used in the Zed perf profiler. Should only be
+//! consumed by the crate providing the matching macros.
+
+use collections::HashMap;
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+pub mod consts {
+    //! Preset identifiers and constants so that the profiler and proc macro agree
+ //! on their communication protocol.
+
+ /// The suffix on the actual test function.
+ pub const SUF_NORMAL: &str = "__ZED_PERF_FN";
+ /// The suffix on an extra function which prints metadata about a test to stdout.
+ pub const SUF_MDATA: &str = "__ZED_PERF_MDATA";
+ /// The env var in which we pass the iteration count to our tests.
+ pub const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
+ /// The prefix printed on all benchmark test metadata lines, to distinguish it from
+ /// possible output by the test harness itself.
+ pub const MDATA_LINE_PREF: &str = "ZED_MDATA_";
+ /// The version number for the data returned from the test metadata function.
+ /// Increment on non-backwards-compatible changes.
+ pub const MDATA_VER: u32 = 0;
+ /// The default weight, if none is specified.
+ pub const WEIGHT_DEFAULT: u8 = 50;
+ /// How long a test must have run to be assumed to be reliable-ish.
+ pub const NOISE_CUTOFF: std::time::Duration = std::time::Duration::from_millis(250);
+
+    /// Identifier for the iteration count in test metadata.
+    pub const ITER_COUNT_LINE_NAME: &str = "iter_count";
+    /// Identifier for the weight in test metadata.
+ pub const WEIGHT_LINE_NAME: &str = "weight";
+ /// Identifier for importance in test metadata.
+ pub const IMPORTANCE_LINE_NAME: &str = "importance";
+ /// Identifier for the test metadata version.
+ pub const VERSION_LINE_NAME: &str = "version";
+
+ /// Where to save json run information.
+ pub const RUNS_DIR: &str = ".perf-runs";
+}
+
+/// How relevant a benchmark is.
+#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
+pub enum Importance {
+ /// Regressions shouldn't be accepted without good reason.
+ Critical = 4,
+ /// Regressions should be paid extra attention.
+ Important = 3,
+ /// No extra attention should be paid to regressions, but they might still
+ /// be indicative of something happening.
+ #[default]
+ Average = 2,
+ /// Unclear if regressions are likely to be meaningful, but still worth keeping
+ /// an eye on. Lowest level that's checked by default by the profiler.
+ Iffy = 1,
+ /// Regressions are likely to be spurious or don't affect core functionality.
+ /// Only relevant if a lot of them happen, or as supplemental evidence for a
+ /// higher-importance benchmark regressing. Not checked by default.
+ Fluff = 0,
+}
+
+impl std::fmt::Display for Importance {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Importance::Critical => f.write_str("critical"),
+ Importance::Important => f.write_str("important"),
+ Importance::Average => f.write_str("average"),
+ Importance::Iffy => f.write_str("iffy"),
+ Importance::Fluff => f.write_str("fluff"),
+ }
+ }
+}
+
+/// Why or when did this test fail?
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub enum FailKind {
+ /// Failed while triaging it to determine the iteration count.
+ Triage,
+ /// Failed while profiling it.
+ Profile,
+ /// Failed due to an incompatible version for the test.
+ VersionMismatch,
+ /// Skipped due to filters applied on the perf run.
+ Skipped,
+}
+
+impl std::fmt::Display for FailKind {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ FailKind::Triage => f.write_str("failed in triage"),
+ FailKind::Profile => f.write_str("failed while profiling"),
+ FailKind::VersionMismatch => f.write_str("test version mismatch"),
+ FailKind::Skipped => f.write_str("skipped"),
+ }
+ }
+}
+
+/// Information about a given perf test.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct TestMdata {
+ /// A version number for when the test was generated. If this is greater
+ /// than the version this test handler expects, one of the following will
+ /// happen in an unspecified manner:
+ /// - The test is skipped silently.
+ /// - The handler exits with an error message indicating the version mismatch
+ /// or inability to parse the metadata.
+ ///
+ /// INVARIANT: If `version` <= `MDATA_VER`, this tool *must* be able to
+ /// correctly parse the output of this test.
+ pub version: u32,
+ /// How many iterations to pass this test, if this is preset.
+ pub iterations: Option<usize>,
+ /// The importance of this particular test. See the docs on `Importance` for
+ /// details.
+ pub importance: Importance,
+ /// The weight of this particular test within its importance category. Used
+ /// when comparing across runs.
+ pub weight: u8,
+}
+
+/// The actual timings of a test, as measured by Hyperfine.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct Timings {
+    /// Mean runtime of one profiled invocation of this test, i.e. one batch of
+    /// the configured number of iterations, as reported by hyperfine.
+ pub mean: Duration,
+ /// Standard deviation for the above.
+ pub stddev: Duration,
+}
+
+impl Timings {
+ /// How many iterations does this test seem to do per second?
+ #[expect(
+ clippy::cast_precision_loss,
+ reason = "We only care about a couple sig figs anyways"
+ )]
+ #[must_use]
+ pub fn iters_per_sec(&self, total_iters: usize) -> f64 {
+ (1000. / self.mean.as_millis() as f64) * total_iters as f64
+ }
+}
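A quick sanity check of the arithmetic with made-up numbers: a 2 ms mean for an invocation that ran 8 iterations works out to (1000 / 2) * 8 = 4000 iterations per second.

```rust
// A minimal sketch with made-up numbers, assuming the `Timings` type above.
use std::time::Duration;

fn iters_per_sec_example() {
    let timings = Timings {
        mean: Duration::from_millis(2),
        stddev: Duration::from_micros(100),
    };
    // (1000 / 2) per-second scale factor * 8 iterations = 4000 iter/sec.
    assert!((timings.iters_per_sec(8) - 4000.0).abs() < 1e-9);
}
```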
+
+/// Aggregate output of all tests run by this handler.
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+pub struct Output {
+    /// A list of test outputs. Format is `(test_name, mdata, timings)`;
+    /// `timings` being `Ok(_)` indicates the test succeeded.
+ ///
+ /// INVARIANT: If the test succeeded, the second field is `Some(mdata)` and
+ /// `mdata.iterations` is `Some(_)`.
+ tests: Vec<(String, Option<TestMdata>, Result<Timings, FailKind>)>,
+}
+
+impl Output {
+ /// Instantiates an empty "output". Useful for merging.
+ #[must_use]
+ pub fn blank() -> Self {
+ Output { tests: Vec::new() }
+ }
+
+ /// Reports a success and adds it to this run's `Output`.
+ pub fn success(
+ &mut self,
+ name: impl AsRef<str>,
+ mut mdata: TestMdata,
+ iters: usize,
+ timings: Timings,
+ ) {
+ mdata.iterations = Some(iters);
+ self.tests
+ .push((name.as_ref().to_string(), Some(mdata), Ok(timings)));
+ }
+
+    /// Reports a failure and adds it to this run's `Output`. If this test was attempted
+    /// with some number of iterations (i.e. this was not a version mismatch or a skipped
+    /// test), that count should be reported as well.
+ ///
+ /// Using the `fail!()` macro is usually more convenient.
+ pub fn failure(
+ &mut self,
+ name: impl AsRef<str>,
+ mut mdata: Option<TestMdata>,
+ attempted_iters: Option<usize>,
+ kind: FailKind,
+ ) {
+ if let Some(ref mut mdata) = mdata {
+ mdata.iterations = attempted_iters;
+ }
+ self.tests
+ .push((name.as_ref().to_string(), mdata, Err(kind)));
+ }
+
+ /// True if no tests executed this run.
+ #[must_use]
+ pub fn is_empty(&self) -> bool {
+ self.tests.is_empty()
+ }
+
+ /// Sorts the runs in the output in the order that we want it printed.
+ pub fn sort(&mut self) {
+ self.tests.sort_unstable_by(|a, b| match (a, b) {
+            // Tests where we got no metadata go at the end.
+            ((_, Some(_), _), (_, None, _)) => std::cmp::Ordering::Less,
+            ((_, None, _), (_, Some(_), _)) => std::cmp::Ordering::Greater,
+            // Then sort by importance (most important first), then by weight
+            // (heaviest first).
+            ((_, Some(a_mdata), _), (_, Some(b_mdata), _)) => {
+                let c = b_mdata.importance.cmp(&a_mdata.importance);
+                if matches!(c, std::cmp::Ordering::Equal) {
+                    b_mdata.weight.cmp(&a_mdata.weight)
+ } else {
+ c
+ }
+ }
+ // Lastly by name.
+ ((a_name, ..), (b_name, ..)) => a_name.cmp(b_name),
+ });
+ }
+
+    /// Merges the output of two runs, prefixing the new run's test names with a crate prefix.
+ /// To be used in conjunction with `Output::blank()`, or else only some tests will have
+ /// a prefix set.
+ pub fn merge(&mut self, other: Self, pref_other: impl AsRef<str>) {
+ self.tests = std::mem::take(&mut self.tests)
+ .into_iter()
+ .chain(other.tests.into_iter().map(|(name, md, tm)| {
+ let mut new_name = "crates/".to_string();
+ new_name.push_str(pref_other.as_ref());
+ new_name.push_str("::");
+ new_name.push_str(&name);
+ (new_name, md, tm)
+ }))
+ .collect();
+ }
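A minimal sketch of the intended call pattern; the file name reuses the hypothetical `--json=blah` example from the profiler docs below, and the types are the ones defined above.

```rust
// Sketch, assuming the types above; the file name is hypothetical.
fn merge_example() {
    let mut combined = Output::blank();
    let handle = std::fs::File::open(".perf-runs/blah.gpui.json").unwrap();
    let gpui_run: Output = serde_json::from_reader(handle).unwrap();
    // Tests from this run are now keyed as "crates/gpui::<original name>", which
    // is how they get matched against the other run when comparing.
    combined.merge(gpui_run, "gpui");
}
```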
+
+ /// Evaluates the performance of `self` against `baseline`. The latter is taken
+ /// as the comparison point, i.e. a positive resulting `PerfReport` means that
+ /// `self` performed better.
+ ///
+    /// # Panics
+    /// Panics if a successful test in either `self` or `baseline` has a `TestMdata`
+    /// whose `iterations` field is `None` (the invariant on `Output` rules this out).
+ #[must_use]
+ pub fn compare_perf(self, baseline: Self) -> PerfReport {
+ let self_categories = self.collapse();
+ let mut other_categories = baseline.collapse();
+
+ let deltas = self_categories
+ .into_iter()
+ .filter_map(|(cat, self_data)| {
+                // Only compare categories where both runs have data.
+ let mut other_data = other_categories.remove(&cat)?;
+ let mut max = 0.;
+ let mut min = 0.;
+
+ // Running totals for averaging out tests.
+ let mut r_total_numerator = 0.;
+ let mut r_total_denominator = 0;
+ // Yeah this is O(n^2), but realistically it'll hardly be a bottleneck.
+ for (name, (s_timings, s_iters, weight)) in self_data {
+                // If the two runs disagree on a test's weight, the new run's weight wins.
+ let Some((o_timings, o_iters, _)) = other_data.remove(&name) else {
+ continue;
+ };
+ let shift =
+ (s_timings.iters_per_sec(s_iters) / o_timings.iters_per_sec(o_iters)) - 1.;
+ if shift > max {
+ max = shift;
+ }
+ if shift < min {
+ min = shift;
+ }
+ r_total_numerator += shift * f64::from(weight);
+ r_total_denominator += u32::from(weight);
+ }
+ let mean = r_total_numerator / f64::from(r_total_denominator);
+ // TODO: also aggregate standard deviation? that's harder to keep
+                // meaningful, though, since we don't know which tests are correlated.
+ Some((cat, PerfDelta { max, mean, min }))
+ })
+ .collect();
+
+ PerfReport { deltas }
+ }
+
+    /// Collapses this `Output` into a `HashMap` from `Importance` to the tests in
+    /// that category, each represented as a map of `name -> (Timings, iterations, weight)`.
+ fn collapse(self) -> HashMap<Importance, HashMap<String, (Timings, usize, u8)>> {
+ let mut categories = HashMap::<Importance, HashMap<String, _>>::default();
+ for entry in self.tests {
+ if let Some(mdata) = entry.1
+ && let Ok(timings) = entry.2
+ {
+ if let Some(handle) = categories.get_mut(&mdata.importance) {
+ handle.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
+ } else {
+ let mut new = HashMap::default();
+ new.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
+ categories.insert(mdata.importance, new);
+ }
+ }
+ }
+
+ categories
+ }
+}
+
+impl std::fmt::Display for Output {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ // Don't print the header for an empty run.
+ if self.tests.is_empty() {
+ return Ok(());
+ }
+
+ // We want to print important tests at the top, then alphabetical.
+ let mut sorted = self.clone();
+ sorted.sort();
+ // Markdown header for making a nice little table :>
+ writeln!(
+ f,
+ "| Command | Iter/sec | Mean [ms] | SD [ms] | Iterations | Importance (weight) |",
+ )?;
+ writeln!(f, "|:---|---:|---:|---:|---:|---:|")?;
+ for (name, metadata, timings) in &sorted.tests {
+ match metadata {
+ Some(metadata) => match timings {
+ // Happy path.
+ Ok(timings) => {
+ // If the test succeeded, then metadata.iterations is Some(_).
+ writeln!(
+ f,
+ "| {} | {:.2} | {} | {:.2} | {} | {} ({}) |",
+ name,
+ timings.iters_per_sec(metadata.iterations.unwrap()),
+ {
+ // Very small mean runtimes will give inaccurate
+ // results. Should probably also penalise weight.
+ let mean = timings.mean.as_secs_f64() * 1000.;
+ if mean < consts::NOISE_CUTOFF.as_secs_f64() * 1000. / 8. {
+ format!("{mean:.2} (unreliable)")
+ } else {
+ format!("{mean:.2}")
+ }
+ },
+ timings.stddev.as_secs_f64() * 1000.,
+ metadata.iterations.unwrap(),
+ metadata.importance,
+ metadata.weight,
+ )?;
+ }
+ // We have (some) metadata, but the test errored.
+ Err(err) => writeln!(
+ f,
+ "| ({}) {} | N/A | N/A | N/A | {} | {} ({}) |",
+ err,
+ name,
+ metadata
+ .iterations
+ .map_or_else(|| "N/A".to_owned(), |i| format!("{i}")),
+ metadata.importance,
+ metadata.weight
+ )?,
+ },
+ // No metadata, couldn't even parse the test output.
+ None => writeln!(
+ f,
+ "| ({}) {} | N/A | N/A | N/A | N/A | N/A |",
+ timings.as_ref().unwrap_err(),
+ name
+ )?,
+ }
+ }
+ writeln!(f)?;
+ Ok(())
+ }
+}
+
+/// The difference in performance between two runs within a given importance
+/// category.
+struct PerfDelta {
+ /// The biggest improvement / least bad regression.
+ max: f64,
+ /// The weighted average change in test times.
+ mean: f64,
+ /// The worst regression / smallest improvement.
+ min: f64,
+}
+
+/// Shim type for reporting all performance deltas across importance categories.
+pub struct PerfReport {
+ /// Inner (group, diff) pairing.
+ deltas: HashMap<Importance, PerfDelta>,
+}
+
+impl std::fmt::Display for PerfReport {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ if self.deltas.is_empty() {
+ return write!(f, "(no matching tests)");
+ }
+        let mut sorted = self.deltas.iter().collect::<Vec<_>>();
+        sorted.sort_unstable_by(|a, b| a.0.cmp(b.0));
+ writeln!(f, "| Category | Max | Mean | Min |")?;
+ // We don't want to print too many newlines at the end, so handle newlines
+ // a little jankily like this.
+ write!(f, "|:---|---:|---:|---:|")?;
+ for (cat, delta) in sorted.into_iter().rev() {
+ write!(
+ f,
+ "\n| {cat} | {:.3} | {:.3} | {:.3} |",
+ delta.max, delta.mean, delta.min
+ )?;
+ }
+ Ok(())
+ }
+}
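Putting the pieces together, a hedged end-to-end sketch of how two saved runs might be compared programmatically; the file names and the `load` helper are hypothetical, and the real CLI path is `compare_profiles` in the profiler below.

```rust
// A sketch, assuming the types above; file names and `load` are hypothetical.
fn load(path: &str) -> Output {
    let handle = std::fs::File::open(path).unwrap();
    serde_json::from_reader(handle).unwrap()
}

fn compare_example() {
    let mut new_run = Output::blank();
    let mut old_run = Output::blank();
    new_run.merge(load(".perf-runs/new.gpui.json"), "gpui");
    old_run.merge(load(".perf-runs/old.gpui.json"), "gpui");
    // Positive deltas mean `new_run` did more iterations per second.
    let report: PerfReport = new_run.compare_perf(old_run);
    println!("{report}");
}
```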
@@ -1,18 +1,15 @@
-#![warn(clippy::all, clippy::pedantic, clippy::undocumented_unsafe_blocks)]
-#![cfg_attr(release, deny(warnings))]
-
//! Perf profiler for Zed tests. Outputs timings of tests marked with the `#[perf]`
//! attribute to stdout in Markdown. See the documentation of `util_macros::perf`
//! for usage details on the actual attribute.
//!
//! # Setup
//! Make sure `hyperfine` is installed and in the shell path, then run
-//! `cargo build --bin perf --workspace --release` to build the profiler.
+//! `cargo build -p perf --release` to build the profiler.
//!
//! # Usage
-//! Calling this tool rebuilds everything with some cfg flags set for the perf
-//! proc macro *and* enables optimisations (`release-fast` profile), so expect it
-//! to take a little while.
+//! Calling this tool rebuilds the targeted crate(s) with some cfg flags set for the
+//! perf proc macro *and* enables optimisations (`release-fast` profile), so expect
+//! it to take a little while.
//!
//! To test an individual crate, run:
//! ```sh
@@ -24,37 +21,221 @@
//! cargo perf-test --workspace
//! ```
//!
+//! Some command-line parameters are also recognised by this profiler. To filter
+//! out all tests below a certain importance (e.g. `important`), run:
+//! ```sh
+//! cargo perf-test $WHATEVER -- --important
+//! ```
+//!
+//! Similarly, to skip outputting progress to the command line, pass `-- --quiet`.
+//! These flags can be combined.
+//!
+//! ## Comparing runs
+//! Passing `--json=ident` will save per-crate run files in `.perf-runs`, e.g.
+//! `cargo perf-test -p gpui -- --json=blah` will result in `.perf-runs/blah.gpui.json`
+//! being created (unless no tests were run). These results can be automatically
+//! compared. To do so, run `cargo perf-compare new-ident old-ident`.
+//!
+//! NB: All files matching `.perf-runs/ident.*.json` will be considered when
+//! doing this comparison, so ensure there aren't leftover files in your `.perf-runs`
+//! directory that might match that!
+//!
//! # Notes
//! This should probably not be called manually unless you're working on the profiler
//! itself; use the `cargo perf-test` alias (after building this crate) instead.
+#[allow(clippy::wildcard_imports, reason = "Our crate")]
+use perf::*;
+
use std::{
+ fs::OpenOptions,
+ io::Write,
+ path::{Path, PathBuf},
process::{Command, Stdio},
time::{Duration, Instant},
};
/// How many iterations to attempt the first time a test is run.
-const DEFAULT_ITER_COUNT: usize = 12;
+const DEFAULT_ITER_COUNT: usize = 3;
/// Multiplier for the iteration count when a test doesn't pass the noise cutoff.
const ITER_COUNT_MUL: usize = 4;
-/// How long a test must have run to be assumed to be reliable-ish.
-const NOISE_CUTOFF: Duration = Duration::from_millis(250);
-// If any of the below constants are changed, make sure to also update the perf
-// proc macro to match!
+/// Report a failure into the output and skip to the next test.
+macro_rules! fail {
+ ($output:ident, $name:expr, $kind:expr) => {{
+ $output.failure($name, None, None, $kind);
+ continue;
+ }};
+ ($output:ident, $name:expr, $mdata:expr, $kind:expr) => {{
+ $output.failure($name, Some($mdata), None, $kind);
+ continue;
+ }};
+ ($output:ident, $name:expr, $mdata:expr, $count:expr, $kind:expr) => {{
+ $output.failure($name, Some($mdata), Some($count), $kind);
+ continue;
+ }};
+}
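A minimal usage sketch of the fullest arity, assuming the `perf` crate items imported at the top of this file; the test name, metadata, and iteration count are hypothetical. Note the macro `continue`s, so it only makes sense inside the per-test loop (the real call sites appear further down).

```rust
// Sketch only; values are hypothetical.
fn fail_example() {
    let mut output = Output::blank();
    for t_name_pretty in ["gpui::app::test_spawn"] {
        let t_mdata = TestMdata {
            version: 0,
            iterations: None,
            importance: Importance::Average,
            weight: consts::WEIGHT_DEFAULT,
        };
        // Pretend hyperfine failed after we settled on 12 iterations; this records
        // the failure and `continue`s the surrounding loop.
        fail!(output, t_name_pretty, t_mdata, 12, FailKind::Profile);
    }
}
```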
-/// The suffix on tests marked with `#[perf]`.
-const SUF_NORMAL: &str = "__ZED_PERF";
-/// The suffix on tests marked with `#[perf(iterations = n)]`.
-const SUF_FIXED: &str = "__ZED_PERF_FIXEDITER";
-/// The env var in which we pass the iteration count to our tests.
-const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
+/// How does this perf run return its output?
+enum OutputKind<'a> {
+ /// Print markdown to the terminal.
+ Markdown,
+ /// Save JSON to a file.
+ Json(&'a Path),
+}
-#[allow(clippy::too_many_lines)]
+/// Runs a given metadata-returning function from a test handler, parsing its
+/// output into a `TestMdata`.
+fn parse_mdata(test_bin: &str, mdata_fn: &str) -> Result<TestMdata, FailKind> {
+ let mut cmd = Command::new(test_bin);
+ cmd.args([mdata_fn, "--exact", "--nocapture"]);
+ let out = cmd
+ .output()
+ .expect("FATAL: Could not run test binary {test_bin}");
+ assert!(out.status.success());
+ let stdout = String::from_utf8_lossy(&out.stdout);
+ let mut version = None;
+ let mut iterations = None;
+ let mut importance = Importance::default();
+ let mut weight = consts::WEIGHT_DEFAULT;
+ for line in stdout
+ .lines()
+ .filter_map(|l| l.strip_prefix(consts::MDATA_LINE_PREF))
+ {
+ let mut items = line.split_whitespace();
+ // For v0, we know the ident always comes first, then one field.
+ match items.next().unwrap() {
+ consts::VERSION_LINE_NAME => {
+ let v = items.next().unwrap().parse::<u32>().unwrap();
+ if v > consts::MDATA_VER {
+ return Err(FailKind::VersionMismatch);
+ }
+ version = Some(v);
+ }
+ consts::ITER_COUNT_LINE_NAME => {
+ iterations = Some(items.next().unwrap().parse::<usize>().unwrap());
+ }
+ consts::IMPORTANCE_LINE_NAME => {
+ importance = match items.next().unwrap() {
+ "critical" => Importance::Critical,
+ "important" => Importance::Important,
+ "average" => Importance::Average,
+ "iffy" => Importance::Iffy,
+ "fluff" => Importance::Fluff,
+ _ => unreachable!(),
+ };
+ }
+ consts::WEIGHT_LINE_NAME => {
+ weight = items.next().unwrap().parse::<u8>().unwrap();
+ }
+ _ => unreachable!(),
+ }
+ }
+
+ Ok(TestMdata {
+ version: version.unwrap(),
+ // Iterations may be determined by us and thus left unspecified.
+ iterations,
+ // In principle this should always be set, but just for the sake of
+ // stability allow the potentially-breaking change of not reporting the
+ // importance without erroring. Maybe we want to change this.
+ importance,
+ // Same with weight.
+ weight,
+ })
+}
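For concreteness, a hypothetical metadata transcript and the `TestMdata` it would parse into; only lines starting with the `ZED_MDATA_` prefix are consulted, so harness chatter is ignored.

```rust
// Hypothetical stdout from `<test_bin> foo__ZED_PERF_MDATA --exact --nocapture`:
//
//     running 1 test
//     ZED_MDATA_ weight 80
//     ZED_MDATA_ importance important
//     ZED_MDATA_ version 0
//     test foo__ZED_PERF_MDATA ... ok
//
// That transcript would parse into roughly:
fn parsed_mdata_example() -> TestMdata {
    TestMdata {
        version: 0,
        // No iter_count line was printed, so the profiler picks the count itself.
        iterations: None,
        importance: Importance::Important,
        weight: 80,
    }
}
```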
+
+/// Compares the perf results of two profiles as per the arguments passed in.
+fn compare_profiles(args: &[String]) {
+ let ident_new = args.first().expect("FATAL: missing identifier for new run");
+ let ident_old = args.get(1).expect("FATAL: missing identifier for old run");
+ // TODO: move this to a constant also tbh
+ let wspace_dir = std::env::var("CARGO_WORKSPACE_DIR").unwrap();
+ let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
+
+ // Use the blank outputs initially, so we can merge into these with prefixes.
+ let mut outputs_new = Output::blank();
+ let mut outputs_old = Output::blank();
+
+ for e in runs_dir.read_dir().unwrap() {
+ let Ok(entry) = e else {
+ continue;
+ };
+ let Ok(metadata) = entry.metadata() else {
+ continue;
+ };
+ if metadata.is_file() {
+ let Ok(name) = entry.file_name().into_string() else {
+ continue;
+ };
+
+            // A little helper to avoid code duplication. Reads an `Output` from
+            // a JSON file, then merges it into what we have so far.
+ let read_into = |output: &mut Output| {
+ let mut elems = name.split('.').skip(1);
+ let prefix = elems.next().unwrap();
+ assert_eq!("json", elems.next().unwrap());
+ assert!(elems.next().is_none());
+ let handle = OpenOptions::new().read(true).open(entry.path()).unwrap();
+ let o_other: Output = serde_json::from_reader(handle).unwrap();
+ output.merge(o_other, prefix);
+ };
+
+ if name.starts_with(ident_old) {
+ read_into(&mut outputs_old);
+ } else if name.starts_with(ident_new) {
+ read_into(&mut outputs_new);
+ }
+ }
+ }
+
+ let res = outputs_new.compare_perf(outputs_old);
+ println!("{res}");
+}
+
+#[expect(clippy::too_many_lines, reason = "This will be split up soon!")]
fn main() {
+ let args = std::env::args().collect::<Vec<_>>();
// We get passed the test we need to run as the 1st argument after our own name.
- let test_bin = std::env::args().nth(1).unwrap();
- let mut cmd = Command::new(&test_bin);
+ let test_bin = args
+ .get(1)
+ .expect("FATAL: No test binary or command; this shouldn't be manually invoked!");
+
+ // We're being asked to compare two results, not run the profiler.
+ if test_bin == "compare" {
+ compare_profiles(&args[2..]);
+ return;
+ }
+
+ // Whether to skip printing some information to stderr.
+ let mut quiet = false;
+ // Minimum test importance we care about this run.
+ let mut thresh = Importance::Iffy;
+ // Where to print the output of this run.
+ let mut out_kind = OutputKind::Markdown;
+
+ for arg in args.iter().skip(2) {
+ match arg.as_str() {
+ "--critical" => thresh = Importance::Critical,
+ "--important" => thresh = Importance::Important,
+ "--average" => thresh = Importance::Average,
+ "--iffy" => thresh = Importance::Iffy,
+ "--fluff" => thresh = Importance::Fluff,
+ "--quiet" => quiet = true,
+ s if s.starts_with("--json") => {
+ out_kind = OutputKind::Json(Path::new(
+ s.strip_prefix("--json=")
+ .expect("FATAL: Invalid json parameter; pass --json=filename"),
+ ));
+ }
+ _ => (),
+ }
+ }
+ if !quiet {
+ eprintln!("Starting perf check");
+ }
+
+ let mut cmd = Command::new(test_bin);
// --format=json is nightly-only :(
cmd.args(["--list", "--format=terse"]);
let out = cmd
@@ -64,6 +245,9 @@ fn main() {
out.status.success(),
"FATAL: Cannot do perf check - test binary {test_bin} returned an error"
);
+ if !quiet {
+ eprintln!("Test binary ran successfully; starting profile...");
+ }
// Parse the test harness output to look for tests we care about.
let stdout = String::from_utf8_lossy(&out.stdout);
let mut test_list: Vec<_> = stdout
@@ -79,37 +263,62 @@ fn main() {
}
})
// Exclude tests that aren't marked for perf triage based on suffix.
- .filter(|t_name| t_name.ends_with(SUF_NORMAL) || t_name.ends_with(SUF_FIXED))
+ .filter(|t_name| {
+ t_name.ends_with(consts::SUF_NORMAL) || t_name.ends_with(consts::SUF_MDATA)
+ })
.collect();
// Pulling itertools just for .dedup() would be quite a big dependency that's
- // not used elsewhere, so do this on the vec instead.
+ // not used elsewhere, so do this on a vec instead.
test_list.sort_unstable();
test_list.dedup();
- if !test_list.is_empty() {
- // Print the markdown header which matches hyperfine's result.
- // TODO: Support exporting JSON also.
- println!(
- "| Command | Mean [ms] | Min [ms] | Max [ms] | Iterations | Iter/sec |\n|:---|---:|---:|---:|---:|---:|"
- );
- }
+ let len = test_list.len();
+
+ // Tests should come in pairs with their mdata fn!
+ assert!(
+ len.is_multiple_of(2),
+ "Malformed tests in test binary {test_bin}"
+ );
+
+ let mut output = Output::default();
// Spawn and profile an instance of each perf-sensitive test, via hyperfine.
- for t_name in test_list {
- // Pretty-print the stripped name for the test.
- let t_name_normal = t_name.replace(SUF_FIXED, "").replace(SUF_NORMAL, "");
+ // Each test is a pair of (test, metadata-returning-fn), so grab both. We also
+ // know the list is sorted.
+ for (idx, t_pair) in test_list.chunks_exact(2).enumerate() {
+ if !quiet {
+ eprint!("\rProfiling test {}/{}", idx + 1, len / 2);
+ }
+ // Be resilient against changes to these constants.
+ let (t_name, t_mdata) = if consts::SUF_NORMAL < consts::SUF_MDATA {
+ (t_pair[0], t_pair[1])
+ } else {
+ (t_pair[1], t_pair[0])
+ };
+ // Pretty-printable stripped name for the test.
+ let t_name_pretty = t_name.replace(consts::SUF_NORMAL, "");
+
+ // Get the metadata this test reports for us.
+ let t_mdata = match parse_mdata(test_bin, t_mdata) {
+ Ok(mdata) => mdata,
+ Err(err) => fail!(output, t_name_pretty, err),
+ };
+
+ if t_mdata.importance < thresh {
+ fail!(output, t_name_pretty, t_mdata, FailKind::Skipped);
+ }
+
// Time test execution to see how many iterations we need to do in order
// to account for random noise. This is skipped for tests with fixed
// iteration counts.
- let final_iter_count = if t_name.ends_with(SUF_FIXED) {
- None
- } else {
+ let mut errored = false;
+ let final_iter_count = t_mdata.iterations.unwrap_or_else(|| {
let mut iter_count = DEFAULT_ITER_COUNT;
loop {
- let mut cmd = Command::new(&test_bin);
+ let mut cmd = Command::new(test_bin);
cmd.args([t_name, "--exact"]);
- cmd.env(ITER_ENV_VAR, format!("{iter_count}"));
+ cmd.env(consts::ITER_ENV_VAR, format!("{iter_count}"));
// Don't let the child muck up our stdin/out/err.
cmd.stdin(Stdio::null());
cmd.stdout(Stdio::null());
@@ -119,28 +328,32 @@ fn main() {
let out = cmd.spawn().unwrap().wait();
let post = Instant::now();
if !out.unwrap().success() {
- println!(
- "| {t_name_normal} (ERRORED IN TRIAGE) | N/A | N/A | N/A | {iter_count} | N/A |"
- );
- return;
+ errored = true;
+ break iter_count;
}
- if post - pre > NOISE_CUTOFF {
- break Some(iter_count);
+ if post - pre > consts::NOISE_CUTOFF {
+ break iter_count;
} else if let Some(c) = iter_count.checked_mul(ITER_COUNT_MUL) {
iter_count = c;
} else {
// This should almost never happen, but maybe..?
eprintln!(
- "WARNING: Running nearly usize::MAX iterations of test {t_name_normal}"
+ "WARNING: Running nearly usize::MAX iterations of test {t_name_pretty}"
);
- break Some(iter_count);
+ break iter_count;
}
}
- };
+ });
+
+ // Don't profile failing tests.
+ if errored {
+ fail!(output, t_name_pretty, t_mdata, FailKind::Triage);
+ }
// Now profile!
let mut perf_cmd = Command::new("hyperfine");
// Warm up the cache and print markdown output to stdout.
+ // TODO: json
perf_cmd.args([
"--style",
"none",
@@ -150,42 +363,89 @@ fn main() {
"-",
&format!("{test_bin} {t_name}"),
]);
- if let Some(final_iter_count) = final_iter_count {
- perf_cmd.env(ITER_ENV_VAR, format!("{final_iter_count}"));
- }
+ perf_cmd.env(consts::ITER_ENV_VAR, format!("{final_iter_count}"));
let p_out = perf_cmd.output().unwrap();
- let fin_iter = match final_iter_count {
- Some(i) => &format!("{i}"),
- None => "(preset)",
- };
if p_out.status.success() {
- let output = String::from_utf8_lossy(&p_out.stdout);
- // Strip the name of the test binary from the table (and the space after it!)
- // + our extraneous test bits + the "Relative" column (which is always at the end and "1.00").
- let output = output
- .replace(&format!("{test_bin} "), "")
- .replace(SUF_FIXED, "")
- .replace(SUF_NORMAL, "")
- .replace(" 1.00 |", "");
+ let cmd_output = String::from_utf8_lossy(&p_out.stdout);
// Can't use .last() since we have a trailing newline. Sigh.
- let fin = output.lines().nth(3).unwrap();
+ let results_line = cmd_output.lines().nth(3).unwrap();
+ // Grab the values out of the pretty-print.
+ // TODO: Parse json instead.
+ let mut res_iter = results_line.split_whitespace();
+ // Durations are given in milliseconds, so account for that.
+ let mean =
+ Duration::from_secs_f64(res_iter.nth(4).unwrap().parse::<f64>().unwrap() / 1000.);
+ let stddev =
+ Duration::from_secs_f64(res_iter.nth(1).unwrap().parse::<f64>().unwrap() / 1000.);
- // Calculate how many iterations this does per second, for easy comparison.
- let ms = fin
- .split_whitespace()
- .nth(3)
+ output.success(
+ t_name_pretty,
+ t_mdata,
+ final_iter_count,
+ Timings { mean, stddev },
+ );
+ } else {
+ fail!(
+ output,
+ t_name_pretty,
+ t_mdata,
+ final_iter_count,
+ FailKind::Profile
+ );
+ }
+ }
+ if !quiet {
+ if output.is_empty() {
+ eprintln!("Nothing to do.");
+ } else {
+ // If stdout and stderr are on the same terminal, move us after the
+ // output from above.
+ eprintln!();
+ }
+ }
+
+    // No point in making an empty json file for every empty test bin.
+ if output.is_empty() {
+ return;
+ }
+
+ match out_kind {
+ OutputKind::Markdown => print!("{output}"),
+ OutputKind::Json(user_path) => {
+ let wspace_dir = std::env::var("CARGO_WORKSPACE_DIR").unwrap();
+ let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
+ std::fs::create_dir_all(&runs_dir).unwrap();
+ assert!(
+ !user_path.to_string_lossy().is_empty(),
+ "FATAL: Empty filename specified!"
+ );
+ // Get the test binary's crate's name; a path like
+ // target/release-fast/deps/gpui-061ff76c9b7af5d7
+ // would be reduced to just "gpui".
+ let test_bin_stripped = Path::new(test_bin)
+ .file_name()
+ .unwrap()
+ .to_str()
.unwrap()
- .parse::<f64>()
+ .rsplit_once('-')
+ .unwrap()
+ .0;
+ let mut file_path = runs_dir.join(user_path);
+ file_path
+ .as_mut_os_string()
+ .push(format!(".{test_bin_stripped}.json"));
+ let mut out_file = OpenOptions::new()
+ .write(true)
+ .create(true)
+ .truncate(true)
+ .open(&file_path)
.unwrap();
- let mul_fac = 1000.0 / ms;
- let iter_sec = match final_iter_count {
- #[allow(clippy::cast_precision_loss)]
- Some(c) => &format!("{:.1}", mul_fac * c as f64),
- None => "(unknown)",
- };
- println!("{fin} {fin_iter} | {iter_sec} |");
- } else {
- println!("{t_name_normal} (ERRORED) | N/A | N/A | N/A | {fin_iter} | N/A |");
+ out_file
+ .write_all(&serde_json::to_vec(&output).unwrap())
+ .unwrap();
+ if !quiet {
+ eprintln!("JSON output written to {}", file_path.display());
+ }
}
}
}
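Since the success path scrapes hyperfine's markdown table rather than JSON (see the TODOs above), the `nth(4)`/`nth(1)` indexing is easiest to follow against a concrete row. The row below is hypothetical (cell decoration may differ; only the whitespace positions matter) and assumes the benchmarked command contains exactly one space, i.e. binary path plus test name, which is what makes the offsets line up.

```rust
// Sketch of the column indexing with a hypothetical hyperfine markdown row.
fn parse_row_example() {
    let results_line =
        "| target/release-fast/deps/gpui-abc123 foo__ZED_PERF_FN | 12.3 ± 0.4 | 11.8 | 13.1 | 1.00 |";
    let mut res_iter = results_line.split_whitespace();
    // Tokens: 0 "|", 1 test_bin, 2 test_name, 3 "|", 4 mean, 5 "±", 6 stddev, ...
    let mean_ms: f64 = res_iter.nth(4).unwrap().parse().unwrap(); // "12.3"
    let stddev_ms: f64 = res_iter.nth(1).unwrap().parse().unwrap(); // skips "±", takes "0.4"
    assert!((mean_ms - 12.3).abs() < 1e-9 && (stddev_ms - 0.4).abs() < 1e-9);
}
```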