diff --git a/.cargo/config.toml b/.cargo/config.toml index 74d34226af09c11b56faa6722e00afa218c924f5..9da793fc48b62f7f03cd1d36a505fa1e1ef2a45a 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -5,6 +5,7 @@ rustflags = ["-C", "symbol-mangling-version=v0", "--cfg", "tokio_unstable"] [alias] xtask = "run --package xtask --" perf-test = ["test", "--profile", "release-fast", "--lib", "--bins", "--tests", "--config", "target.'cfg(true)'.runner='target/release/perf'", "--config", "target.'cfg(true)'.rustflags=[\"--cfg\", \"perf_enabled\"]"] +perf-compare = ["run", "--release", "-p", "perf", "--", "compare"] [target.x86_64-unknown-linux-gnu] linker = "clang" @@ -24,3 +25,4 @@ rustflags = [ [env] MACOSX_DEPLOYMENT_TARGET = "10.15.7" +CARGO_WORKSPACE_DIR = { value = "", relative = true } diff --git a/.gitignore b/.gitignore index 7b40c45adf614eb91f1676144e7b70a7b2a373f2..d248b1f7e5adf30cb286a1737c1cd4f72f0f5d20 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ .venv .vscode .wrangler +.perf-runs /assets/*licenses.* /crates/collab/seed.json /crates/theme/schemas/theme.json diff --git a/Cargo.lock b/Cargo.lock index 6293b0cc2da475ef5f2282a039c123c76d31e1c7..dbe2467499ad1c5d6f67c4de82546e2b560451bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12169,6 +12169,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" name = "perf" version = "0.1.0" dependencies = [ + "collections", + "serde", + "serde_json", "workspace-hack", ] @@ -18586,6 +18589,7 @@ dependencies = [ name = "util_macros" version = "0.1.0" dependencies = [ + "perf", "quote", "syn 2.0.101", "workspace-hack", diff --git a/Cargo.toml b/Cargo.toml index fd08fbb3f971e84d27e469bd79888531366109c4..d4812908ac8292caf8371ce1d6dd9c9ee4042ca0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -356,6 +356,7 @@ outline = { path = "crates/outline" } outline_panel = { path = "crates/outline_panel" } panel = { path = "crates/panel" } paths = { path = "crates/paths" } +perf = { path = "tooling/perf" } picker = { path = "crates/picker" } plugin = { path = "crates/plugin" } plugin_macros = { path = "crates/plugin_macros" } diff --git a/crates/util_macros/Cargo.toml b/crates/util_macros/Cargo.toml index 45145a68f6a7d54d759d932c3dc851d14f4939d9..344331f1395ebb90251c4b08f92c57f28213fa4f 100644 --- a/crates/util_macros/Cargo.toml +++ b/crates/util_macros/Cargo.toml @@ -16,6 +16,7 @@ doctest = false [dependencies] quote.workspace = true syn.workspace = true +perf.workspace = true workspace-hack.workspace = true [features] diff --git a/crates/util_macros/src/util_macros.rs b/crates/util_macros/src/util_macros.rs index d3f05afdecbca8cb3b4c8685054d3828e6c702fd..69f6306133f490087b2cefeb71aeafab08b98a9a 100644 --- a/crates/util_macros/src/util_macros.rs +++ b/crates/util_macros/src/util_macros.rs @@ -1,6 +1,7 @@ #![cfg_attr(not(target_os = "windows"), allow(unused))] #![allow(clippy::test_attr_in_doctest)] +use perf::*; use proc_macro::TokenStream; use quote::{ToTokens, quote}; use syn::{ItemFn, LitStr, parse_macro_input, parse_quote}; @@ -90,68 +91,81 @@ pub fn line_endings(input: TokenStream) -> TokenStream { } /// Inner data for the perf macro. +#[derive(Default)] struct PerfArgs { - /// How many times to loop a test before rerunning the test binary. - /// If left empty, the test harness will auto-determine this value. + /// How many times to loop a test before rerunning the test binary. If left + /// empty, the test harness will auto-determine this value. 
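+    /// (For example, `#[perf(iterations = 4)]` fixes this at four runs per process;
+    /// the number here is purely illustrative.)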
+    iterations: Option<syn::Expr>,
+    /// How much this test's results should be weighted when comparing across runs.
+    /// If unspecified, defaults to `WEIGHT_DEFAULT` (50).
+    weight: Option<syn::Expr>,
+    /// How relevant a benchmark is to overall performance. See docs on the enum
+    /// for details. If unspecified, `Average` is selected.
+    importance: Importance,
 }
 
-impl syn::parse::Parse for PerfArgs {
-    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
-        if input.is_empty() {
-            return Ok(PerfArgs { iterations: None });
-        }
-
-        let mut iterations = None;
-        // In principle we only have one possible argument, but leave this as
-        // a loop in case we expand this in the future.
-        for meta in
-            syn::punctuated::Punctuated::<syn::Meta, syn::Token![,]>::parse_terminated(input)?
-        {
-            match &meta {
-                syn::Meta::NameValue(meta_name_value) => {
-                    if meta_name_value.path.is_ident("iterations") {
-                        iterations = Some(meta_name_value.value.clone());
-                    } else {
-                        return Err(syn::Error::new_spanned(
-                            &meta_name_value.path,
-                            "unexpected argument, expected 'iterations'",
-                        ));
-                    }
-                }
-                _ => {
-                    return Err(syn::Error::new_spanned(
-                        meta,
-                        "expected name-value argument like 'iterations = 1'",
-                    ));
-                }
-            }
+#[warn(clippy::all, clippy::pedantic)]
+impl PerfArgs {
+    /// Parses attribute arguments into a `PerfArgs`.
+    fn parse_into(&mut self, meta: syn::meta::ParseNestedMeta) -> syn::Result<()> {
+        if meta.path.is_ident("iterations") {
+            self.iterations = Some(meta.value()?.parse()?);
+        } else if meta.path.is_ident("weight") {
+            self.weight = Some(meta.value()?.parse()?);
+        } else if meta.path.is_ident("critical") {
+            self.importance = Importance::Critical;
+        } else if meta.path.is_ident("important") {
+            self.importance = Importance::Important;
+        } else if meta.path.is_ident("average") {
+            // This shouldn't be specified manually, but oh well.
+            self.importance = Importance::Average;
+        } else if meta.path.is_ident("iffy") {
+            self.importance = Importance::Iffy;
+        } else if meta.path.is_ident("fluff") {
+            self.importance = Importance::Fluff;
+        } else {
+            return Err(syn::Error::new_spanned(meta.path, "unexpected identifier"));
         }
-
-        Ok(PerfArgs { iterations })
+        Ok(())
     }
 }
 
 /// Marks a test as perf-sensitive, to be triaged when checking the performance
 /// of a build. This also automatically applies `#[test]`.
 ///
+///
+/// # Usage
+/// Applying this attribute to a test marks it as average importance by default.
+/// There are five levels of importance (`Critical`, `Important`, `Average`, `Iffy`,
+/// `Fluff`); see the documentation on `Importance` for details. Add the importance
+/// as a parameter to override the default (e.g. `#[perf(important)]`).
+///
+/// Each test also has a weight factor. This is irrelevant on its own, but is considered
+/// when comparing results across different runs. By default, this is set to 50;
+/// pass `weight = n` as a parameter to override this. Note that this value is only
+/// relevant within its importance category.
+///
 /// By default, the number of iterations when profiling this test is auto-determined.
-/// If this needs to be overwritten, pass the desired iteration count to the macro
-/// as a parameter (`#[perf(iterations = n)]`). Note that the actual profiler may still
-/// run the test an arbitrary number times; this flag just sets the number of executions
-/// before the process is restarted and global state is reset.
+/// If this needs to be overwritten, pass the desired iteration count as a parameter
+/// (`#[perf(iterations = n)]`). 
Note that the actual profiler may still run the test +/// an arbitrary number times; this flag just sets the number of executions before the +/// process is restarted and global state is reset. /// -/// # Usage notes -/// This should probably not be applied to tests that do any significant fs IO, as -/// locks on files may not be released in time when repeating a test many times. This -/// might lead to spurious failures. +/// This attribute should probably not be applied to tests that do any significant +/// disk IO, as locks on files may not be released in time when repeating a test many +/// times. This might lead to spurious failures. /// /// # Examples /// ```rust /// use util_macros::perf; /// /// #[perf] -/// fn expensive_computation_test() { +/// fn generic_test() { +/// // Test goes here. +/// } +/// +/// #[perf(fluff, weight = 30)] +/// fn cold_path_test() { /// // Test goes here. /// } /// ``` @@ -161,72 +175,108 @@ impl syn::parse::Parse for PerfArgs { /// ```rust,ignore /// use util_macros::perf; /// -/// #[perf(iterations = 1)] +/// #[perf(iterations = 1, critical)] /// #[gpui::test] /// fn oneshot_test(_cx: &mut gpui::TestAppContext) { /// // Test goes here. /// } /// ``` #[proc_macro_attribute] +#[warn(clippy::all, clippy::pedantic)] pub fn perf(our_attr: TokenStream, input: TokenStream) -> TokenStream { - // If any of the below constants are changed, make sure to also update the perf - // profiler to match! - - /// The suffix on tests marked with `#[perf]`. - const SUF_NORMAL: &str = "__ZED_PERF"; - /// The suffix on tests marked with `#[perf(iterations = n)]`. - const SUF_FIXED: &str = "__ZED_PERF_FIXEDITER"; - /// The env var in which we pass the iteration count to our tests. - const ITER_ENV_VAR: &str = "ZED_PERF_ITER"; - - let iter_count = parse_macro_input!(our_attr as PerfArgs).iterations; + let mut args = PerfArgs::default(); + let parser = syn::meta::parser(|meta| PerfArgs::parse_into(&mut args, meta)); + parse_macro_input!(our_attr with parser); let ItemFn { - mut attrs, + attrs: mut attrs_main, vis, - mut sig, + sig: mut sig_main, block, } = parse_macro_input!(input as ItemFn); - attrs.push(parse_quote!(#[test])); - attrs.push(parse_quote!(#[allow(non_snake_case)])); + attrs_main.push(parse_quote!(#[test])); + attrs_main.push(parse_quote!(#[allow(non_snake_case)])); + + let fns = if cfg!(perf_enabled) { + #[allow(clippy::wildcard_imports, reason = "We control the other side")] + use consts::*; - let block: Box = if cfg!(perf_enabled) { // Make the ident obvious when calling, for the test parser. - let mut new_ident = sig.ident.to_string(); - if iter_count.is_some() { - new_ident.push_str(SUF_FIXED); - } else { - new_ident.push_str(SUF_NORMAL); - } + // Also set up values for the second metadata-returning "test". + let mut new_ident_main = sig_main.ident.to_string(); + let mut new_ident_meta = new_ident_main.clone(); + new_ident_main.push_str(SUF_NORMAL); + new_ident_meta.push_str(SUF_MDATA); - let new_ident = syn::Ident::new(&new_ident, sig.ident.span()); - sig.ident = new_ident; - // If we have a preset iteration count, just use that. - if let Some(iter_count) = iter_count { - parse_quote!({ - for _ in 0..#iter_count { - #block - } - }) - } else { - // Otherwise, the perf harness will pass us the value in an env var. + let new_ident_main = syn::Ident::new(&new_ident_main, sig_main.ident.span()); + sig_main.ident = new_ident_main; + + // We don't want any nonsense if the original test had a weird signature. 
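+        // (The metadata twin always gets a plain `fn name()` signature, regardless
+        // of the original test's arguments or return type.)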
+ let new_ident_meta = syn::Ident::new(&new_ident_meta, sig_main.ident.span()); + let sig_meta = parse_quote!(fn #new_ident_meta()); + let attrs_meta = parse_quote!(#[test] #[allow(non_snake_case)]); + + // Make the test loop as the harness instructs it to. + let block_main = { + // The perf harness will pass us the value in an env var. Even if we + // have a preset value, just do this to keep the code paths unified. parse_quote!({ let iter_count = std::env::var(#ITER_ENV_VAR).unwrap().parse::().unwrap(); for _ in 0..iter_count { #block } }) - } + }; + let importance = format!("{}", args.importance); + let block_meta = { + // This function's job is to just print some relevant info to stdout, + // based on the params this attr is passed. It's not an actual test. + // Since we use a custom attr set on our metadata fn, it shouldn't + // cause problems with xfail tests. + let q_iter = if let Some(iter) = args.iterations { + quote! { + println!("{} {} {}", #MDATA_LINE_PREF, #ITER_COUNT_LINE_NAME, #iter); + } + } else { + quote! {} + }; + let weight = args + .weight + .unwrap_or_else(|| parse_quote! { #WEIGHT_DEFAULT }); + parse_quote!({ + #q_iter + println!("{} {} {}", #MDATA_LINE_PREF, #WEIGHT_LINE_NAME, #weight); + println!("{} {} {}", #MDATA_LINE_PREF, #IMPORTANCE_LINE_NAME, #importance); + println!("{} {} {}", #MDATA_LINE_PREF, #VERSION_LINE_NAME, #MDATA_VER); + }) + }; + + vec![ + // The real test. + ItemFn { + attrs: attrs_main, + vis: vis.clone(), + sig: sig_main, + block: block_main, + }, + // The fake test. + ItemFn { + attrs: attrs_meta, + vis, + sig: sig_meta, + block: block_meta, + }, + ] } else { - block + vec![ItemFn { + attrs: attrs_main, + vis, + sig: sig_main, + block, + }] }; - ItemFn { - attrs, - vis, - sig, - block, - } - .into_token_stream() - .into() + fns.into_iter() + .flat_map(|f| TokenStream::from(f.into_token_stream())) + .collect() } diff --git a/tooling/perf/Cargo.toml b/tooling/perf/Cargo.toml index f5013a82836b9888d94fb39fa18f0efa00e1b0ce..4766b58d8a760aa995dba7092d33c436559019c2 100644 --- a/tooling/perf/Cargo.toml +++ b/tooling/perf/Cargo.toml @@ -1,11 +1,30 @@ [package] name = "perf" version = "0.1.0" +description = "A tool for measuring Zed test performance, with too many Clippy lints" publish.workspace = true edition.workspace = true -[lints] -workspace = true +[lib] + +# Some personal lint preferences :3 +[lints.rust] +missing_docs = "warn" + +[lints.clippy] +needless_continue = "allow" # For a convenience macro +all = "warn" +pedantic = "warn" +style = "warn" +missing_docs_in_private_items = "warn" +as_underscore = "deny" +allow_attributes_without_reason = "deny" +let_underscore_must_use = "forbid" +undocumented_unsafe_blocks = "forbid" +missing_safety_doc = "forbid" [dependencies] +collections.workspace = true +serde.workspace = true +serde_json.workspace = true workspace-hack.workspace = true diff --git a/tooling/perf/src/lib.rs b/tooling/perf/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..30909f646b061895e10f5c860149e2370892ccd2 --- /dev/null +++ b/tooling/perf/src/lib.rs @@ -0,0 +1,413 @@ +//! Some constants and datatypes used in the Zed perf profiler. Should only be +//! consumed by the crate providing the matching macros. + +use collections::HashMap; +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +pub mod consts { + //! Preset idenitifiers and constants so that the profiler and proc macro agree + //! on their communication protocol. + + /// The suffix on the actual test function. 
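+    /// e.g. a test named `my_test` is emitted as `my_test__ZED_PERF_FN`.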
+ pub const SUF_NORMAL: &str = "__ZED_PERF_FN"; + /// The suffix on an extra function which prints metadata about a test to stdout. + pub const SUF_MDATA: &str = "__ZED_PERF_MDATA"; + /// The env var in which we pass the iteration count to our tests. + pub const ITER_ENV_VAR: &str = "ZED_PERF_ITER"; + /// The prefix printed on all benchmark test metadata lines, to distinguish it from + /// possible output by the test harness itself. + pub const MDATA_LINE_PREF: &str = "ZED_MDATA_"; + /// The version number for the data returned from the test metadata function. + /// Increment on non-backwards-compatible changes. + pub const MDATA_VER: u32 = 0; + /// The default weight, if none is specified. + pub const WEIGHT_DEFAULT: u8 = 50; + /// How long a test must have run to be assumed to be reliable-ish. + pub const NOISE_CUTOFF: std::time::Duration = std::time::Duration::from_millis(250); + + /// Identifier for the iteration count of a test metadata. + pub const ITER_COUNT_LINE_NAME: &str = "iter_count"; + /// Identifier for the weight of a test metadata. + pub const WEIGHT_LINE_NAME: &str = "weight"; + /// Identifier for importance in test metadata. + pub const IMPORTANCE_LINE_NAME: &str = "importance"; + /// Identifier for the test metadata version. + pub const VERSION_LINE_NAME: &str = "version"; + + /// Where to save json run information. + pub const RUNS_DIR: &str = ".perf-runs"; +} + +/// How relevant a benchmark is. +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] +pub enum Importance { + /// Regressions shouldn't be accepted without good reason. + Critical = 4, + /// Regressions should be paid extra attention. + Important = 3, + /// No extra attention should be paid to regressions, but they might still + /// be indicative of something happening. + #[default] + Average = 2, + /// Unclear if regressions are likely to be meaningful, but still worth keeping + /// an eye on. Lowest level that's checked by default by the profiler. + Iffy = 1, + /// Regressions are likely to be spurious or don't affect core functionality. + /// Only relevant if a lot of them happen, or as supplemental evidence for a + /// higher-importance benchmark regressing. Not checked by default. + Fluff = 0, +} + +impl std::fmt::Display for Importance { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Importance::Critical => f.write_str("critical"), + Importance::Important => f.write_str("important"), + Importance::Average => f.write_str("average"), + Importance::Iffy => f.write_str("iffy"), + Importance::Fluff => f.write_str("fluff"), + } + } +} + +/// Why or when did this test fail? +#[derive(Clone, Debug, Serialize, Deserialize)] +pub enum FailKind { + /// Failed while triaging it to determine the iteration count. + Triage, + /// Failed while profiling it. + Profile, + /// Failed due to an incompatible version for the test. + VersionMismatch, + /// Skipped due to filters applied on the perf run. + Skipped, +} + +impl std::fmt::Display for FailKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + FailKind::Triage => f.write_str("failed in triage"), + FailKind::Profile => f.write_str("failed while profiling"), + FailKind::VersionMismatch => f.write_str("test version mismatch"), + FailKind::Skipped => f.write_str("skipped"), + } + } +} + +/// Information about a given perf test. 
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct TestMdata {
+    /// A version number for when the test was generated. If this is greater
+    /// than the version this test handler expects, one of the following will
+    /// happen in an unspecified manner:
+    /// - The test is skipped silently.
+    /// - The handler exits with an error message indicating the version mismatch
+    ///   or inability to parse the metadata.
+    ///
+    /// INVARIANT: If `version` <= `MDATA_VER`, this tool *must* be able to
+    /// correctly parse the output of this test.
+    pub version: u32,
+    /// How many iterations to pass this test, if this is preset.
+    pub iterations: Option<usize>,
+    /// The importance of this particular test. See the docs on `Importance` for
+    /// details.
+    pub importance: Importance,
+    /// The weight of this particular test within its importance category. Used
+    /// when comparing across runs.
+    pub weight: u8,
+}
+
+/// The actual timings of a test, as measured by Hyperfine.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct Timings {
+    /// Mean runtime for `self.iter_total` runs of this test.
+    pub mean: Duration,
+    /// Standard deviation for the above.
+    pub stddev: Duration,
+}
+
+impl Timings {
+    /// How many iterations does this test seem to do per second?
+    #[expect(
+        clippy::cast_precision_loss,
+        reason = "We only care about a couple sig figs anyways"
+    )]
+    #[must_use]
+    pub fn iters_per_sec(&self, total_iters: usize) -> f64 {
+        (1000. / self.mean.as_millis() as f64) * total_iters as f64
+    }
+}
+
+/// Aggregate output of all tests run by this handler.
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+pub struct Output {
+    /// A list of test outputs. Format is `(test_name, metadata, timings)`.
+    /// The latter being set indicates the test succeeded.
+    ///
+    /// INVARIANT: If the test succeeded, the second field is `Some(mdata)` and
+    /// `mdata.iterations` is `Some(_)`.
+    tests: Vec<(String, Option<TestMdata>, Result<Timings, FailKind>)>,
+}
+
+impl Output {
+    /// Instantiates an empty "output". Useful for merging.
+    #[must_use]
+    pub fn blank() -> Self {
+        Output { tests: Vec::new() }
+    }
+
+    /// Reports a success and adds it to this run's `Output`.
+    pub fn success(
+        &mut self,
+        name: impl AsRef<str>,
+        mut mdata: TestMdata,
+        iters: usize,
+        timings: Timings,
+    ) {
+        mdata.iterations = Some(iters);
+        self.tests
+            .push((name.as_ref().to_string(), Some(mdata), Ok(timings)));
+    }
+
+    /// Reports a failure and adds it to this run's `Output`. If this test was tried
+    /// with some number of iterations (i.e. this was not a version mismatch or skipped
+    /// test), it should be reported also.
+    ///
+    /// Using the `fail!()` macro is usually more convenient.
+    pub fn failure(
+        &mut self,
+        name: impl AsRef<str>,
+        mut mdata: Option<TestMdata>,
+        attempted_iters: Option<usize>,
+        kind: FailKind,
+    ) {
+        if let Some(ref mut mdata) = mdata {
+            mdata.iterations = attempted_iters;
+        }
+        self.tests
+            .push((name.as_ref().to_string(), mdata, Err(kind)));
+    }
+
+    /// True if no tests executed this run.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.tests.is_empty()
+    }
+
+    /// Sorts the runs in the output in the order that we want it printed.
+    pub fn sort(&mut self) {
+        self.tests.sort_unstable_by(|a, b| match (a, b) {
+            // Tests where we got no metadata go at the end.
+            ((_, Some(_), _), (_, None, _)) => std::cmp::Ordering::Greater,
+            ((_, None, _), (_, Some(_), _)) => std::cmp::Ordering::Less,
+            // Then sort by importance, then weight.
+            ((_, Some(a_mdata), _), (_, Some(b_mdata), _)) => {
+                let c = a_mdata.importance.cmp(&b_mdata.importance);
+                if matches!(c, std::cmp::Ordering::Equal) {
+                    a_mdata.weight.cmp(&b_mdata.weight)
+                } else {
+                    c
+                }
+            }
+            // Lastly by name.
+            ((a_name, ..), (b_name, ..)) => a_name.cmp(b_name),
+        });
+    }
+
+    /// Merges the output of two runs, appending a prefix to the results of the new run.
+    /// To be used in conjunction with `Output::blank()`, or else only some tests will have
+    /// a prefix set.
+    pub fn merge(&mut self, other: Self, pref_other: impl AsRef<str>) {
+        self.tests = std::mem::take(&mut self.tests)
+            .into_iter()
+            .chain(other.tests.into_iter().map(|(name, md, tm)| {
+                let mut new_name = "crates/".to_string();
+                new_name.push_str(pref_other.as_ref());
+                new_name.push_str("::");
+                new_name.push_str(&name);
+                (new_name, md, tm)
+            }))
+            .collect();
+    }
+
+    /// Evaluates the performance of `self` against `baseline`. The latter is taken
+    /// as the comparison point, i.e. a positive resulting `PerfReport` means that
+    /// `self` performed better.
+    ///
+    /// # Panics
+    /// `self` and `baseline` are assumed to have the iterations field on all
+    /// `TestMdata`s set to `Some(_)` if the `TestMdata` is present itself.
+    #[must_use]
+    pub fn compare_perf(self, baseline: Self) -> PerfReport {
+        let self_categories = self.collapse();
+        let mut other_categories = baseline.collapse();
+
+        let deltas = self_categories
+            .into_iter()
+            .filter_map(|(cat, self_data)| {
+                // Only compare categories where both meow
+                // runs have data.
+                let mut other_data = other_categories.remove(&cat)?;
+                let mut max = 0.;
+                let mut min = 0.;
+
+                // Running totals for averaging out tests.
+                let mut r_total_numerator = 0.;
+                let mut r_total_denominator = 0;
+                // Yeah this is O(n^2), but realistically it'll hardly be a bottleneck.
+                for (name, (s_timings, s_iters, weight)) in self_data {
+                    // Only use the new weights if they conflict.
+                    let Some((o_timings, o_iters, _)) = other_data.remove(&name) else {
+                        continue;
+                    };
+                    let shift =
+                        (s_timings.iters_per_sec(s_iters) / o_timings.iters_per_sec(o_iters)) - 1.;
+                    if shift > max {
+                        max = shift;
+                    }
+                    if shift < min {
+                        min = shift;
+                    }
+                    r_total_numerator += shift * f64::from(weight);
+                    r_total_denominator += u32::from(weight);
+                }
+                let mean = r_total_numerator / f64::from(r_total_denominator);
+                // TODO: also aggregate standard deviation? that's harder to keep
+                // meaningful, though, since we don't know which tests are correlated
+                Some((cat, PerfDelta { max, mean, min }))
+            })
+            .collect();
+
+        PerfReport { deltas }
+    }
+
+    /// Collapses the `Output` into a `HashMap` of `Importance` <-> tests
+    /// each represented as a map of `name, (Timings, iterations, weight)`.
+    fn collapse(self) -> HashMap<Importance, HashMap<String, (Timings, usize, u8)>> {
+        let mut categories =
+            HashMap::<Importance, HashMap<String, (Timings, usize, u8)>>::default();
+        for entry in self.tests {
+            if let Some(mdata) = entry.1
+                && let Ok(timings) = entry.2
+            {
+                if let Some(handle) = categories.get_mut(&mdata.importance) {
+                    handle.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
+                } else {
+                    let mut new = HashMap::default();
+                    new.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
+                    categories.insert(mdata.importance, new);
+                }
+            }
+        }
+
+        categories
+    }
+}
+
+impl std::fmt::Display for Output {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Don't print the header for an empty run.
+        if self.tests.is_empty() {
+            return Ok(());
+        }
+
+        // We want to print important tests at the top, then alphabetical. 
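+        // A rendered row looks roughly like this (numbers are illustrative):
+        // | my_test | 30.00 | 100.00 | 0.40 | 3 | average (50) |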
+ let mut sorted = self.clone(); + sorted.sort(); + // Markdown header for making a nice little table :> + writeln!( + f, + "| Command | Iter/sec | Mean [ms] | SD [ms] | Iterations | Importance (weight) |", + )?; + writeln!(f, "|:---|---:|---:|---:|---:|---:|")?; + for (name, metadata, timings) in &sorted.tests { + match metadata { + Some(metadata) => match timings { + // Happy path. + Ok(timings) => { + // If the test succeeded, then metadata.iterations is Some(_). + writeln!( + f, + "| {} | {:.2} | {} | {:.2} | {} | {} ({}) |", + name, + timings.iters_per_sec(metadata.iterations.unwrap()), + { + // Very small mean runtimes will give inaccurate + // results. Should probably also penalise weight. + let mean = timings.mean.as_secs_f64() * 1000.; + if mean < consts::NOISE_CUTOFF.as_secs_f64() * 1000. / 8. { + format!("{mean:.2} (unreliable)") + } else { + format!("{mean:.2}") + } + }, + timings.stddev.as_secs_f64() * 1000., + metadata.iterations.unwrap(), + metadata.importance, + metadata.weight, + )?; + } + // We have (some) metadata, but the test errored. + Err(err) => writeln!( + f, + "| ({}) {} | N/A | N/A | N/A | {} | {} ({}) |", + err, + name, + metadata + .iterations + .map_or_else(|| "N/A".to_owned(), |i| format!("{i}")), + metadata.importance, + metadata.weight + )?, + }, + // No metadata, couldn't even parse the test output. + None => writeln!( + f, + "| ({}) {} | N/A | N/A | N/A | N/A | N/A |", + timings.as_ref().unwrap_err(), + name + )?, + } + } + writeln!(f)?; + Ok(()) + } +} + +/// The difference in performance between two runs within a given importance +/// category. +struct PerfDelta { + /// The biggest improvement / least bad regression. + max: f64, + /// The weighted average change in test times. + mean: f64, + /// The worst regression / smallest improvement. + min: f64, +} + +/// Shim type for reporting all performance deltas across importance categories. +pub struct PerfReport { + /// Inner (group, diff) pairing. + deltas: HashMap, +} + +impl std::fmt::Display for PerfReport { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.deltas.is_empty() { + return write!(f, "(no matching tests)"); + } + let sorted = self.deltas.iter().collect::>(); + writeln!(f, "| Category | Max | Mean | Min |")?; + // We don't want to print too many newlines at the end, so handle newlines + // a little jankily like this. + write!(f, "|:---|---:|---:|---:|")?; + for (cat, delta) in sorted.into_iter().rev() { + write!( + f, + "\n| {cat} | {:.3} | {:.3} | {:.3} |", + delta.max, delta.mean, delta.min + )?; + } + Ok(()) + } +} diff --git a/tooling/perf/src/main.rs b/tooling/perf/src/main.rs index a119811aba76afccc16dbef48e4dbee576b46fdc..2610adc66f88dfa675df975219f5b2937011e81b 100644 --- a/tooling/perf/src/main.rs +++ b/tooling/perf/src/main.rs @@ -1,18 +1,15 @@ -#![warn(clippy::all, clippy::pedantic, clippy::undocumented_unsafe_blocks)] -#![cfg_attr(release, deny(warnings))] - //! Perf profiler for Zed tests. Outputs timings of tests marked with the `#[perf]` //! attribute to stdout in Markdown. See the documentation of `util_macros::perf` //! for usage details on the actual attribute. //! //! # Setup //! Make sure `hyperfine` is installed and in the shell path, then run -//! `cargo build --bin perf --workspace --release` to build the profiler. +//! `cargo build -p perf --release` to build the profiler. //! //! # Usage -//! Calling this tool rebuilds everything with some cfg flags set for the perf -//! 
proc macro *and* enables optimisations (`release-fast` profile), so expect it -//! to take a little while. +//! Calling this tool rebuilds the targeted crate(s) with some cfg flags set for the +//! perf proc macro *and* enables optimisations (`release-fast` profile), so expect +//! it to take a little while. //! //! To test an individual crate, run: //! ```sh @@ -24,37 +21,221 @@ //! cargo perf-test --workspace //! ``` //! +//! Some command-line parameters are also recognised by this profiler. To filter +//! out all tests below a certain importance (e.g. `important`), run: +//! ```sh +//! cargo perf-test $WHATEVER -- --important +//! ``` +//! +//! Similarly, to skip outputting progress to the command line, pass `-- --quiet`. +//! These flags can be combined. +//! +//! ## Comparing runs +//! Passing `--json=ident` will save per-crate run files in `.perf-runs`, e.g. +//! `cargo perf-test -p gpui -- --json=blah` will result in `.perf-runs/blah.gpui.json` +//! being created (unless no tests were run). These results can be automatically +//! compared. To do so, run `cargo perf-compare new-ident old-ident`. +//! +//! NB: All files matching `.perf-runs/ident.*.json` will be considered when +//! doing this comparison, so ensure there aren't leftover files in your `.perf-runs` +//! directory that might match that! +//! //! # Notes //! This should probably not be called manually unless you're working on the profiler //! itself; use the `cargo perf-test` alias (after building this crate) instead. +#[allow(clippy::wildcard_imports, reason = "Our crate")] +use perf::*; + use std::{ + fs::OpenOptions, + io::Write, + path::{Path, PathBuf}, process::{Command, Stdio}, time::{Duration, Instant}, }; /// How many iterations to attempt the first time a test is run. -const DEFAULT_ITER_COUNT: usize = 12; +const DEFAULT_ITER_COUNT: usize = 3; /// Multiplier for the iteration count when a test doesn't pass the noise cutoff. const ITER_COUNT_MUL: usize = 4; -/// How long a test must have run to be assumed to be reliable-ish. -const NOISE_CUTOFF: Duration = Duration::from_millis(250); -// If any of the below constants are changed, make sure to also update the perf -// proc macro to match! +/// Report a failure into the output and skip an iteration. +macro_rules! fail { + ($output:ident, $name:expr, $kind:expr) => {{ + $output.failure($name, None, None, $kind); + continue; + }}; + ($output:ident, $name:expr, $mdata:expr, $kind:expr) => {{ + $output.failure($name, Some($mdata), None, $kind); + continue; + }}; + ($output:ident, $name:expr, $mdata:expr, $count:expr, $kind:expr) => {{ + $output.failure($name, Some($mdata), Some($count), $kind); + continue; + }}; +} -/// The suffix on tests marked with `#[perf]`. -const SUF_NORMAL: &str = "__ZED_PERF"; -/// The suffix on tests marked with `#[perf(iterations = n)]`. -const SUF_FIXED: &str = "__ZED_PERF_FIXEDITER"; -/// The env var in which we pass the iteration count to our tests. -const ITER_ENV_VAR: &str = "ZED_PERF_ITER"; +/// How does this perf run return its output? +enum OutputKind<'a> { + /// Print markdown to the terminal. + Markdown, + /// Save JSON to a file. + Json(&'a Path), +} -#[allow(clippy::too_many_lines)] +/// Runs a given metadata-returning function from a test handler, parsing its +/// output into a `TestMdata`. 
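+/// The metadata lines on stdout are expected to look roughly like this
+/// (values are illustrative):
+/// ```text
+/// ZED_MDATA_ version 0
+/// ZED_MDATA_ weight 50
+/// ZED_MDATA_ importance average
+/// ```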
+fn parse_mdata(test_bin: &str, mdata_fn: &str) -> Result<TestMdata, FailKind> {
+    let mut cmd = Command::new(test_bin);
+    cmd.args([mdata_fn, "--exact", "--nocapture"]);
+    let out = cmd
+        .output()
+        .expect("FATAL: Could not run test binary {test_bin}");
+    assert!(out.status.success());
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    let mut version = None;
+    let mut iterations = None;
+    let mut importance = Importance::default();
+    let mut weight = consts::WEIGHT_DEFAULT;
+    for line in stdout
+        .lines()
+        .filter_map(|l| l.strip_prefix(consts::MDATA_LINE_PREF))
+    {
+        let mut items = line.split_whitespace();
+        // For v0, we know the ident always comes first, then one field.
+        match items.next().unwrap() {
+            consts::VERSION_LINE_NAME => {
+                let v = items.next().unwrap().parse::<u32>().unwrap();
+                if v > consts::MDATA_VER {
+                    return Err(FailKind::VersionMismatch);
+                }
+                version = Some(v);
+            }
+            consts::ITER_COUNT_LINE_NAME => {
+                iterations = Some(items.next().unwrap().parse::<usize>().unwrap());
+            }
+            consts::IMPORTANCE_LINE_NAME => {
+                importance = match items.next().unwrap() {
+                    "critical" => Importance::Critical,
+                    "important" => Importance::Important,
+                    "average" => Importance::Average,
+                    "iffy" => Importance::Iffy,
+                    "fluff" => Importance::Fluff,
+                    _ => unreachable!(),
+                };
+            }
+            consts::WEIGHT_LINE_NAME => {
+                weight = items.next().unwrap().parse::<u8>().unwrap();
+            }
+            _ => unreachable!(),
+        }
+    }
+
+    Ok(TestMdata {
+        version: version.unwrap(),
+        // Iterations may be determined by us and thus left unspecified.
+        iterations,
+        // In principle this should always be set, but just for the sake of
+        // stability allow the potentially-breaking change of not reporting the
+        // importance without erroring. Maybe we want to change this.
+        importance,
+        // Same with weight.
+        weight,
+    })
+}
+
+/// Compares the perf results of two profiles as per the arguments passed in.
+fn compare_profiles(args: &[String]) {
+    let ident_new = args.first().expect("FATAL: missing identifier for new run");
+    let ident_old = args.get(1).expect("FATAL: missing identifier for old run");
+    // TODO: move this to a constant also tbh
+    let wspace_dir = std::env::var("CARGO_WORKSPACE_DIR").unwrap();
+    let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR);
+
+    // Use the blank outputs initially, so we can merge into these with prefixes.
+    let mut outputs_new = Output::blank();
+    let mut outputs_old = Output::blank();
+
+    for e in runs_dir.read_dir().unwrap() {
+        let Ok(entry) = e else {
+            continue;
+        };
+        let Ok(metadata) = entry.metadata() else {
+            continue;
+        };
+        if metadata.is_file() {
+            let Ok(name) = entry.file_name().into_string() else {
+                continue;
+            };
+
+            // A little helper to avoid code duplication. Reads the `output` from
+            // a json file, then merges it into what we have so far. 
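+            // e.g. `blah.gpui.json` yields the crate name `gpui` as the merge
+            // prefix, while the leading `blah` is the run identifier.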
+ let read_into = |output: &mut Output| { + let mut elems = name.split('.').skip(1); + let prefix = elems.next().unwrap(); + assert_eq!("json", elems.next().unwrap()); + assert!(elems.next().is_none()); + let handle = OpenOptions::new().read(true).open(entry.path()).unwrap(); + let o_other: Output = serde_json::from_reader(handle).unwrap(); + output.merge(o_other, prefix); + }; + + if name.starts_with(ident_old) { + read_into(&mut outputs_old); + } else if name.starts_with(ident_new) { + read_into(&mut outputs_new); + } + } + } + + let res = outputs_new.compare_perf(outputs_old); + println!("{res}"); +} + +#[expect(clippy::too_many_lines, reason = "This will be split up soon!")] fn main() { + let args = std::env::args().collect::>(); // We get passed the test we need to run as the 1st argument after our own name. - let test_bin = std::env::args().nth(1).unwrap(); - let mut cmd = Command::new(&test_bin); + let test_bin = args + .get(1) + .expect("FATAL: No test binary or command; this shouldn't be manually invoked!"); + + // We're being asked to compare two results, not run the profiler. + if test_bin == "compare" { + compare_profiles(&args[2..]); + return; + } + + // Whether to skip printing some information to stderr. + let mut quiet = false; + // Minimum test importance we care about this run. + let mut thresh = Importance::Iffy; + // Where to print the output of this run. + let mut out_kind = OutputKind::Markdown; + + for arg in args.iter().skip(2) { + match arg.as_str() { + "--critical" => thresh = Importance::Critical, + "--important" => thresh = Importance::Important, + "--average" => thresh = Importance::Average, + "--iffy" => thresh = Importance::Iffy, + "--fluff" => thresh = Importance::Fluff, + "--quiet" => quiet = true, + s if s.starts_with("--json") => { + out_kind = OutputKind::Json(Path::new( + s.strip_prefix("--json=") + .expect("FATAL: Invalid json parameter; pass --json=filename"), + )); + } + _ => (), + } + } + if !quiet { + eprintln!("Starting perf check"); + } + + let mut cmd = Command::new(test_bin); // --format=json is nightly-only :( cmd.args(["--list", "--format=terse"]); let out = cmd @@ -64,6 +245,9 @@ fn main() { out.status.success(), "FATAL: Cannot do perf check - test binary {test_bin} returned an error" ); + if !quiet { + eprintln!("Test binary ran successfully; starting profile..."); + } // Parse the test harness output to look for tests we care about. let stdout = String::from_utf8_lossy(&out.stdout); let mut test_list: Vec<_> = stdout @@ -79,37 +263,62 @@ fn main() { } }) // Exclude tests that aren't marked for perf triage based on suffix. - .filter(|t_name| t_name.ends_with(SUF_NORMAL) || t_name.ends_with(SUF_FIXED)) + .filter(|t_name| { + t_name.ends_with(consts::SUF_NORMAL) || t_name.ends_with(consts::SUF_MDATA) + }) .collect(); // Pulling itertools just for .dedup() would be quite a big dependency that's - // not used elsewhere, so do this on the vec instead. + // not used elsewhere, so do this on a vec instead. test_list.sort_unstable(); test_list.dedup(); - if !test_list.is_empty() { - // Print the markdown header which matches hyperfine's result. - // TODO: Support exporting JSON also. - println!( - "| Command | Mean [ms] | Min [ms] | Max [ms] | Iterations | Iter/sec |\n|:---|---:|---:|---:|---:|---:|" - ); - } + let len = test_list.len(); + + // Tests should come in pairs with their mdata fn! 
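+    // (Once sorted and deduped, each `foo__ZED_PERF_FN` entry should sit directly
+    // next to its `foo__ZED_PERF_MDATA` counterpart.)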
+ assert!( + len.is_multiple_of(2), + "Malformed tests in test binary {test_bin}" + ); + + let mut output = Output::default(); // Spawn and profile an instance of each perf-sensitive test, via hyperfine. - for t_name in test_list { - // Pretty-print the stripped name for the test. - let t_name_normal = t_name.replace(SUF_FIXED, "").replace(SUF_NORMAL, ""); + // Each test is a pair of (test, metadata-returning-fn), so grab both. We also + // know the list is sorted. + for (idx, t_pair) in test_list.chunks_exact(2).enumerate() { + if !quiet { + eprint!("\rProfiling test {}/{}", idx + 1, len / 2); + } + // Be resilient against changes to these constants. + let (t_name, t_mdata) = if consts::SUF_NORMAL < consts::SUF_MDATA { + (t_pair[0], t_pair[1]) + } else { + (t_pair[1], t_pair[0]) + }; + // Pretty-printable stripped name for the test. + let t_name_pretty = t_name.replace(consts::SUF_NORMAL, ""); + + // Get the metadata this test reports for us. + let t_mdata = match parse_mdata(test_bin, t_mdata) { + Ok(mdata) => mdata, + Err(err) => fail!(output, t_name_pretty, err), + }; + + if t_mdata.importance < thresh { + fail!(output, t_name_pretty, t_mdata, FailKind::Skipped); + } + // Time test execution to see how many iterations we need to do in order // to account for random noise. This is skipped for tests with fixed // iteration counts. - let final_iter_count = if t_name.ends_with(SUF_FIXED) { - None - } else { + let mut errored = false; + let final_iter_count = t_mdata.iterations.unwrap_or_else(|| { let mut iter_count = DEFAULT_ITER_COUNT; loop { - let mut cmd = Command::new(&test_bin); + let mut cmd = Command::new(test_bin); cmd.args([t_name, "--exact"]); - cmd.env(ITER_ENV_VAR, format!("{iter_count}")); + cmd.env(consts::ITER_ENV_VAR, format!("{iter_count}")); // Don't let the child muck up our stdin/out/err. cmd.stdin(Stdio::null()); cmd.stdout(Stdio::null()); @@ -119,28 +328,32 @@ fn main() { let out = cmd.spawn().unwrap().wait(); let post = Instant::now(); if !out.unwrap().success() { - println!( - "| {t_name_normal} (ERRORED IN TRIAGE) | N/A | N/A | N/A | {iter_count} | N/A |" - ); - return; + errored = true; + break iter_count; } - if post - pre > NOISE_CUTOFF { - break Some(iter_count); + if post - pre > consts::NOISE_CUTOFF { + break iter_count; } else if let Some(c) = iter_count.checked_mul(ITER_COUNT_MUL) { iter_count = c; } else { // This should almost never happen, but maybe..? eprintln!( - "WARNING: Running nearly usize::MAX iterations of test {t_name_normal}" + "WARNING: Running nearly usize::MAX iterations of test {t_name_pretty}" ); - break Some(iter_count); + break iter_count; } } - }; + }); + + // Don't profile failing tests. + if errored { + fail!(output, t_name_pretty, t_mdata, FailKind::Triage); + } // Now profile! let mut perf_cmd = Command::new("hyperfine"); // Warm up the cache and print markdown output to stdout. + // TODO: json perf_cmd.args([ "--style", "none", @@ -150,42 +363,89 @@ fn main() { "-", &format!("{test_bin} {t_name}"), ]); - if let Some(final_iter_count) = final_iter_count { - perf_cmd.env(ITER_ENV_VAR, format!("{final_iter_count}")); - } + perf_cmd.env(consts::ITER_ENV_VAR, format!("{final_iter_count}")); let p_out = perf_cmd.output().unwrap(); - let fin_iter = match final_iter_count { - Some(i) => &format!("{i}"), - None => "(preset)", - }; if p_out.status.success() { - let output = String::from_utf8_lossy(&p_out.stdout); - // Strip the name of the test binary from the table (and the space after it!) 
- // + our extraneous test bits + the "Relative" column (which is always at the end and "1.00"). - let output = output - .replace(&format!("{test_bin} "), "") - .replace(SUF_FIXED, "") - .replace(SUF_NORMAL, "") - .replace(" 1.00 |", ""); + let cmd_output = String::from_utf8_lossy(&p_out.stdout); // Can't use .last() since we have a trailing newline. Sigh. - let fin = output.lines().nth(3).unwrap(); + let results_line = cmd_output.lines().nth(3).unwrap(); + // Grab the values out of the pretty-print. + // TODO: Parse json instead. + let mut res_iter = results_line.split_whitespace(); + // Durations are given in milliseconds, so account for that. + let mean = + Duration::from_secs_f64(res_iter.nth(4).unwrap().parse::().unwrap() / 1000.); + let stddev = + Duration::from_secs_f64(res_iter.nth(1).unwrap().parse::().unwrap() / 1000.); - // Calculate how many iterations this does per second, for easy comparison. - let ms = fin - .split_whitespace() - .nth(3) + output.success( + t_name_pretty, + t_mdata, + final_iter_count, + Timings { mean, stddev }, + ); + } else { + fail!( + output, + t_name_pretty, + t_mdata, + final_iter_count, + FailKind::Profile + ); + } + } + if !quiet { + if output.is_empty() { + eprintln!("Nothing to do."); + } else { + // If stdout and stderr are on the same terminal, move us after the + // output from above. + eprintln!(); + } + } + + // No need making an empty json file on every empty test bin. + if output.is_empty() { + return; + } + + match out_kind { + OutputKind::Markdown => print!("{output}"), + OutputKind::Json(user_path) => { + let wspace_dir = std::env::var("CARGO_WORKSPACE_DIR").unwrap(); + let runs_dir = PathBuf::from(&wspace_dir).join(consts::RUNS_DIR); + std::fs::create_dir_all(&runs_dir).unwrap(); + assert!( + !user_path.to_string_lossy().is_empty(), + "FATAL: Empty filename specified!" + ); + // Get the test binary's crate's name; a path like + // target/release-fast/deps/gpui-061ff76c9b7af5d7 + // would be reduced to just "gpui". + let test_bin_stripped = Path::new(test_bin) + .file_name() + .unwrap() + .to_str() .unwrap() - .parse::() + .rsplit_once('-') + .unwrap() + .0; + let mut file_path = runs_dir.join(user_path); + file_path + .as_mut_os_string() + .push(format!(".{test_bin_stripped}.json")); + let mut out_file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&file_path) .unwrap(); - let mul_fac = 1000.0 / ms; - let iter_sec = match final_iter_count { - #[allow(clippy::cast_precision_loss)] - Some(c) => &format!("{:.1}", mul_fac * c as f64), - None => "(unknown)", - }; - println!("{fin} {fin_iter} | {iter_sec} |"); - } else { - println!("{t_name_normal} (ERRORED) | N/A | N/A | N/A | {fin_iter} | N/A |"); + out_file + .write_all(&serde_json::to_vec(&output).unwrap()) + .unwrap(); + if !quiet { + eprintln!("JSON output written to {}", file_path.display()); + } } } }