//! Some constants and datatypes used in the Zed perf profiler. Should only be
//! consumed by the crate providing the matching macros.

use collections::HashMap;
use serde::{Deserialize, Serialize};
use std::time::Duration;

pub mod consts {
    //! Preset identifiers and constants so that the profiler and proc macro agree
    //! on their communication protocol.

    /// The suffix on the actual test function.
    pub const SUF_NORMAL: &str = "__ZED_PERF_FN";
    /// The suffix on an extra function which prints metadata about a test to stdout.
    pub const SUF_MDATA: &str = "__ZED_PERF_MDATA";
    /// The env var in which we pass the iteration count to our tests.
    pub const ITER_ENV_VAR: &str = "ZED_PERF_ITER";
    /// The prefix printed on all benchmark test metadata lines, to distinguish it from
    /// possible output by the test harness itself.
    pub const MDATA_LINE_PREF: &str = "ZED_MDATA_";
    /// The version number for the data returned from the test metadata function.
    /// Increment on non-backwards-compatible changes.
    pub const MDATA_VER: u32 = 0;
    /// The default weight, if none is specified.
    pub const WEIGHT_DEFAULT: u8 = 50;
    /// How long a test must have run to be assumed to be reliable-ish.
    pub const NOISE_CUTOFF: std::time::Duration = std::time::Duration::from_millis(250);

    /// Identifier for the iteration count in test metadata.
    pub const ITER_COUNT_LINE_NAME: &str = "iter_count";
    /// Identifier for the weight in test metadata.
    pub const WEIGHT_LINE_NAME: &str = "weight";
    /// Identifier for the importance in test metadata.
    pub const IMPORTANCE_LINE_NAME: &str = "importance";
    /// Identifier for the test metadata version.
    pub const VERSION_LINE_NAME: &str = "version";

    /// Where to save json run information.
    pub const RUNS_DIR: &str = ".perf-runs";
}
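
// A minimal sketch (not part of the protocol definition) of how the proc-macro side
// might emit a metadata line using the constants above. Only the prefix and key names
// come from this module; the `key: value` layout is an illustrative assumption.
#[cfg(test)]
mod consts_sketch {
    use super::consts;

    /// Hypothetical helper: format one metadata line for a given key.
    fn mdata_line(key: &str, value: impl std::fmt::Display) -> String {
        format!("{}{key}: {value}", consts::MDATA_LINE_PREF)
    }

    #[test]
    fn lines_carry_the_shared_prefix() {
        let line = mdata_line(consts::ITER_COUNT_LINE_NAME, 128_usize);
        assert!(line.starts_with(consts::MDATA_LINE_PREF));
        assert!(line.contains(consts::ITER_COUNT_LINE_NAME));
    }
}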

/// How relevant a benchmark is.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Importance {
    /// Regressions shouldn't be accepted without good reason.
    Critical = 4,
    /// Regressions should be paid extra attention.
    Important = 3,
    /// No extra attention should be paid to regressions, but they might still
    /// be indicative of something happening.
    #[default]
    Average = 2,
    /// Unclear if regressions are likely to be meaningful, but still worth keeping
    /// an eye on. Lowest level that's checked by default by the profiler.
    Iffy = 1,
    /// Regressions are likely to be spurious or don't affect core functionality.
    /// Only relevant if a lot of them happen, or as supplemental evidence for a
    /// higher-importance benchmark regressing. Not checked by default.
    Fluff = 0,
}

impl std::fmt::Display for Importance {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Importance::Critical => f.write_str("critical"),
            Importance::Important => f.write_str("important"),
            Importance::Average => f.write_str("average"),
            Importance::Iffy => f.write_str("iffy"),
            Importance::Fluff => f.write_str("fluff"),
        }
    }
}
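
// Hedged sketch documenting the derived ordering above: with explicit discriminants,
// the derived `Ord` makes `Critical` compare greater than `Fluff` even though it is
// declared first, which is what the sorting and comparison code below relies on.
#[cfg(test)]
mod importance_order_tests {
    use super::*;

    #[test]
    fn ordering_follows_discriminants_not_declaration_order() {
        assert!(Importance::Critical > Importance::Important);
        assert!(Importance::Average > Importance::Iffy);
        assert!(Importance::Iffy > Importance::Fluff);
        assert_eq!(Importance::default(), Importance::Average);
    }
}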

/// Why or when did this test fail?
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum FailKind {
    /// Failed while triaging it to determine the iteration count.
    Triage,
    /// Failed while profiling it.
    Profile,
    /// Failed due to an incompatible version for the test.
    VersionMismatch,
    /// Skipped due to filters applied on the perf run.
    Skipped,
}

impl std::fmt::Display for FailKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            FailKind::Triage => f.write_str("failed in triage"),
            FailKind::Profile => f.write_str("failed while profiling"),
            FailKind::VersionMismatch => f.write_str("test version mismatch"),
            FailKind::Skipped => f.write_str("skipped"),
        }
    }
}

/// Information about a given perf test.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TestMdata {
    /// A version number for when the test was generated. If this is greater
    /// than the version this test handler expects, one of the following will
    /// happen in an unspecified manner:
    /// - The test is skipped silently.
    /// - The handler exits with an error message indicating the version mismatch
    ///   or inability to parse the metadata.
    ///
    /// INVARIANT: If `version` <= `MDATA_VER`, this tool *must* be able to
    /// correctly parse the output of this test.
    pub version: u32,
    /// How many iterations to pass to this test, if this is preset.
    pub iterations: Option<usize>,
    /// The importance of this particular test. See the docs on `Importance` for
    /// details.
    pub importance: Importance,
    /// The weight of this particular test within its importance category. Used
    /// when comparing across runs.
    pub weight: u8,
}

/// The actual timings of a test, as measured by Hyperfine.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Timings {
    /// Mean runtime for `self.iter_total` runs of this test.
    pub mean: Duration,
    /// Standard deviation for the above.
    pub stddev: Duration,
}

impl Timings {
    /// How many iterations does this test seem to do per second?
    #[expect(
        clippy::cast_precision_loss,
        reason = "We only care about a couple sig figs anyways"
    )]
    #[must_use]
    pub fn iters_per_sec(&self, total_iters: usize) -> f64 {
        (1000. / self.mean.as_millis() as f64) * total_iters as f64
    }
}
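
// Hedged numeric sketch of `iters_per_sec`: 100 iterations with a mean batch runtime
// of 500 ms works out to 200 iterations per second. The figures are illustrative only.
#[cfg(test)]
mod timings_tests {
    use super::*;
    use std::time::Duration;

    #[test]
    fn iters_per_sec_from_mean_runtime() {
        let timings = Timings {
            mean: Duration::from_millis(500),
            stddev: Duration::from_millis(10),
        };
        // (1000 / 500 ms) * 100 iterations = 200 iterations per second.
        assert!((timings.iters_per_sec(100) - 200.0).abs() < 1e-9);
    }
}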

/// Aggregate output of all tests run by this handler.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct Output {
    /// A list of test outputs. Format is `(test_name, metadata, timings)`; an
    /// `Ok` in the last field indicates the test succeeded.
    ///
    /// INVARIANT: If the test succeeded, the second field is `Some(mdata)` and
    /// `mdata.iterations` is `Some(_)`.
    tests: Vec<(String, Option<TestMdata>, Result<Timings, FailKind>)>,
}

impl Output {
    /// Instantiates an empty "output". Useful for merging.
    #[must_use]
    pub fn blank() -> Self {
        Output { tests: Vec::new() }
    }

    /// Reports a success and adds it to this run's `Output`.
    pub fn success(
        &mut self,
        name: impl AsRef<str>,
        mut mdata: TestMdata,
        iters: usize,
        timings: Timings,
    ) {
        mdata.iterations = Some(iters);
        self.tests
            .push((name.as_ref().to_string(), Some(mdata), Ok(timings)));
    }

    /// Reports a failure and adds it to this run's `Output`. If this test was tried
    /// with some number of iterations (i.e. this was not a version mismatch or skipped
    /// test), that count should be reported as well.
    ///
    /// Using the `fail!()` macro is usually more convenient.
    pub fn failure(
        &mut self,
        name: impl AsRef<str>,
        mut mdata: Option<TestMdata>,
        attempted_iters: Option<usize>,
        kind: FailKind,
    ) {
        if let Some(ref mut mdata) = mdata {
            mdata.iterations = attempted_iters;
        }
        self.tests
            .push((name.as_ref().to_string(), mdata, Err(kind)));
    }

    /// True if no tests were executed in this run.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.tests.is_empty()
    }

    /// Sorts the tests in the output into the order we want them printed.
    pub fn sort(&mut self) {
        self.tests.sort_unstable_by(|a, b| match (a, b) {
            // Tests where we got no metadata go at the end.
            ((_, Some(_), _), (_, None, _)) => std::cmp::Ordering::Less,
            ((_, None, _), (_, Some(_), _)) => std::cmp::Ordering::Greater,
            // Then sort by importance (most important first), then weight (heaviest first).
            ((_, Some(a_mdata), _), (_, Some(b_mdata), _)) => {
                let c = b_mdata.importance.cmp(&a_mdata.importance);
                if matches!(c, std::cmp::Ordering::Equal) {
                    b_mdata.weight.cmp(&a_mdata.weight)
                } else {
                    c
                }
            }
            // Lastly by name.
            ((a_name, ..), (b_name, ..)) => a_name.cmp(b_name),
        });
    }

    /// Merges the output of two runs, appending a prefix to the results of the new run.
    /// To be used in conjunction with `Output::blank()`, or else only some tests will have
    /// a prefix set.
    pub fn merge(&mut self, other: Self, pref_other: impl AsRef<str>) {
        self.tests = std::mem::take(&mut self.tests)
            .into_iter()
            .chain(other.tests.into_iter().map(|(name, md, tm)| {
                let mut new_name = "crates/".to_string();
                new_name.push_str(pref_other.as_ref());
                new_name.push_str("::");
                new_name.push_str(&name);
                (new_name, md, tm)
            }))
            .collect();
    }

    /// Evaluates the performance of `self` against `baseline`. The latter is taken
    /// as the comparison point, i.e. a positive resulting `PerfReport` means that
    /// `self` performed better.
    ///
    /// # Panics
    /// `self` and `baseline` are assumed to have the iterations field on all
    /// `TestMdata`s set to `Some(_)` if the `TestMdata` is present itself.
    #[must_use]
    pub fn compare_perf(self, baseline: Self) -> PerfReport {
        let self_categories = self.collapse();
        let mut other_categories = baseline.collapse();

        let deltas = self_categories
            .into_iter()
            .filter_map(|(cat, self_data)| {
                // Only compare categories where both runs have data.
                let mut other_data = other_categories.remove(&cat)?;
                let mut max = 0.;
                let mut min = 0.;

                // Running totals for averaging out tests.
                let mut r_total_numerator = 0.;
                let mut r_total_denominator = 0;
                // Yeah this is O(n^2), but realistically it'll hardly be a bottleneck.
                for (name, (s_timings, s_iters, weight)) in self_data {
                    // If the two runs disagree on a test's weight, prefer the new run's.
                    let Some((o_timings, o_iters, _)) = other_data.remove(&name) else {
                        continue;
                    };
                    let shift =
                        (s_timings.iters_per_sec(s_iters) / o_timings.iters_per_sec(o_iters)) - 1.;
                    if shift > max {
                        max = shift;
                    }
                    if shift < min {
                        min = shift;
                    }
                    r_total_numerator += shift * f64::from(weight);
                    r_total_denominator += u32::from(weight);
                }
                let mean = r_total_numerator / f64::from(r_total_denominator);
                // TODO: also aggregate the standard deviation? That's harder to keep
                // meaningful, though, since we don't know which tests are correlated.
                Some((cat, PerfDelta { max, mean, min }))
            })
            .collect();

        PerfReport { deltas }
    }

    /// Collapses this `Output` into a `HashMap` keyed by `Importance`, with each
    /// category's tests represented as a map of `name -> (Timings, iterations, weight)`.
    fn collapse(self) -> HashMap<Importance, HashMap<String, (Timings, usize, u8)>> {
        let mut categories = HashMap::<Importance, HashMap<String, _>>::default();
        for entry in self.tests {
            if let Some(mdata) = entry.1
                && let Ok(timings) = entry.2
            {
                if let Some(handle) = categories.get_mut(&mdata.importance) {
                    handle.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
                } else {
                    let mut new = HashMap::default();
                    new.insert(entry.0, (timings, mdata.iterations.unwrap(), mdata.weight));
                    categories.insert(mdata.importance, new);
                }
            }
        }

        categories
    }
}
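
// Hedged end-to-end sketch of the reporting flow. The test name, timings, and
// iteration counts below are illustrative assumptions, not data from a real run;
// they exist only to exercise `success`, `compare_perf`, and the report rendering.
#[cfg(test)]
mod output_flow_tests {
    use super::*;
    use std::time::Duration;

    /// Hypothetical helper: metadata with default-ish values for a given importance.
    fn mdata(importance: Importance) -> TestMdata {
        TestMdata {
            version: consts::MDATA_VER,
            iterations: None,
            importance,
            weight: consts::WEIGHT_DEFAULT,
        }
    }

    #[test]
    fn compare_perf_reports_relative_shift() {
        // Baseline: 100 iterations at a mean of 1000 ms, i.e. 100 iters/sec.
        let mut baseline = Output::blank();
        baseline.success(
            "example_test",
            mdata(Importance::Average),
            100,
            Timings {
                mean: Duration::from_millis(1000),
                stddev: Duration::from_millis(5),
            },
        );
        // New run: the same 100 iterations at 500 ms, i.e. 200 iters/sec (+100%).
        let mut new_run = Output::blank();
        new_run.success(
            "example_test",
            mdata(Importance::Average),
            100,
            Timings {
                mean: Duration::from_millis(500),
                stddev: Duration::from_millis(5),
            },
        );
        // The shift is (new iters/sec / old iters/sec) - 1 = 1.0 here, so the rendered
        // report should contain a row for the "average" category.
        let report = new_run.compare_perf(baseline);
        assert!(report.to_string().contains("| average |"));
    }
}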

impl std::fmt::Display for Output {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Don't print the header for an empty run.
        if self.tests.is_empty() {
            return Ok(());
        }

        // We want to print important tests at the top, then alphabetical.
        let mut sorted = self.clone();
        sorted.sort();
        // Markdown header for making a nice little table :>
        writeln!(
            f,
            "| Command | Iter/sec | Mean [ms] | SD [ms] | Iterations | Importance (weight) |",
        )?;
        writeln!(f, "|:---|---:|---:|---:|---:|---:|")?;
        for (name, metadata, timings) in &sorted.tests {
            match metadata {
                Some(metadata) => match timings {
                    // Happy path.
                    Ok(timings) => {
                        // If the test succeeded, then metadata.iterations is Some(_).
                        writeln!(
                            f,
                            "| {} | {:.2} | {} | {:.2} | {} | {} ({}) |",
                            name,
                            timings.iters_per_sec(metadata.iterations.unwrap()),
                            {
                                // Very small mean runtimes will give inaccurate
                                // results. Should probably also penalise weight.
                                let mean = timings.mean.as_secs_f64() * 1000.;
                                if mean < consts::NOISE_CUTOFF.as_secs_f64() * 1000. / 8. {
                                    format!("{mean:.2} (unreliable)")
                                } else {
                                    format!("{mean:.2}")
                                }
                            },
                            timings.stddev.as_secs_f64() * 1000.,
                            metadata.iterations.unwrap(),
                            metadata.importance,
                            metadata.weight,
                        )?;
                    }
                    // We have (some) metadata, but the test errored.
                    Err(err) => writeln!(
                        f,
                        "| ({}) {} | N/A | N/A | N/A | {} | {} ({}) |",
                        err,
                        name,
                        metadata
                            .iterations
                            .map_or_else(|| "N/A".to_owned(), |i| format!("{i}")),
                        metadata.importance,
                        metadata.weight
                    )?,
                },
                // No metadata, couldn't even parse the test output.
                None => writeln!(
                    f,
                    "| ({}) {} | N/A | N/A | N/A | N/A | N/A |",
                    timings.as_ref().unwrap_err(),
                    name
                )?,
            }
        }
        writeln!(f)?;
        Ok(())
    }
}
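
// Hedged check of the rendering above: a blank `Output` deliberately prints nothing,
// so empty or merged-away runs don't leave a stray table header behind.
#[cfg(test)]
mod output_display_tests {
    use super::*;

    #[test]
    fn blank_output_renders_empty() {
        assert_eq!(Output::blank().to_string(), "");
    }
}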

/// The difference in performance between two runs within a given importance
/// category.
struct PerfDelta {
    /// The biggest improvement / least bad regression.
    max: f64,
    /// The weighted average change in test times.
    mean: f64,
    /// The worst regression / smallest improvement.
    min: f64,
}

/// Shim type for reporting all performance deltas across importance categories.
pub struct PerfReport {
    /// Inner (group, diff) pairing.
    deltas: HashMap<Importance, PerfDelta>,
}

impl std::fmt::Display for PerfReport {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.deltas.is_empty() {
            return write!(f, "(no matching tests)");
        }
        // Sort ascending by importance; the loop below iterates in reverse so the
        // most important category is printed first.
        let mut sorted = self.deltas.iter().collect::<Vec<_>>();
        sorted.sort_unstable_by(|a, b| a.0.cmp(b.0));
        writeln!(f, "| Category | Max | Mean | Min |")?;
        // We don't want to print too many newlines at the end, so handle newlines
        // a little jankily like this.
        write!(f, "|:---|---:|---:|---:|")?;
        for (cat, delta) in sorted.into_iter().rev() {
            write!(
                f,
                "\n| {cat} | {:.3} | {:.3} | {:.3} |",
                delta.max, delta.mean, delta.min
            )?;
        }
        Ok(())
    }
}