Introduce `ZED_MEASUREMENTS=1` and `script/histogram` (#4191)

Antonio Scandurra created

With this pull request, we're introducing a `ZED_MEASUREMENTS` variable
which is used by the `measure` function to print to stderr how long it
took to execute a given closure. This is used right now to measure frame
time when a new frame is requested. This data can be plotted as a
histogram via `script/histogram`.

Here's a workflow I typically use:

- Run `export ZED_MEASUREMENTS=1`
- Check out a commit that you want to measure the performance of
- Run zed in release mode and use it for 5-10 seconds
- Copy the printed measurements into a file, say `version-a`
- Check out another commit that you want to measure the performance of
- Run zed in release mode and use it for 5-10 seconds
- Copy the printed measurements into a file, say `version-b`
- Run `script/histogram version-a version-b` (note that you can supply
however many files you want)

Release Notes:

- N/A

Change summary

crates/gpui/src/window.rs |  6 ++-
crates/util/src/util.rs   | 28 +++++++++++++--
script/histogram          | 73 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 101 insertions(+), 6 deletions(-)

Detailed changes

crates/gpui/src/window.rs 🔗

@@ -34,7 +34,7 @@ use std::{
         Arc,
     },
 };
-use util::ResultExt;
+use util::{measure, ResultExt};
 
 mod element_cx;
 pub use element_cx::*;
@@ -310,7 +310,9 @@ impl Window {
         platform_window.on_request_frame(Box::new({
             let mut cx = cx.to_async();
             move || {
-                handle.update(&mut cx, |_, cx| cx.draw()).log_err();
+                measure("frame duration", || {
+                    handle.update(&mut cx, |_, cx| cx.draw()).log_err();
+                })
             }
         }));
         platform_window.on_resize(Box::new({

crates/util/src/util.rs 🔗

@@ -7,19 +7,21 @@ pub mod paths;
 #[cfg(any(test, feature = "test-support"))]
 pub mod test;
 
+pub use backtrace::Backtrace;
+use futures::Future;
+use lazy_static::lazy_static;
+use rand::{seq::SliceRandom, Rng};
 use std::{
     borrow::Cow,
     cmp::{self, Ordering},
+    env,
     ops::{AddAssign, Range, RangeInclusive},
     panic::Location,
     pin::Pin,
     task::{Context, Poll},
+    time::Instant,
 };
 
-pub use backtrace::Backtrace;
-use futures::Future;
-use rand::{seq::SliceRandom, Rng};
-
 pub use take_until::*;
 
 #[macro_export]
@@ -133,6 +135,24 @@ pub fn merge_non_null_json_value_into(source: serde_json::Value, target: &mut se
     }
 }
 
+/// Runs `f` and returns its result, optionally reporting how long it took.
+///
+/// When the `ZED_MEASUREMENTS` environment variable equals `1` or `true`,
+/// the time spent inside `f` is printed to stderr as `label: duration`.
+/// For any other value, or when the variable is unset, `f` is simply called.
+pub fn measure<R>(label: &str, f: impl FnOnce() -> R) -> R {
+    lazy_static! {
+        pub static ref ZED_MEASUREMENTS: bool = matches!(
+            env::var("ZED_MEASUREMENTS").as_deref(),
+            Ok("1") | Ok("true")
+        );
+    }
+
+    // Measurements disabled: run the closure without any timing work.
+    if !*ZED_MEASUREMENTS {
+        return f();
+    }
+
+    let started_at = Instant::now();
+    let output = f();
+    let took = started_at.elapsed();
+    eprintln!("{}: {:?}", label, took);
+    output
+}
+
 pub trait ResultExt<E> {
     type Ok;
 

script/histogram 🔗

@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+# Required dependencies for this script:
+#
+# pandas: For data manipulation and analysis.
+# matplotlib: For creating static, interactive, and animated visualizations in Python.
+# seaborn: For making statistical graphics in Python, based on matplotlib.
+
+# To install these dependencies, use the following pip command:
+# pip install pandas matplotlib seaborn
+
+# This script is designed to parse log files for performance measurements and create histograms of these measurements.
+# It expects log files to contain lines with measurements in the format "measurement: timeunit" where timeunit can be in milliseconds (ms) or microseconds (µs).
+# Lines that do not contain a colon ':' are skipped.
+# The script takes one or more file paths as command-line arguments, parses each log file, and then combines the data into a single DataFrame.
+# It then converts all time measurements into milliseconds, discards the original time and unit columns, and creates histograms for each unique measurement type.
+# The histograms display the distribution of times for each measurement, separated by log file, and normalized to show density rather than count.
+# To use this script, run it from the command line with the log file paths as arguments, like so:
+# python this_script.py log1.txt log2.txt ...
+# The script will then parse the provided log files and display the histograms for each type of measurement found.
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import sys
+
+def parse_log_file(file_path):
+    data = {'measurement': [], 'time': [], 'unit': [], 'log_file': []}
+    with open(file_path, 'r') as file:
+        for line in file:
+            if ':' not in line:
+                continue
+
+            parts = line.strip().split(': ')
+            if len(parts) != 2:
+                continue
+
+            measurement, time_with_unit = parts[0], parts[1]
+            if 'ms' in time_with_unit:
+                time, unit = time_with_unit[:-2], 'ms'
+            elif 'ยตs' in time_with_unit:
+                time, unit = time_with_unit[:-2], 'ยตs'
+            else:
+                raise ValueError(f"Invalid time unit in line: {line.strip()}")
+                continue
+
+            data['measurement'].append(measurement)
+            data['time'].append(float(time))
+            data['unit'].append(unit)
+            data['log_file'].append(file_path.split('/')[-1])
+    return pd.DataFrame(data)
+
+def create_histograms(df, measurement):
+    filtered_df = df[df['measurement'] == measurement]
+    plt.figure(figsize=(12, 6))
+    sns.histplot(data=filtered_df, x='time_ms', hue='log_file', element='step', stat='density', common_norm=False, palette='bright')
+    plt.title(f'Histogram of {measurement}')
+    plt.xlabel('Time (ms)')
+    plt.ylabel('Density')
+    plt.grid(True)
+    plt.xlim(filtered_df['time_ms'].quantile(0.01), filtered_df['time_ms'].quantile(0.99))
+    plt.show()
+
+
+file_paths = sys.argv[1:]
+dfs = [parse_log_file(path) for path in file_paths]
+combined_df = pd.concat(dfs, ignore_index=True)
+combined_df['time_ms'] = combined_df.apply(lambda row: row['time'] if row['unit'] == 'ms' else row['time'] / 1000, axis=1)
+combined_df.drop(['time', 'unit'], axis=1, inplace=True)
+
+measurement_types = combined_df['measurement'].unique()
+for measurement in measurement_types:
+    create_histograms(combined_df, measurement)