@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+# This script is designed to parse log files for performance measurements and create histograms of these measurements.
+# It expects log files to contain lines with measurements in the format "measurement: timeunit" where timeunit can be in milliseconds (ms) or microseconds (µs).
+# Lines that do not contain a colon ':' are skipped.
+# The script takes one or more file paths as command-line arguments, parses each log file, and then combines the data into a single DataFrame.
+# It then converts all time measurements into milliseconds, discards the original time and unit columns, and creates histograms for each unique measurement type.
+# The histograms display the distribution of times for each measurement, separated by log file, and normalized to show density rather than count.
+# To use this script, run it from the command line with the log file paths as arguments, like so:
+# python this_script.py log1.txt log2.txt ...
+# The script will then parse the provided log files and display the histograms for each type of measurement found.
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import sys
+
+def parse_log_file(file_path):
+ data = {'measurement': [], 'time': [], 'unit': [], 'log_file': []}
+ with open(file_path, 'r') as file:
+ for line in file:
+ if ':' not in line:
+ continue
+
+ parts = line.strip().split(': ')
+ if len(parts) != 2:
+ continue
+
+ measurement, time_with_unit = parts[0], parts[1]
+ if 'ms' in time_with_unit:
+ time, unit = time_with_unit[:-2], 'ms'
+ elif 'µs' in time_with_unit:
+ time, unit = time_with_unit[:-2], 'µs'
+ else:
+ raise ValueError(f"Invalid time unit in line: {line.strip()}")
+ continue
+
+ data['measurement'].append(measurement)
+ data['time'].append(float(time))
+ data['unit'].append(unit)
+ data['log_file'].append(file_path.split('/')[-1])
+ return pd.DataFrame(data)
+
+def create_histograms(df, measurement):
+ filtered_df = df[df['measurement'] == measurement]
+ plt.figure(figsize=(12, 6))
+ sns.histplot(data=filtered_df, x='time_ms', hue='log_file', element='step', stat='density', common_norm=False, palette='bright')
+ plt.title(f'Histogram of {measurement}')
+ plt.xlabel('Time (ms)')
+ plt.ylabel('Density')
+ plt.grid(True)
+ plt.xlim(filtered_df['time_ms'].quantile(0.01), filtered_df['time_ms'].quantile(0.99))
+ plt.show()
+
+
+file_paths = sys.argv[1:]
+dfs = [parse_log_file(path) for path in file_paths]
+combined_df = pd.concat(dfs, ignore_index=True)
+combined_df['time_ms'] = combined_df.apply(lambda row: row['time'] if row['unit'] == 'ms' else row['time'] / 1000, axis=1)
+combined_df.drop(['time', 'unit'], axis=1, inplace=True)
+
+measurement_types = combined_df['measurement'].unique()
+for measurement in measurement_types:
+ create_histograms(combined_df, measurement)