1#!/usr/bin/env python3
2
3# This script is designed to parse log files for performance measurements and create histograms of these measurements.
4# It expects log files to contain lines with measurements in the format "measurement: timeunit" where timeunit can be in milliseconds (ms) or microseconds (µs).
5# Lines that do not contain a colon ':' are skipped.
6# The script takes one or more file paths as command-line arguments, parses each log file, and then combines the data into a single DataFrame.
7# It then converts all time measurements into milliseconds, discards the original time and unit columns, and creates histograms for each unique measurement type.
8# The histograms display the distribution of times for each measurement, separated by log file, and normalized to show density rather than count.
9# To use this script, run it from the command line with the log file paths as arguments, like so:
10# python this_script.py log1.txt log2.txt ...
11# The script will then parse the provided log files and display the histograms for each type of measurement found.
12
import os
import sys

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
17
def parse_log_file(file_path):
    """Parse one log file into a DataFrame of timing measurements.

    A usable line has the form ``<measurement>: <number><unit>``, for
    example ``render: 12.5ms``.  Lines that do not split into exactly two
    parts on ``': '`` (including lines without any colon) are ignored.

    Args:
        file_path: Path of the log file to read.  The file is decoded as
            UTF-8.

    Returns:
        A DataFrame with one row per measurement line and the columns
        ``measurement`` (text before the colon), ``time`` (float, expressed
        in ``unit``), ``unit`` (``'ms'`` or ``'µs'``) and ``log_file`` (the
        base name of ``file_path``).

    Raises:
        ValueError: If a two-part line does not end in a supported unit, or
            if the text in front of the unit is not a number.
    """
    # Accepted suffix -> unit stored in the frame.  The micro sign (U+00B5)
    # and the Greek letter mu (U+03BC) look identical in a log, so both are
    # accepted and both are stored as the micro-sign spelling.
    unit_suffixes = (
        ('ms', 'ms'),
        ('\u00b5s', '\u00b5s'),
        ('\u03bcs', '\u00b5s'),
    )
    # The base name is the same for every row, so compute it once.
    log_name = os.path.basename(file_path)

    data = {'measurement': [], 'time': [], 'unit': [], 'log_file': []}
    # Explicit UTF-8: the micro sign is non-ASCII, so decoding must not
    # depend on the platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            stripped = line.strip()
            parts = stripped.split(': ')
            if len(parts) != 2:
                continue

            measurement, time_with_unit = parts
            # Match the unit at the END of the value, which is also where it
            # is cut off below.
            for suffix, unit in unit_suffixes:
                if time_with_unit.endswith(suffix):
                    break
            else:
                raise ValueError(f"Invalid time unit in line: {stripped}")

            try:
                value = float(time_with_unit[:-len(suffix)])
            except ValueError:
                raise ValueError(
                    f"Invalid time value in line: {stripped}"
                ) from None

            data['measurement'].append(measurement)
            data['time'].append(value)
            data['unit'].append(unit)
            data['log_file'].append(log_name)
    return pd.DataFrame(data)
43
def create_histograms(df, measurement):
    """Show a density histogram of one measurement, split by log file.

    Args:
        df: DataFrame with at least the columns ``measurement``,
            ``time_ms`` and ``log_file``.
        measurement: Value of the ``measurement`` column to plot.

    Returns:
        None.  The figure is displayed with ``plt.show()``.  Nothing is
        drawn when ``df`` has no rows for ``measurement``.
    """
    filtered_df = df[df['measurement'] == measurement]
    if filtered_df.empty:
        # Nothing to draw, and the quantiles below would be NaN, which
        # matplotlib rejects as axis limits.
        return

    plt.figure(figsize=(12, 6))
    # common_norm=False: each log file's histogram is normalised on its own,
    # so files with different sample counts stay comparable.
    sns.histplot(
        data=filtered_df,
        x='time_ms',
        hue='log_file',
        element='step',
        stat='density',
        common_norm=False,
        palette='bright',
    )
    plt.title(f'Histogram of {measurement}')
    plt.xlabel('Time (ms)')
    plt.ylabel('Density')
    plt.grid(True)

    # Clip the x-axis to the 1%-99% range so outliers do not squash the bulk
    # of the distribution.  Skip the clipping when that range is empty (all
    # samples equal, or no finite samples) and keep the automatic limits.
    lower, upper = filtered_df['time_ms'].quantile([0.01, 0.99])
    if lower < upper:
        plt.xlim(lower, upper)
    plt.show()
54
55
def _add_time_ms(df):
    """Return *df* with 'time'/'unit' replaced by a 'time_ms' column."""
    # astype(float) keeps the result numeric even if concatenating an empty
    # frame left the column with object dtype.
    times = df['time'].astype(float)
    # Vectorised conversion: 'ms' values are kept, everything else (µs) is
    # scaled down by 1000.
    time_ms = times.where(df['unit'] == 'ms', times / 1000)
    return df.assign(time_ms=time_ms).drop(columns=['time', 'unit'])


def main(argv=None):
    """Parse the given log files and show one histogram per measurement.

    Args:
        argv: Log file paths.  Defaults to ``sys.argv[1:]``.

    Returns:
        Process exit code: 0 on success, 1 when the logs contain no
        measurements, 2 when no log file was given.
    """
    file_paths = sys.argv[1:] if argv is None else list(argv)
    if not file_paths:
        # pd.concat() refuses an empty list, so fail early with a usage hint.
        print(f"usage: {sys.argv[0]} LOG_FILE [LOG_FILE ...]", file=sys.stderr)
        return 2

    dfs = [parse_log_file(path) for path in file_paths]
    combined_df = pd.concat(dfs, ignore_index=True)
    if combined_df.empty:
        print("No measurements found in the given log files.", file=sys.stderr)
        return 1

    combined_df = _add_time_ms(combined_df)
    for measurement in combined_df['measurement'].unique():
        create_histograms(combined_df, measurement)
    return 0


# Run only when executed as a script, so importing this module has no side
# effects.
if __name__ == '__main__':
    sys.exit(main())