diff --git a/script/histogram b/script/histogram new file mode 100755 index 0000000000..93232fdfcd --- /dev/null +++ b/script/histogram @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 + +# This script is designed to parse log files for performance measurements and create histograms of these measurements. +# It expects log files to contain lines with measurements in the format "measurement: timeunit" where timeunit can be in milliseconds (ms) or microseconds (µs). +# Lines that do not contain a colon ':' are skipped. +# The script takes one or more file paths as command-line arguments, parses each log file, and then combines the data into a single DataFrame. +# It then converts all time measurements into milliseconds, discards the original time and unit columns, and creates histograms for each unique measurement type. +# The histograms display the distribution of times for each measurement, separated by log file, and normalized to show density rather than count. +# To use this script, run it from the command line with the log file paths as arguments, like so: +# python this_script.py log1.txt log2.txt ... +# The script will then parse the provided log files and display the histograms for each type of measurement found. 
import os
import sys

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


def parse_log_file(file_path: str) -> pd.DataFrame:
    """Parse one log file into a DataFrame of timing measurements.

    A line is treated as a measurement when splitting it on ``': '`` yields
    exactly two parts: the measurement name and a value that ends in ``ms``
    or ``µs``. Lines that do not split that way are skipped.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A DataFrame with the columns ``measurement`` (name), ``time``
        (float, in the unit given by ``unit``), ``unit`` (``'ms'`` or
        ``'µs'``) and ``log_file`` (base name of ``file_path``).

    Raises:
        ValueError: If a ``name: value`` line has a value that does not end
            in a known unit, or whose numeric part is not a valid float.
    """
    data = {'measurement': [], 'time': [], 'unit': [], 'log_file': []}
    # The label is identical for every row of this file, so compute it once.
    log_name = os.path.basename(file_path)

    # 'µ' is non-ASCII: decode explicitly rather than relying on the
    # platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            stripped = line.strip()
            # A line without ':' cannot contain ': ' either, so this single
            # check also covers lines that have no colon at all.
            parts = stripped.split(': ')
            if len(parts) != 2:
                continue

            measurement, time_with_unit = parts
            # Test the suffix (not mere containment) so that the slice below
            # is guaranteed to remove exactly the unit.
            if time_with_unit.endswith('ms'):
                unit = 'ms'
            elif time_with_unit.endswith('µs'):
                unit = 'µs'
            else:
                raise ValueError(f"Invalid time unit in line: {stripped}")

            time = float(time_with_unit[:-len(unit)])

            data['measurement'].append(measurement)
            data['time'].append(time)
            data['unit'].append(unit)
            data['log_file'].append(log_name)
    return pd.DataFrame(data)


def create_histograms(df: pd.DataFrame, measurement: str) -> None:
    """Display a density histogram of ``time_ms`` for one measurement type.

    The rows of ``df`` whose ``measurement`` column equals ``measurement``
    are plotted with one step-style histogram per ``log_file`` value; with
    ``common_norm=False`` each log file's histogram is normalised on its own.
    The x-axis is limited to the 1st-99th percentile of the plotted values.

    Args:
        df: DataFrame with ``measurement``, ``time_ms`` and ``log_file``
            columns.
        measurement: Measurement name to select and plot.
    """
    filtered_df = df[df['measurement'] == measurement]
    plt.figure(figsize=(12, 6))
    sns.histplot(
        data=filtered_df,
        x='time_ms',
        hue='log_file',
        element='step',
        stat='density',
        common_norm=False,
        palette='bright',
    )
    plt.title(f'Histogram of {measurement}')
    plt.xlabel('Time (ms)')
    plt.ylabel('Density')
    plt.grid(True)

    lower, upper = filtered_df['time_ms'].quantile([0.01, 0.99])
    # Skip the crop when the range is degenerate (a single distinct value)
    # or undefined (no rows, quantiles are NaN); matplotlib's automatic
    # limits are used instead.
    if lower < upper:
        plt.xlim(lower, upper)
    plt.show()


def main(argv=None) -> int:
    """Parse the given log files and show one histogram per measurement.

    Args:
        argv: Log file paths; defaults to ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 when no measurements were
        found, 2 when no log file was given.
    """
    file_paths = sys.argv[1:] if argv is None else list(argv)
    if not file_paths:
        # pd.concat() raises an opaque ValueError on an empty list, so
        # report the actual problem instead.
        script_name = os.path.basename(sys.argv[0])
        print(f"usage: {script_name} LOG_FILE [LOG_FILE ...]", file=sys.stderr)
        return 2

    dfs = [parse_log_file(path) for path in file_paths]
    combined_df = pd.concat(dfs, ignore_index=True)
    if combined_df.empty:
        print("No measurements found in the given log files.", file=sys.stderr)
        return 1

    # Keep values already in ms; everything else is µs and is divided by 1000.
    is_ms = combined_df['unit'] == 'ms'
    combined_df['time_ms'] = combined_df['time'].where(
        is_ms, combined_df['time'] / 1000
    )
    combined_df = combined_df.drop(columns=['time', 'unit'])

    for measurement in combined_df['measurement'].unique():
        create_histograms(combined_df, measurement)
    return 0


if __name__ == '__main__':
    sys.exit(main())