diff options
| author | Siho Shin <victory8500@naver.com> | 2026-06-27 08:59:14 +0900 |
|---|---|---|
| committer | Siho Shin <victory8500@naver.com> | 2026-06-27 08:59:14 +0900 |
| commit | 5d2722de9290472bb8fbd120d1ec506f7765e209 (patch) | |
| tree | 1254b5f3542f528871bcfdd0356f81095e66d480 /decompress/analyze_bw.py | |
Diffstat (limited to 'decompress/analyze_bw.py')
| -rwxr-xr-x | decompress/analyze_bw.py | 345 |
1 files changed, 345 insertions, 0 deletions
#!/usr/bin/env python3
"""Summarize bandwidth CSV logs and plot them as a grouped bar chart.

Each input CSV must have the header ``bytes,bw_KiBps``.  Rows are grouped by
transfer size, a per-size statistics table is printed to stdout, and one bar
per (input file, transfer size) is drawn showing the mean bandwidth.
"""

import argparse
import csv
import sys
from collections import Counter, defaultdict
from pathlib import Path

import numpy as np

# NOTE: matplotlib is imported inside _plot_bars() rather than here, so the
# parsing/formatting helpers, ``--help`` and the text summary keep working on
# machines where matplotlib is not installed.


# Multipliers that convert a KiB/s reading into the requested output unit.
KIBPS_TO_UNIT = {
    "KiBps": 1.0,
    "MiBps": 1.0 / 1024.0,
    "GiBps": 1.0 / (1024.0 * 1024.0),
}


# Human-readable spelling of each unit, used in the summary and axis label.
UNIT_DISPLAY = {
    "KiBps": "KiB/s",
    "MiBps": "MiB/s",
    "GiBps": "GiB/s",
}


# Suffixes accepted by format_bytes() for string input (decimal multipliers).
_SUFFIX_SCALE = {
    "K": 1_000,
    "M": 1_000_000,
    "G": 1_000_000_000,
    "T": 1_000_000_000_000,
}

# Percentiles reported in the summary table.
_PERCENTILES = (1, 5, 10, 50, 90, 99)


def format_bytes(n) -> str:
    """
    Format a byte count using the largest readable unit.

    Examples:
        246132000  -> 246M
        "246132K"  -> 246M
        "2097148K" -> 1.95G

    Notes:
        - CSV normally gives integer bytes.
        - Strings with a K/M/G/T suffix (case-insensitive) are accepted and
          the suffix is interpreted as a decimal multiplier (K = 1000).
        - K and M use decimal divisors and are rounded to whole numbers.
        - G and T use binary thresholds/divisors (1024**3, 1024**4) and are
          shown with two decimal places.  Values between 10**9 and 1024**3
          are therefore still printed in M (e.g. "1000M").

    Raises:
        ValueError: if a string argument is empty or not a valid number.
    """
    if isinstance(n, str):
        text = n.strip()
        # Slice instead of indexing so an empty string falls through to
        # int("") and raises ValueError rather than IndexError.
        suffix = text[-1:].upper()

        if suffix in _SUFFIX_SCALE:
            n = int(float(text[:-1]) * _SUFFIX_SCALE[suffix])
        else:
            n = int(text)

    n = int(n)

    if n >= 1024 ** 4:
        return f"{n / (1024 ** 4):.2f}T"

    if n >= 1024 ** 3:
        return f"{n / (1024 ** 3):.2f}G"

    if n >= 1_000_000:
        return f"{round(n / 1_000_000)}M"

    if n >= 1_000:
        return f"{round(n / 1_000)}K"

    return f"{n}B"


def experiment_label(path: str) -> str:
    """Derive a short experiment label from an input file path.

    ``bw_<label>.csv`` and ``bandwidth_<label>.csv`` yield ``<label>``; any
    other file name yields its stem (name without the final extension).
    """
    name = Path(path).name

    if name.endswith(".csv"):
        for prefix in ("bw_", "bandwidth_"):
            if name.startswith(prefix):
                return name[len(prefix) : -len(".csv")]

    return Path(path).stem


def percentile(values, p: float) -> float:
    """Return the *p*-th percentile (0-100) of *values* as a plain float."""
    return float(np.percentile(np.array(values, dtype=float), p))


def read_bw_csv(path: str, unit: str):
    """Read one bandwidth CSV and group the samples by transfer size.

    Args:
        path: CSV file whose header is exactly ``bytes,bw_KiBps`` (whitespace
            around the column names is tolerated).
        unit: key of ``KIBPS_TO_UNIT`` selecting the unit of the returned
            bandwidth values.

    Returns:
        A ``defaultdict(list)`` mapping the integer ``bytes`` value of each
        row to the list of its bandwidth samples converted to *unit*.

    Raises:
        KeyError: if *unit* is not a key of ``KIBPS_TO_UNIT``.
        ValueError: if the header is missing/unexpected or a row is malformed.
        OSError: if the file cannot be opened.
    """
    scale = KIBPS_TO_UNIT[unit]
    grouped = defaultdict(list)

    with open(path, newline="") as f:
        reader = csv.DictReader(f)

        if reader.fieldnames is None:
            raise ValueError(f"{path}: empty CSV or missing header")

        fieldnames = [name.strip() for name in reader.fieldnames]
        if fieldnames != ["bytes", "bw_KiBps"]:
            raise ValueError(
                f"{path}: expected header 'bytes,bw_KiBps', got: {','.join(fieldnames)}"
            )

        # Key the rows by the stripped names; otherwise a header such as
        # "bytes, bw_KiBps" passes the check above but every row lookup fails.
        reader.fieldnames = fieldnames

        for row in reader:
            try:
                nbytes = int(row["bytes"].strip())
                bw_kibps = float(row["bw_KiBps"].strip())
            except (AttributeError, TypeError, ValueError) as e:
                # AttributeError/TypeError: a short row leaves the column None.
                # reader.line_num is the real file line, even when blank
                # lines were skipped before this row.
                raise ValueError(
                    f"{path}: invalid row at line {reader.line_num}: {row} ({e})"
                ) from e

            grouped[nbytes].append(bw_kibps * scale)

    return grouped


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser."""
    parser = argparse.ArgumentParser(
        description="Create grouped bandwidth bar graph from bw_*.csv files."
    )
    parser.add_argument(
        "inputs",
        nargs="+",
        help="Input CSV files with header: bytes,bw_KiBps. Recommended naming: bw_<label>.csv",
    )
    parser.add_argument(
        "--unit",
        default="GiBps",
        choices=sorted(KIBPS_TO_UNIT.keys()),
        help="Output bandwidth unit. Input is always bw_KiBps. Default: GiBps",
    )
    parser.add_argument(
        "--out",
        default="bandwidth_bar.png",
        help="Output bar graph filename. Default: bandwidth_bar.png",
    )
    parser.add_argument(
        "--no-errorbar",
        action="store_true",
        help="Disable standard-deviation error bars.",
    )
    parser.add_argument(
        "--legend-outside",
        action="store_true",
        help="Place legend outside the plot.",
    )
    return parser


def _unique_labels(paths) -> list:
    """Return one legend label per path, falling back to the path on clashes.

    Two inputs such as ``a/bw_x.csv`` and ``b/bw_x.csv`` share the derived
    label ``x``; showing their paths instead keeps the rows/bars tellable
    apart.
    """
    labels = [experiment_label(p) for p in paths]
    counts = Counter(labels)
    return [
        label if counts[label] == 1 else str(path)
        for label, path in zip(labels, paths)
    ]


def _summarize(values) -> dict:
    """Compute count, min/avg/std/max and the reported percentiles."""
    samples = np.asarray(values, dtype=float)
    summary = {
        "n": len(samples),
        "min": float(np.min(samples)),
        "avg": float(np.mean(samples)),
        "std": float(np.std(samples)),
        "max": float(np.max(samples)),
    }
    for q in _PERCENTILES:
        summary[f"p{q}"] = percentile(samples, q)
    return summary


def _print_summary(datasets, output_unit: str) -> None:
    """Print the text report and attach per-size stats to each dataset.

    Each dataset dict gains a ``"stats"`` entry mapping transfer size to the
    dict produced by ``_summarize``.
    """
    print("Bandwidth summary")
    print("=================")
    print(f"input_files: {len(datasets)}")
    print("input_unit: KiB/s")
    print(f"output_unit: {output_unit}")
    print()

    print(
        f"{'file_label':>18} {'bytes':>14} {'size':>8} {'n':>8} "
        f"{'min':>12} {'avg':>12} {'std':>12} "
        f"{'p1':>12} {'p5':>12} {'p10':>12} {'p50':>12} "
        f"{'p90':>12} {'p99':>12} {'max':>12}"
    )

    for item in datasets:
        label = item["label"]
        grouped = item["grouped"]
        # Stats live on the dataset itself (not in a dict keyed by label) so
        # inputs with identical labels can never overwrite each other.
        item["stats"] = {}

        for nbytes in sorted(grouped):
            s = _summarize(grouped[nbytes])
            item["stats"][nbytes] = s

            print(
                f"{label:>18} {nbytes:14d} {format_bytes(nbytes):>8} {s['n']:8d} "
                f"{s['min']:12.6f} {s['avg']:12.6f} {s['std']:12.6f} "
                f"{s['p1']:12.6f} {s['p5']:12.6f} {s['p10']:12.6f} {s['p50']:12.6f} "
                f"{s['p90']:12.6f} {s['p99']:12.6f} {s['max']:12.6f}"
            )


def _plot_bars(datasets, output_unit, out_path, show_errorbars, legend_outside):
    """Draw the grouped bar chart (mean bandwidth per size) and save it.

    Expects every dataset to carry the ``"stats"`` mapping filled in by
    ``_print_summary``.
    """
    import matplotlib.pyplot as plt

    sizes = sorted({nbytes for item in datasets for nbytes in item["stats"]})
    x = np.arange(len(sizes))

    # Bar width shrinks as the number of experiments grows.
    total_group_width = 0.82
    bar_width = total_group_width / max(1, len(datasets))

    fig_width = max(9, len(sizes) * 1.4)
    fig, ax = plt.subplots(figsize=(fig_width, 6))

    for idx, item in enumerate(datasets):
        stats = item["stats"]
        offsets = x - total_group_width / 2 + bar_width / 2 + idx * bar_width

        # A size missing from this input leaves a gap (NaN height, no error).
        means = [stats[s]["avg"] if s in stats else np.nan for s in sizes]
        errors = [stats[s]["std"] if s in stats else 0.0 for s in sizes]

        bar_kwargs = {"width": bar_width, "label": item["label"]}
        if show_errorbars:
            bar_kwargs.update(yerr=errors, capsize=3)
        ax.bar(offsets, means, **bar_kwargs)

    # Automatically cut off the bottom of the y-axis to make bar differences
    # visible.  This intentionally truncates the y-axis.
    avgs = np.array(
        [s["avg"] for item in datasets for s in item["stats"].values()],
        dtype=float,
    )
    stds = np.array(
        [s["std"] for item in datasets for s in item["stats"].values()],
        dtype=float,
    )

    if avgs.size:
        if show_errorbars:
            low = float(np.nanmin(avgs - stds))
            high = float(np.nanmax(avgs + stds))
        else:
            low = float(np.nanmin(avgs))
            high = float(np.nanmax(avgs))

        span = max(high - low, 1e-9)

        # Leave 10% padding below the lowest visible bar/error, but never go
        # below zero: negative bandwidth is meaningless.
        auto_bottom = max(0.0, low - 0.10 * span)
        ax.set_ylim(bottom=auto_bottom)

        # Only warn the reader when the axis really does not start at zero.
        if auto_bottom > 0.0:
            ax.text(
                0.01,
                0.98,
                "Y-axis truncated",
                transform=ax.transAxes,
                va="top",
                fontsize=9,
            )

    ax.set_xlabel("Transfer size")
    ax.set_ylabel(f"Bandwidth ({output_unit})")
    ax.set_title("DMA Bandwidth by Transfer Size")
    ax.set_xticks(x)
    ax.set_xticklabels([format_bytes(s) for s in sizes])
    ax.grid(axis="y", linestyle="--", alpha=0.6)

    if legend_outside:
        ax.legend(
            title="Experiment",
            fontsize=8,
            loc="center left",
            bbox_to_anchor=(1.02, 0.5),
        )
        # Reserve the right 20% of the figure for the legend.
        fig.tight_layout(rect=[0, 0, 0.80, 1])
    else:
        ax.legend(title="Experiment", fontsize=8)
        fig.tight_layout()

    fig.savefig(out_path, dpi=200)
    plt.close(fig)


def main() -> int:
    """Command-line entry point.

    Returns:
        0 on success, 1 if any input file is unreadable, malformed or empty
        (the reason is printed to stderr).
    """
    args = _build_parser().parse_args()
    output_unit = UNIT_DISPLAY[args.unit]

    datasets = []
    for path, label in zip(args.inputs, _unique_labels(args.inputs)):
        try:
            grouped = read_bw_csv(path, args.unit)
        except (OSError, ValueError) as e:
            # OSError covers missing/unreadable files, which would otherwise
            # surface as a raw traceback.
            print(f"ERROR: {e}", file=sys.stderr)
            return 1

        if not grouped:
            print(f"ERROR: {path}: no data rows", file=sys.stderr)
            return 1

        datasets.append({"path": path, "label": label, "grouped": grouped})

    _print_summary(datasets, output_unit)
    _plot_bars(
        datasets,
        output_unit,
        args.out,
        show_errorbars=not args.no_errorbar,
        legend_outside=args.legend_outside,
    )

    print()
    print(f"Wrote bar graph: {args.out}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
