summaryrefslogtreecommitdiff
path: root/dma/analyze_bw.py
diff options
context:
space:
mode:
Diffstat (limited to 'dma/analyze_bw.py')
-rwxr-xr-xdma/analyze_bw.py345
1 files changed, 345 insertions, 0 deletions
diff --git a/dma/analyze_bw.py b/dma/analyze_bw.py
new file mode 100755
index 0000000..e4fab84
--- /dev/null
+++ b/dma/analyze_bw.py
@@ -0,0 +1,345 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
# Multipliers that turn a KiB/s reading into each selectable output unit.
# Each step up (KiB -> MiB -> GiB) divides by another factor of 1024.
_KIB = 1024.0
KIBPS_TO_UNIT = {
    "KiBps": 1.0,
    "MiBps": 1.0 / _KIB,
    "GiBps": 1.0 / (_KIB * _KIB),
}
+
+
# Human-readable spelling of each unit key, used in the summary and axis label.
UNIT_DISPLAY = dict(
    zip(
        ("KiBps", "MiBps", "GiBps"),
        ("KiB/s", "MiB/s", "GiB/s"),
    )
)
+
+
def format_bytes(n) -> str:
    """Format a byte count using the largest readable unit.

    Args:
        n: Byte count as an integer (or anything ``int()`` accepts), or a
            string such as ``"246132K"``. A trailing K/M/G/T suffix
            (case-insensitive) is interpreted as a decimal multiplier
            (K = 1000, M = 1000**2, ...).

    Returns:
        A compact label:
        - values of at least 1024**4 bytes: ``T`` with one decimal place,
        - values of at least 1024**3 bytes: ``G`` with one decimal place,
        - values of at least 1_000_000 bytes: rounded whole ``M``,
        - values of at least 1_000 bytes: rounded whole ``K``,
        - anything smaller: plain bytes with a ``B`` suffix.

    Raises:
        ValueError: If ``n`` is an empty/blank string or is not numeric.

    Examples:
        >>> format_bytes("246132K")
        '246M'
        >>> format_bytes("2097148K")
        '2.0G'
    """
    if isinstance(n, str):
        s = n.strip()
        if not s:
            # Without this guard s[-1] would raise an unrelated IndexError.
            raise ValueError("format_bytes: empty size string")

        suffix = s[-1].upper()

        if suffix in {"K", "M", "G", "T"}:
            value = float(s[:-1])
            # Interpret suffix input as decimal-style units.
            scale = {
                "K": 1_000,
                "M": 1_000_000,
                "G": 1_000_000_000,
                "T": 1_000_000_000_000,
            }[suffix]
            n = int(value * scale)
        else:
            n = int(s)

    n = int(n)

    # G/T use binary thresholds and one decimal place, as documented above.
    if n >= 1024 ** 4:
        return f"{n / (1024 ** 4):.1f}T"

    if n >= 1024 ** 3:
        return f"{n / (1024 ** 3):.1f}G"

    # K/M use decimal thresholds and are shown as rounded whole numbers.
    if n >= 1_000_000:
        return f"{round(n / 1_000_000)}M"

    if n >= 1_000:
        return f"{round(n / 1_000)}K"

    return f"{n}B"
+
def experiment_label(path: str) -> str:
    """Derive a short experiment label from a CSV file path.

    For a file name of the form ``bw_<label>.csv`` or
    ``bandwidth_<label>.csv`` the ``<label>`` part is returned. Any other
    file name falls back to its stem (the name without its final extension).
    """
    file_name = Path(path).name
    extension = ".csv"

    if file_name.endswith(extension):
        # Try the short prefix first, then the long one.
        for prefix in ("bw_", "bandwidth_"):
            if file_name.startswith(prefix):
                end = len(file_name) - len(extension)
                return file_name[len(prefix):end]

    return Path(path).stem
+
+
def percentile(values, p: float) -> float:
    """Return the ``p``-th percentile of ``values`` as a built-in float.

    ``values`` is converted to a float NumPy array before being handed to
    ``np.percentile``; the NumPy scalar result is unwrapped to ``float``.
    """
    samples = np.array(values, dtype=float)
    result = np.percentile(samples, p)
    return float(result)
+
+
def read_bw_csv(path: str, unit: str):
    """Read a bandwidth CSV and group its samples by transfer size.

    The file must have exactly the header ``bytes,bw_KiBps`` (surrounding
    whitespace around the column names is tolerated). Each bandwidth sample
    is converted from KiB/s to ``unit`` before being stored.

    Args:
        path: Path of the CSV file to read.
        unit: Target unit; must be a key of ``KIBPS_TO_UNIT``.

    Returns:
        A ``defaultdict(list)`` mapping the integer ``bytes`` value to the
        list of converted bandwidth samples, in file order.

    Raises:
        KeyError: If ``unit`` is not a key of ``KIBPS_TO_UNIT``.
        ValueError: If the header is missing or unexpected, or a data row
            cannot be parsed.
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    # Resolve the conversion factor once instead of once per row.
    scale = KIBPS_TO_UNIT[unit]
    grouped = defaultdict(list)

    with open(path, newline="") as f:
        reader = csv.DictReader(f)

        if reader.fieldnames is None:
            raise ValueError(f"{path}: empty CSV or missing header")

        fieldnames = [name.strip() for name in reader.fieldnames]
        if fieldnames != ["bytes", "bw_KiBps"]:
            raise ValueError(
                f"{path}: expected header 'bytes,bw_KiBps', got: {','.join(fieldnames)}"
            )

        # Key the rows by the stripped names; otherwise a padded header such
        # as "bytes, bw_KiBps" passes the check above but every row lookup
        # below fails on the unstripped key.
        reader.fieldnames = fieldnames

        for row in reader:
            try:
                # A short row yields None for the missing column, which
                # surfaces here as AttributeError on .strip().
                nbytes = int(row["bytes"].strip())
                bw_kibps = float(row["bw_KiBps"].strip())
            except (AttributeError, TypeError, ValueError) as e:
                # reader.line_num is the physical line just read, so it stays
                # correct even when blank lines were skipped.
                raise ValueError(
                    f"{path}: invalid row at line {reader.line_num}: {row} ({e})"
                ) from e

            grouped[nbytes].append(bw_kibps * scale)

    return grouped
+
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the bandwidth bar-graph tool."""
    parser = argparse.ArgumentParser(
        description="Create grouped bandwidth bar graph from bw_*.csv files."
    )
    parser.add_argument(
        "inputs",
        nargs="+",
        help="Input CSV files with header: bytes,bw_KiBps. Recommended naming: bw_<label>.csv",
    )
    parser.add_argument(
        "--unit",
        default="GiBps",
        choices=sorted(KIBPS_TO_UNIT.keys()),
        help="Output bandwidth unit. Input is always bw_KiBps. Default: GiBps",
    )
    parser.add_argument(
        "--out",
        default="bandwidth_bar.png",
        help="Output bar graph filename. Default: bandwidth_bar.png",
    )
    parser.add_argument(
        "--no-errorbar",
        action="store_true",
        help="Disable standard-deviation error bars.",
    )
    parser.add_argument(
        "--legend-outside",
        action="store_true",
        help="Place legend outside the plot.",
    )
    return parser


def _load_inputs(paths, unit):
    """Read every input CSV and attach a unique experiment label to each.

    Returns a list of ``{"path", "label", "grouped"}`` dicts in input order.
    Raises ``ValueError`` for a malformed or empty file and lets ``OSError``
    from opening the file propagate.
    """
    all_data = []
    used_labels = set()

    for path in paths:
        grouped = read_bw_csv(path, unit)
        if not grouped:
            raise ValueError(f"{path}: no data rows")

        # Files in different directories can share a name (and so a label).
        # Disambiguate with "#2", "#3", ... so one experiment's statistics
        # do not overwrite another's.
        base_label = experiment_label(path)
        label = base_label
        counter = 2
        while label in used_labels:
            label = f"{base_label}#{counter}"
            counter += 1
        used_labels.add(label)

        all_data.append({"path": path, "label": label, "grouped": grouped})

    return all_data


def _summarize(all_data):
    """Print the per-experiment, per-size statistics table.

    Returns ``(stats, sizes)`` where ``stats[label][nbytes]`` is a dict of
    summary statistics and ``sizes`` is the sorted union of all sizes seen.
    """
    print(
        f"{'file_label':>18} {'bytes':>14} {'size':>8} {'n':>8} "
        f"{'min':>12} {'avg':>12} {'std':>12} "
        f"{'p1':>12} {'p5':>12} {'p10':>12} {'p50':>12} "
        f"{'p90':>12} {'p99':>12} {'max':>12}"
    )

    # stats[label][nbytes] = dict(...)
    stats = defaultdict(dict)
    all_sizes = set()

    for item in all_data:
        file_label = item["label"]
        grouped = item["grouped"]

        for nbytes in sorted(grouped):
            bws = np.array(grouped[nbytes], dtype=float)
            all_sizes.add(nbytes)

            min_v = float(np.min(bws))
            avg = float(np.mean(bws))
            std = float(np.std(bws))
            p1 = percentile(bws, 1)
            p5 = percentile(bws, 5)
            p10 = percentile(bws, 10)
            p50 = percentile(bws, 50)
            p90 = percentile(bws, 90)
            p99 = percentile(bws, 99)
            max_v = float(np.max(bws))

            size_label = format_bytes(nbytes)

            print(
                f"{file_label:>18} {nbytes:14d} {size_label:>8} {len(bws):8d} "
                f"{min_v:12.6f} {avg:12.6f} {std:12.6f} "
                f"{p1:12.6f} {p5:12.6f} {p10:12.6f} {p50:12.6f} "
                f"{p90:12.6f} {p99:12.6f} {max_v:12.6f}"
            )

            stats[file_label][nbytes] = {
                "n": len(bws),
                "avg": avg,
                "std": std,
                "min": min_v,
                "max": max_v,
                "p50": p50,
                "p90": p90,
                "p99": p99,
            }

    return stats, sorted(all_sizes)


def _render_bar_chart(
    stats, sizes, experiment_labels, output_unit, out_path, no_errorbar, legend_outside
):
    """Draw the grouped bar chart (one bar group per size) and save it."""
    size_labels = [format_bytes(s) for s in sizes]

    x = np.arange(len(sizes))
    num_experiments = len(experiment_labels)

    # Bar width shrinks as the number of experiments grows.
    total_group_width = 0.82
    bar_width = total_group_width / max(1, num_experiments)

    fig_width = max(9, len(sizes) * 1.4)
    fig, ax = plt.subplots(figsize=(fig_width, 6))

    for idx, label in enumerate(experiment_labels):
        offsets = x - total_group_width / 2 + bar_width / 2 + idx * bar_width
        per_size = stats[label]

        # A size this experiment never measured gets a NaN bar (nothing is
        # drawn) with a zero-length error bar.
        means = [per_size[s]["avg"] if s in per_size else np.nan for s in sizes]
        stds = [per_size[s]["std"] if s in per_size else 0.0 for s in sizes]

        bar_kwargs = {"width": bar_width, "label": label}
        if not no_errorbar:
            bar_kwargs.update(yerr=stds, capsize=3)
        ax.bar(offsets, means, **bar_kwargs)

    # Automatically cut off the bottom of the y-axis to make bar differences
    # visible. This intentionally truncates the y-axis.
    measured = [
        (entry["avg"], entry["std"])
        for label in experiment_labels
        for entry in stats[label].values()
    ]

    if measured:
        values = np.array([avg for avg, _ in measured], dtype=float)
        spreads = np.array([std for _, std in measured], dtype=float)

        if no_errorbar:
            low = float(np.nanmin(values))
            high = float(np.nanmax(values))
        else:
            low = float(np.nanmin(values - spreads))
            high = float(np.nanmax(values + spreads))

        span = max(high - low, 1e-9)

        # Leave 10% padding below the lowest visible bar/error, but never
        # let the bandwidth axis go negative.
        auto_bottom = max(0.0, low - 0.10 * span)

        ax.set_ylim(bottom=auto_bottom)

        # Only claim truncation when the axis really starts above zero.
        if auto_bottom > 0.0:
            ax.text(
                0.01,
                0.98,
                "Y-axis truncated",
                transform=ax.transAxes,
                va="top",
                fontsize=9,
            )

    ax.set_xlabel("Transfer size")
    ax.set_ylabel(f"Bandwidth ({output_unit})")
    ax.set_title("DMA Bandwidth by Transfer Size")
    ax.set_xticks(x)
    ax.set_xticklabels(size_labels)
    ax.grid(axis="y", linestyle="--", alpha=0.6)

    if legend_outside:
        ax.legend(
            title="Experiment",
            fontsize=8,
            loc="center left",
            bbox_to_anchor=(1.02, 0.5),
        )
        # Reserve the right-hand 20% of the figure for the legend.
        fig.tight_layout(rect=[0, 0, 0.80, 1])
    else:
        ax.legend(title="Experiment", fontsize=8)
        fig.tight_layout()

    fig.savefig(out_path, dpi=200)
    plt.close(fig)


def main() -> int:
    """Summarize bandwidth CSVs on stdout and write a grouped bar chart.

    Parses the command line, reads every input CSV, prints a statistics
    table per experiment and transfer size, then saves the bar chart to
    ``--out``.

    Returns:
        0 on success; 1 if an input file cannot be read, is malformed, or
        has no data rows (the reason is printed to stderr).
    """
    args = _build_parser().parse_args()

    output_unit = UNIT_DISPLAY[args.unit]

    try:
        all_data = _load_inputs(args.inputs, args.unit)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files, which would otherwise end
        # in a traceback instead of a clean error message.
        print(f"ERROR: {e}", file=sys.stderr)
        return 1

    print("Bandwidth summary")
    print("=================")
    print(f"input_files: {len(all_data)}")
    print("input_unit: KiB/s")
    print(f"output_unit: {output_unit}")
    print()

    stats, sizes = _summarize(all_data)

    _render_bar_chart(
        stats,
        sizes,
        [item["label"] for item in all_data],
        output_unit,
        args.out,
        args.no_errorbar,
        args.legend_outside,
    )

    print()
    print(f"Wrote bar graph: {args.out}")

    return 0
+
+
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())