1 files changed, 345 insertions, 0 deletions
diff --git a/dma/analyze_bw.py b/dma/analyze_bw.py
new file mode 100755
index 0000000..e4fab84
--- /dev/null
+++ b/dma/analyze_bw.py
@@ -0,0 +1,345 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+# Multiplier that converts a bandwidth given in KiB/s (the unit of the input
+# CSV column bw_KiBps) into each supported output unit.
+KIBPS_TO_UNIT = {
+    "KiBps": 1.0,
+    "MiBps": 1 / 1024,
+    "GiBps": 1 / 1024**2,
+}
+
+
+# Display name of each output unit ("GiBps" -> "GiB/s"). Derived from the keys
+# of KIBPS_TO_UNIT so that both tables always accept the same --unit values.
+UNIT_DISPLAY = {unit: unit.replace("Bps", "B/s") for unit in KIBPS_TO_UNIT}
+
+
+def format_bytes(n) -> str:
+    """
+    Format a byte count as a short human-readable size string.
+
+    Examples:
+      512          -> 512B
+      246132000    -> 246M
+      2147483648   -> 2.00G
+      "246132K"    -> 246M
+      "2097148K"   -> 1.95G
+
+    Notes:
+      - CSV normally gives integer bytes.
+      - Strings like "246132K" are also accepted; the K/M/G/T suffix is
+        case-insensitive and is read as a decimal (power-of-1000) unit.
+      - K and M output is decimal and rounded to a whole number.
+      - G and T output is binary (powers of 1024) with two decimal places,
+        so values in [10**9, 1024**3) still print in M (e.g. "1000M").
+
+    Raises:
+      ValueError: if a string argument is empty or not numeric.
+    """
+    if isinstance(n, str):
+        s = n.strip()
+        # Slice rather than index: for "" this yields "" and falls through to
+        # int(""), a ValueError, instead of an IndexError from s[-1].
+        suffix = s[-1:].upper()
+
+        if suffix in {"K", "M", "G", "T"}:
+            # Decimal units: K = 1000, M = 1000**2, G = 1000**3, T = 1000**4.
+            scale = 1000 ** ("KMGT".index(suffix) + 1)
+            n = int(float(s[:-1]) * scale)
+        else:
+            n = int(s)
+
+    n = int(n)
+
+    # Thresholds for T/G are binary, those for M/K are decimal (see Notes).
+    if n >= 1024 ** 4:
+        return f"{n / (1024 ** 4):.2f}T"
+    if n >= 1024 ** 3:
+        return f"{n / (1024 ** 3):.2f}G"
+    if n >= 1_000_000:
+        return f"{round(n / 1_000_000)}M"
+    if n >= 1_000:
+        return f"{round(n / 1_000)}K"
+    return f"{n}B"
+
+def experiment_label(path: str) -> str:
+    """Return the plot label for *path*: its name minus a bw_/bandwidth_ prefix and .csv."""
+    file_path = Path(path)
+    base = file_path.name
+    if base.endswith(".csv"):
+        # Only a recognised prefix is stripped; any other name falls back to the stem.
+        for prefix in ("bw_", "bandwidth_"):
+            if base.startswith(prefix):
+                return base[len(prefix) : -len(".csv")]
+    return file_path.stem
+
+
+def percentile(values, p: float) -> float:  # p-th percentile of values as a plain float
+    return float(np.percentile(np.asarray(values, dtype=float), p))
+
+
+def read_bw_csv(path: str, unit: str):
+    """Load a 'bytes,bw_KiBps' CSV into {nbytes: [bandwidth converted to *unit*, ...]}.
+
+    Header cells are whitespace-stripped before validation. Raises ValueError, prefixed
+    with *path*, for a missing or unexpected header and for any unparsable row.
+    """
+    grouped = defaultdict(list)
+    with open(path, newline="") as f:
+        reader = csv.DictReader(f)
+        if reader.fieldnames is None:
+            raise ValueError(f"{path}: empty CSV or missing header")
+        fieldnames = [name.strip() for name in reader.fieldnames]
+        if fieldnames != ["bytes", "bw_KiBps"]:
+            raise ValueError(
+                f"{path}: expected header 'bytes,bw_KiBps', got: {','.join(fieldnames)}"
+            )
+        reader.fieldnames = fieldnames  # key rows by the stripped names (padded headers)
+        for line_no, row in enumerate(reader, start=2):
+            try:
+                nbytes = int(row["bytes"].strip())
+                bw_kibps = float(row["bw_KiBps"].strip())
+            except (AttributeError, ValueError) as e:  # AttributeError: short row -> None
+                raise ValueError(f"{path}: invalid row at line {line_no}: {row} ({e})") from e
+            grouped[nbytes].append(bw_kibps * KIBPS_TO_UNIT[unit])
+    return grouped
+
+
+def _parse_args():
+    """Build the command-line parser and return the parsed arguments."""
+    parser = argparse.ArgumentParser(
+        description="Create grouped bandwidth bar graph from bw_*.csv files."
+    )
+    parser.add_argument(
+        "inputs",
+        nargs="+",
+        help="Input CSV files with header: bytes,bw_KiBps. Recommended naming: bw_<label>.csv",
+    )
+    parser.add_argument(
+        "--unit",
+        default="GiBps",
+        choices=sorted(KIBPS_TO_UNIT.keys()),
+        help="Output bandwidth unit. Input is always bw_KiBps. Default: GiBps",
+    )
+    parser.add_argument(
+        "--out",
+        default="bandwidth_bar.png",
+        help="Output bar graph filename. Default: bandwidth_bar.png",
+    )
+    parser.add_argument(
+        "--no-errorbar",
+        action="store_true",
+        help="Disable standard-deviation error bars.",
+    )
+    parser.add_argument(
+        "--legend-outside",
+        action="store_true",
+        help="Place legend outside the plot.",
+    )
+    return parser.parse_args()
+
+
+def _summarize(samples):
+    """Compute the statistics reported for one (input file, transfer size) group.
+
+    Args:
+        samples: Non-empty sequence of bandwidth values, already in the output unit.
+
+    Returns:
+        Dict with the keys n, min, avg, std, p1, p5, p10, p50, p90, p99 and max.
+    """
+    bws = np.array(samples, dtype=float)
+    return {
+        "n": len(bws),
+        "min": float(np.min(bws)),
+        "avg": float(np.mean(bws)),
+        "std": float(np.std(bws)),
+        "p1": percentile(bws, 1),
+        "p5": percentile(bws, 5),
+        "p10": percentile(bws, 10),
+        "p50": percentile(bws, 50),
+        "p90": percentile(bws, 90),
+        "p99": percentile(bws, 99),
+        "max": float(np.max(bws)),
+    }
+
+
+def _print_summary(all_data, output_unit):
+    """Print the run header and one statistics row per (input file, transfer size).
+
+    Rows follow the command-line order of the inputs and are sorted by size within each.
+    """
+    print("Bandwidth summary")
+    print("=================")
+    print(f"input_files: {len(all_data)}")
+    print("input_unit:  KiB/s")
+    print(f"output_unit: {output_unit}")
+    print()
+
+    print(
+        f"{'file_label':>18} {'bytes':>14} {'size':>8} {'n':>8} "
+        f"{'min':>12} {'avg':>12} {'std':>12} "
+        f"{'p1':>12} {'p5':>12} {'p10':>12} {'p50':>12} "
+        f"{'p90':>12} {'p99':>12} {'max':>12}"
+    )
+
+    for item in all_data:
+        file_label = item["label"]
+        for nbytes in sorted(item["stats"]):
+            st = item["stats"][nbytes]
+            size_label = format_bytes(nbytes)
+            print(
+                f"{file_label:>18} {nbytes:14d} {size_label:>8} {st['n']:8d} "
+                f"{st['min']:12.6f} {st['avg']:12.6f} {st['std']:12.6f} "
+                f"{st['p1']:12.6f} {st['p5']:12.6f} {st['p10']:12.6f} {st['p50']:12.6f} "
+                f"{st['p90']:12.6f} {st['p99']:12.6f} {st['max']:12.6f}"
+            )
+
+
+def _plot(all_data, output_unit, out_path, show_errorbars, legend_outside):
+    """Draw the grouped bar chart of mean bandwidth and save it.
+
+    Args:
+        all_data: One dict per input with "label" and "stats" ({nbytes: statistics}).
+        output_unit: Display name of the bandwidth unit, used in the y-axis label.
+        out_path: Destination image file.
+        show_errorbars: Draw standard-deviation error bars on each bar.
+        legend_outside: Place the legend to the right of the axes instead of inside.
+    """
+    sizes = sorted({nbytes for item in all_data for nbytes in item["stats"]})
+    x = np.arange(len(sizes))
+
+    # Bar width shrinks as the number of experiments grows.
+    total_group_width = 0.82
+    bar_width = total_group_width / max(1, len(all_data))
+
+    fig_width = max(9, len(sizes) * 1.4)
+    fig, ax = plt.subplots(figsize=(fig_width, 6))
+
+    for idx, item in enumerate(all_data):
+        stats = item["stats"]
+        offsets = x - total_group_width / 2 + bar_width / 2 + idx * bar_width
+
+        # Sizes this input did not measure get NaN / 0.0 placeholders.
+        means = [stats[s]["avg"] if s in stats else np.nan for s in sizes]
+        errors = [stats[s]["std"] if s in stats else 0.0 for s in sizes]
+
+        bar_kwargs = {"yerr": errors, "capsize": 3} if show_errorbars else {}
+        ax.bar(offsets, means, width=bar_width, label=item["label"], **bar_kwargs)
+
+    # Cut off the bottom of the y-axis so that differences between bars stay visible.
+    measured = [st for item in all_data for st in item["stats"].values()]
+    if measured:
+        values = np.array([st["avg"] for st in measured], dtype=float)
+        spread = np.array([st["std"] for st in measured], dtype=float)
+        if not show_errorbars:
+            spread = np.zeros_like(values)
+
+        low = float(np.nanmin(values - spread))
+        high = float(np.nanmax(values + spread))
+        span = max(high - low, 1e-9)
+
+        # Leave 10% padding below the lowest visible bar/error, but never start
+        # below zero: a negative y-axis makes no sense for bandwidth.
+        auto_bottom = max(0.0, low - 0.10 * span)
+        ax.set_ylim(bottom=auto_bottom)
+
+        # Only claim truncation when the axis really starts above zero.
+        if auto_bottom > 0.0:
+            ax.text(
+                0.01,
+                0.98,
+                "Y-axis truncated",
+                transform=ax.transAxes,
+                va="top",
+                fontsize=9,
+            )
+
+    ax.set_xlabel("Transfer size")
+    ax.set_ylabel(f"Bandwidth ({output_unit})")
+    ax.set_title("DMA Bandwidth by Transfer Size")
+    ax.set_xticks(x)
+    ax.set_xticklabels([format_bytes(s) for s in sizes])
+    ax.grid(axis="y", linestyle="--", alpha=0.6)
+
+    if legend_outside:
+        ax.legend(
+            title="Experiment",
+            fontsize=8,
+            loc="center left",
+            bbox_to_anchor=(1.02, 0.5),
+        )
+        fig.tight_layout(rect=[0, 0, 0.80, 1])
+    else:
+        ax.legend(title="Experiment", fontsize=8)
+        fig.tight_layout()
+
+    fig.savefig(out_path, dpi=200)
+    plt.close(fig)
+
+
+def main() -> int:
+    """Summarize the bandwidth CSVs given on the command line and plot them.
+
+    Each input is read with read_bw_csv, per-size statistics are printed as a
+    table, and a grouped bar chart of the mean bandwidth is written to --out.
+
+    Returns:
+        0 on success; 1 (after printing an ERROR line to stderr) if an input
+        cannot be opened or parsed, or contains no data rows.
+    """
+    args = _parse_args()
+    output_unit = UNIT_DISPLAY[args.unit]
+    all_data = []
+
+    for path in args.inputs:
+        try:
+            grouped = read_bw_csv(path, args.unit)
+        except (OSError, ValueError) as e:
+            # OSError: a missing or unreadable file is reported like a parse
+            # error instead of escaping as a traceback.
+            print(f"ERROR: {e}", file=sys.stderr)
+            return 1
+
+        if not grouped:
+            print(f"ERROR: {path}: no data rows", file=sys.stderr)
+            return 1
+
+        all_data.append(
+            {
+                "path": path,
+                "label": experiment_label(path),
+                # Statistics are stored per input rather than per label, so two
+                # files that map to the same label cannot overwrite each other.
+                "stats": {size: _summarize(bws) for size, bws in grouped.items()},
+            }
+        )
+
+    _print_summary(all_data, output_unit)
+
+    _plot(
+        all_data,
+        output_unit,
+        args.out,
+        show_errorbars=not args.no_errorbar,
+        legend_outside=args.legend_outside,
+    )
+
+    print()
+    print(f"Wrote bar graph: {args.out}")
+
+    return 0
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    sys.exit(main())