#!/usr/bin/env python3
"""Helpers for turning ``bytes,bw_KiBps`` CSV files into a grouped bandwidth bar graph."""

import argparse
import csv
import sys
from collections import defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

# Multiplier that converts a KiB/s value into the named unit.
KIBPS_TO_UNIT = {
    "KiBps": 1.0,
    "MiBps": 1.0 / 1024.0,
    "GiBps": 1.0 / (1024.0 * 1024.0),
}

# Human-readable spelling of each unit key in KIBPS_TO_UNIT.
UNIT_DISPLAY = {
    "KiBps": "KiB/s",
    "MiBps": "MiB/s",
    "GiBps": "GiB/s",
}

# Decimal multipliers applied to suffixed string inputs such as "246132K".
_SUFFIX_SCALE = {
    "K": 1_000,
    "M": 1_000_000,
    "G": 1_000_000_000,
    "T": 1_000_000_000_000,
}


def format_bytes(n) -> str:
    """Format a size as a short label using the largest readable unit.

    Examples:
        "246132K"  -> "246M"
        "2097148K" -> "2.0G"

    Args:
        n: A byte count (anything ``int()`` accepts), or a string that is
            either a plain integer or a number followed by one of the
            suffixes K/M/G/T (case-insensitive). Suffixed strings are
            scaled with decimal multipliers (K = 1000, ...).

    Returns:
        ``"<n>B"`` below 1000, a rounded whole number with ``K`` or ``M``
        below 1024**3, and a value with one decimal place with ``G`` or
        ``T`` above that.

    Raises:
        ValueError: If ``n`` is a blank string or cannot be parsed as a number.

    Note:
        The K/M cut-offs and divisors are decimal (1000-based) while the
        G/T ones are binary (1024-based). That mix is the existing
        behaviour and is kept unchanged.
    """
    if isinstance(n, str):
        text = n.strip()
        if not text:
            # Previously this fell through to text[-1] and raised IndexError.
            raise ValueError("format_bytes: empty size string")
        scale = _SUFFIX_SCALE.get(text[-1].upper())
        if scale is None:
            n = int(text)
        else:
            n = int(float(text[:-1]) * scale)

    n = int(n)

    # G/T carry one decimal place, as the documented examples require.
    if n >= 1024 ** 4:
        return f"{n / (1024 ** 4):.1f}T"
    if n >= 1024 ** 3:
        return f"{n / (1024 ** 3):.1f}G"
    if n >= 1_000_000:
        return f"{round(n / 1_000_000)}M"
    if n >= 1_000:
        return f"{round(n / 1_000)}K"
    return f"{n}B"


def experiment_label(path: str) -> str:
    """Derive a short experiment label from a CSV file path.

    For file names of the form ``bw_<label>.csv`` or
    ``bandwidth_<label>.csv`` the ``<label>`` part is returned; for any
    other name the file stem (name without its last suffix) is returned.
    """
    name = Path(path).name
    if name.endswith(".csv"):
        for prefix in ("bw_", "bandwidth_"):
            if name.startswith(prefix):
                return name[len(prefix) : -len(".csv")]
    return Path(path).stem


def percentile(values, p: float) -> float:
    """Return the ``p``-th percentile (0-100) of ``values`` as a plain float.

    The values are converted to a float array and passed to
    ``numpy.percentile`` with its default settings.
    """
    return float(np.percentile(np.asarray(values, dtype=float), p))


def read_bw_csv(path: str, unit: str):
    """Read a bandwidth CSV and group the samples by transfer size.

    The file must have exactly the header ``bytes,bw_KiBps``; whitespace
    around the header names and around the cell values is ignored.

    Args:
        path: Path of the CSV file to read.
        unit: Target unit; must be a key of ``KIBPS_TO_UNIT``.

    Returns:
        A ``defaultdict(list)`` mapping each integer ``bytes`` value to the
        list of bandwidth samples converted to ``unit``, in file order.

    Raises:
        ValueError: If the header is missing or wrong, or a row cannot be
            parsed (the original parsing error is chained as the cause).
        KeyError: If ``unit`` is not a key of ``KIBPS_TO_UNIT``.
    """
    grouped = defaultdict(list)

    with open(path, newline="") as f:
        reader = csv.DictReader(f)

        if reader.fieldnames is None:
            raise ValueError(f"{path}: empty CSV or missing header")

        fieldnames = [name.strip() for name in reader.fieldnames]
        if fieldnames != ["bytes", "bw_KiBps"]:
            raise ValueError(
                f"{path}: expected header 'bytes,bw_KiBps', got: {','.join(fieldnames)}"
            )
        # Key the rows by the stripped names that were just validated, so a
        # header such as "bytes, bw_KiBps" does not break the lookups below.
        reader.fieldnames = fieldnames

        # The conversion factor does not depend on the row.
        scale = KIBPS_TO_UNIT[unit]

        # Data rows start on line 2, right after the header.
        for line_no, row in enumerate(reader, start=2):
            try:
                nbytes = int(row["bytes"].strip())
                bw_kibps = float(row["bw_KiBps"].strip())
            except (AttributeError, TypeError, ValueError) as e:
                # AttributeError covers short rows, whose missing cells are None.
                raise ValueError(
                    f"{path}: invalid row at line {line_no}: {row} ({e})"
                ) from e

            grouped[nbytes].append(bw_kibps * scale)

    return grouped


# NOTE(review): the source text continues here with the opening of `main()`,
# which is cut off at the end of the visible chunk. The fragment is kept
# verbatim on the next line (commented out so this block parses) and has to be
# rejoined with the remainder of the file:
# def main() -> int: parser = argparse.ArgumentParser( description="Create grouped bandwidth bar graph from bw_*.csv files." ) parser.add_argument( "inputs", nargs="+", help="Input CSV files with header: bytes,bw_KiBps. Recommended naming: bw_