#!/usr/bin/env python3
import argparse
import csv
import sys
from collections import defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

# Multipliers applied to a duration in microseconds to express it in each
# supported output unit (see read_times_csv).
USEC_TO_UNIT = {
    "sec": 1.0 / 1_000_000.0,
    "msec": 1.0 / 1_000.0,
    "usec": 1.0,
    "nsec": 1_000.0,
}

LINESTYLES = ["-", "--", "-.", ":"]
MARKERS = ["o", "s", "^", "D", "v", "P", "X", "*"]


def format_bytes(n) -> str:
    """
    Format a byte count using the largest readable unit.

    Examples:
      246132K   -> 246M
      2097148K  -> 2.0G

    Notes:
    - CSV normally gives integer bytes.
    - Strings like "246132K" are also accepted; a trailing K/M/G/T is
      interpreted as a decimal multiplier (1_000, 1_000_000, ...).
    - K and M are shown as rounded whole decimal units (n / 1_000,
      n / 1_000_000).
    - G/T are shown with one decimal place and are computed against
      1024**3 and 1024**4.

    Raises:
      ValueError: if ``n`` is an empty string or cannot be parsed as a number.
    """
    if isinstance(n, str):
        s = n.strip()
        if not s:
            # Without this guard s[-1] would raise an unhelpful IndexError.
            raise ValueError("format_bytes: empty size string")

        suffix = s[-1].upper()
        if suffix in {"K", "M", "G", "T"}:
            value = float(s[:-1])
            # Interpret suffix input as decimal-style units.
            scale = {
                "K": 1_000,
                "M": 1_000_000,
                "G": 1_000_000_000,
                "T": 1_000_000_000_000,
            }[suffix]
            n = int(value * scale)
        else:
            n = int(s)

    n = int(n)

    # Use G/T when the value is large enough. One decimal place, as the
    # docstring example (2097148K -> 2.0G) requires.
    if n >= 1024 ** 4:
        return f"{n / (1024 ** 4):.1f}T"
    if n >= 1024 ** 3:
        return f"{n / (1024 ** 3):.1f}G"
    if n >= 1_000_000:
        return f"{round(n / 1_000_000)}M"
    if n >= 1_000:
        return f"{round(n / 1_000)}K"
    return f"{n}B"


def experiment_label(path: str) -> str:
    """
    Derive a short experiment label from an input file path.

    A file named ``times_<label>.csv`` yields ``<label>``; any other name
    yields the file name without its final suffix.
    """
    name = Path(path).name
    if name.startswith("times_") and name.endswith(".csv"):
        return name[len("times_") : -len(".csv")]
    return Path(path).stem


def percentile(values, p: float) -> float:
    """Return the ``p``-th percentile (0-100) of ``values`` as a plain float."""
    return float(np.percentile(np.array(values, dtype=float), p))


def read_times_csv(path: str, unit: str):
    """
    Read a ``bytes,time_usec`` CSV and group the latencies by byte count.

    Args:
      path: CSV file whose header must be exactly ``bytes,time_usec``
        (surrounding whitespace in the header names is ignored).
      unit: key of ``USEC_TO_UNIT`` selecting the unit of the returned times.

    Returns:
      A ``defaultdict(list)`` mapping each byte count (int) to the list of
      its times, converted from microseconds to ``unit``.

    Raises:
      KeyError: if ``unit`` is not a key of ``USEC_TO_UNIT``.
      ValueError: if the header is missing or wrong, or a row cannot be parsed.
    """
    # Looked up once, before any I/O, so an unknown unit fails immediately.
    scale = USEC_TO_UNIT[unit]
    grouped = defaultdict(list)

    with open(path, newline="") as f:
        reader = csv.DictReader(f)

        if reader.fieldnames is None:
            raise ValueError(f"{path}: empty CSV or missing header")

        fieldnames = [name.strip() for name in reader.fieldnames]
        if fieldnames != ["bytes", "time_usec"]:
            raise ValueError(
                f"{path}: expected header 'bytes,time_usec', got: {','.join(fieldnames)}"
            )
        # Key the rows by the stripped names; otherwise a header such as
        # "bytes, time_usec" passes the check above but breaks every lookup.
        reader.fieldnames = fieldnames

        for row in reader:
            try:
                nbytes = int(row["bytes"].strip())
                time_usec = float(row["time_usec"].strip())
            except (AttributeError, TypeError, ValueError) as e:
                # AttributeError covers short rows, where DictReader fills the
                # missing field with None. reader.line_num stays correct even
                # when blank lines were skipped.
                raise ValueError(
                    f"{path}: invalid row at line {reader.line_num}: {row} ({e})"
                ) from e

            grouped[nbytes].append(time_usec * scale)

    return grouped


def add_legend_and_save(fig, ax, out_path: Path, legend_title: str, legend_outside: bool):
    """
    Attach the legend to ``ax``, save ``fig`` to ``out_path`` and close it.

    When ``legend_outside`` is true the legend is anchored to the right of
    the axes and the layout is restricted to the left 78% of the figure to
    leave room for it; otherwise the legend uses matplotlib's default
    placement.
    """
    if legend_outside:
        ax.legend(
            title=legend_title,
            fontsize=8,
            loc="center left",
            bbox_to_anchor=(1.02, 0.5),
        )
        fig.tight_layout(rect=[0, 0, 0.78, 1])
    else:
        ax.legend(title=legend_title, fontsize=8)
        fig.tight_layout()

    fig.savefig(out_path, dpi=200)
    plt.close(fig)


def plot_size_graph(
    nbytes: int,
    series,
    unit: str,
    out_path: Path,
    markers_mode: str,
    line_markers: bool,
    legend_outside: bool,
):
    """
    Plot one latency CDF figure for a single transfer size and save it.

    Args:
      nbytes: transfer size, shown in the title via ``format_bytes``.
      series: iterable of dicts, one per curve, providing the keys ``x``,
        ``y``, ``linestyle``, ``marker``, ``file_label``, ``n``, ``p90``
        and ``p99``.
      unit: latency unit name, used only in the x-axis label.
      out_path: destination image path.
      markers_mode: when equal to ``"tail"``, the P90 and P99 points of every
        curve are marked and annotated.
      line_markers: draw each item's ``marker`` along its line when true.
      legend_outside: forwarded to ``add_legend_and_save``.
    """
    size_label = format_bytes(nbytes)
    fig, ax = plt.subplots(figsize=(10, 6))

    for item in series:
        x = item["x"]
        y = item["y"]
        marker = item["marker"] if line_markers else None

        ax.plot(
            x,
            y,
            linestyle=item["linestyle"],
            marker=marker,
            markersize=3,
            # Thin the markers to roughly 25 per curve.
            markevery=max(1, len(x) // 25),
            linewidth=1.8,
            label=f"{item['file_label']} (n={item['n']})",
        )

        if markers_mode == "tail":
            ax.scatter([item["p90"]], [0.90], marker="x", s=70, zorder=5)
            ax.scatter([item["p99"]], [0.99], marker="*", s=100, zorder=5)
            ax.annotate(
                "P90",
                xy=(item["p90"], 0.90),
                xytext=(5, 5),
                textcoords="offset points",
                fontsize=8,
            )
            ax.annotate(
                "P99",
                xy=(item["p99"], 0.99),
                xytext=(5, 5),
                textcoords="offset points",
                fontsize=8,
            )

    ax.set_xlabel(f"Latency ({unit})")
    ax.set_ylabel("CDF")
    ax.set_title(f"DMA Latency CDF - {size_label}")
    ax.grid(True)

    add_legend_and_save(
        fig,
        ax,
        out_path,
        "Experiment",
        legend_outside,
    )
label=f"{item['file_label']} (n={item['n']})", ) if markers_mode == "tail": ax.scatter([item["p90"]], [0.90], marker="x", s=70, zorder=5) ax.scatter([item["p99"]], [0.99], marker="*", s=100, zorder=5) ax.annotate( "P90", xy=(item["p90"], 0.90), xytext=(5, 5), textcoords="offset points", fontsize=8, ) ax.annotate( "P99", xy=(item["p99"], 0.99), xytext=(5, 5), textcoords="offset points", fontsize=8, ) ax.set_xlabel(f"Latency ({unit})") ax.set_ylabel("CDF") ax.set_title(f"DMA Latency CDF - {size_label}") ax.grid(True) add_legend_and_save( fig, ax, out_path, "Experiment", legend_outside, ) def main() -> int: parser = argparse.ArgumentParser( description="Create separate latency CDF graphs per byte count from times_*.csv files." ) parser.add_argument( "inputs", nargs="+", help="Input CSV files with header: bytes,time_usec. Recommended naming: times_