# Multipliers that convert a latency expressed in microseconds into each
# supported output unit (value_in_unit = value_in_usec * USEC_TO_UNIT[unit]).
USEC_TO_UNIT = {
    "sec": 1.0 / 1_000_000.0,
    "msec": 1.0 / 1_000.0,
    "usec": 1.0,
    "nsec": 1_000.0,
}


# Line-style and marker pools. Nothing in the functions below reads them
# directly; presumably the caller picks from them to fill the "linestyle" and
# "marker" keys of each series dict -- TODO confirm against main().
LINESTYLES = ["-", "--", "-.", ":"]
MARKERS = ["o", "s", "^", "D", "v", "P", "X", "*"]

# Decimal multipliers applied to suffixed size strings such as "246132K".
_SIZE_SUFFIX_SCALE = {
    "K": 1_000,
    "M": 1_000_000,
    "G": 1_000_000_000,
    "T": 1_000_000_000_000,
}


def format_bytes(n) -> str:
    """Format a byte count using the largest readable unit.

    Examples:
        4096       -> 4K
        246132K    -> 246M
        2097148K   -> 1.95G

    Args:
        n: A byte count (anything ``int()`` accepts) or a string. A string
            may carry a trailing K/M/G/T suffix (case-insensitive), which is
            interpreted as a decimal multiplier (K = 1000, ...).

    Returns:
        The formatted size: ``B`` for values below 1000, ``K``/``M`` as
        rounded whole numbers, ``G``/``T`` with two decimal places.

    Raises:
        ValueError: If ``n`` is an empty/blank string or cannot be parsed as
            a number.

    Notes:
        - K and M use decimal divisors (10**3, 10**6) whereas G and T use
          binary thresholds and divisors (2**30, 2**40). As a consequence a
          value in [10**9, 2**30) is still rendered in M, e.g. ``1050M``.
    """
    if isinstance(n, str):
        s = n.strip()
        if not s:
            # Previously this fell through to s[-1] and raised IndexError;
            # report it the same way as any other unparsable size string.
            raise ValueError("format_bytes: empty size string")

        suffix = s[-1].upper()
        if suffix in _SIZE_SUFFIX_SCALE:
            value = float(s[:-1])
            n = int(value * _SIZE_SUFFIX_SCALE[suffix])
        else:
            n = int(s)

    n = int(n)

    # Use G/T when the value is large enough.
    if n >= 1024 ** 4:
        return f"{n / (1024 ** 4):.2f}T"

    if n >= 1024 ** 3:
        return f"{n / (1024 ** 3):.2f}G"

    if n >= 1_000_000:
        return f"{round(n / 1_000_000)}M"

    if n >= 1_000:
        return f"{round(n / 1_000)}K"

    return f"{n}B"


def experiment_label(path: str) -> str:
    """Derive a legend label from an input file path.

    ``times_<label>.csv`` yields ``<label>``; any other file name yields its
    stem (the name without the final extension).
    """
    name = Path(path).name

    if name.startswith("times_") and name.endswith(".csv"):
        return name[len("times_") : -len(".csv")]

    return Path(path).stem


def percentile(values, p: float) -> float:
    """Return the ``p``-th percentile (0-100) of ``values`` as a float."""
    return float(np.percentile(np.array(values, dtype=float), p))


def read_times_csv(path: str, unit: str):
    """Read a ``bytes,time_usec`` CSV and group the latencies by byte count.

    Args:
        path: CSV file to read. Its header must be exactly ``bytes,time_usec``
            (surrounding whitespace around the column names is tolerated).
        unit: Key of ``USEC_TO_UNIT`` selecting the unit of the returned
            latencies.

    Returns:
        A ``defaultdict(list)`` mapping each byte count (int) to the list of
        its latencies, converted to ``unit``, in file order.

    Raises:
        ValueError: If the header is missing or wrong, or a row cannot be
            parsed. The row-level error is chained to the original cause.
        KeyError: If a data row is read and ``unit`` is not in
            ``USEC_TO_UNIT``.
    """
    grouped = defaultdict(list)

    with open(path, newline="") as f:
        reader = csv.DictReader(f)

        if reader.fieldnames is None:
            raise ValueError(f"{path}: empty CSV or missing header")

        fieldnames = [name.strip() for name in reader.fieldnames]
        if fieldnames != ["bytes", "time_usec"]:
            raise ValueError(
                f"{path}: expected header 'bytes,time_usec', got: {','.join(fieldnames)}"
            )

        # Key the rows by the stripped names. Without this a header such as
        # "bytes, time_usec" passes the check above but every row lookup
        # below fails with KeyError.
        reader.fieldnames = fieldnames

        # Data starts on line 2, right after the header.
        for line_no, row in enumerate(reader, start=2):
            try:
                nbytes = int(row["bytes"].strip())
                time_usec = float(row["time_usec"].strip())
            except (ValueError, AttributeError) as e:
                # AttributeError: a short row leaves the missing field as None.
                raise ValueError(
                    f"{path}: invalid row at line {line_no}: {row} ({e})"
                ) from e

            grouped[nbytes].append(time_usec * USEC_TO_UNIT[unit])

    return grouped


def add_legend_and_save(fig, ax, out_path: Path, legend_title: str, legend_outside: bool):
    """Attach the legend to ``ax``, write ``fig`` to ``out_path`` and close it.

    Args:
        fig: Figure to lay out, save (dpi=200) and close.
        ax: Axes that receives the legend.
        out_path: Destination passed to ``fig.savefig``.
        legend_title: Title shown above the legend entries.
        legend_outside: When true, the legend is anchored to the right of the
            axes and the layout is restricted to the left 78% of the figure
            to leave room for it.
    """
    if legend_outside:
        ax.legend(
            title=legend_title,
            fontsize=8,
            loc="center left",
            bbox_to_anchor=(1.02, 0.5),
        )
        fig.tight_layout(rect=[0, 0, 0.78, 1])
    else:
        ax.legend(title=legend_title, fontsize=8)
        fig.tight_layout()

    fig.savefig(out_path, dpi=200)
    plt.close(fig)


def plot_size_graph(
    nbytes: int,
    series,
    unit: str,
    out_path: Path,
    markers_mode: str,
    line_markers: bool,
    legend_outside: bool,
):
    """Plot one latency CDF graph for a single transfer size and save it.

    Args:
        nbytes: Transfer size; only used (via ``format_bytes``) in the title.
        series: Iterable of dicts, one per curve, providing the keys ``x``,
            ``y``, ``linestyle``, ``marker``, ``file_label``, ``n``, ``p90``
            and ``p99``.
        unit: Latency unit name shown on the x-axis label.
        out_path: Output image path.
        markers_mode: When equal to ``"tail"``, the P90 and P99 points of
            every curve are marked and annotated.
        line_markers: Whether each curve is drawn with its own marker.
        legend_outside: Forwarded to ``add_legend_and_save``.
    """
    size_label = format_bytes(nbytes)
    fig, ax = plt.subplots(figsize=(10, 6))

    for item in series:
        x = item["x"]
        y = item["y"]
        marker = item["marker"] if line_markers else None

        ax.plot(
            x,
            y,
            linestyle=item["linestyle"],
            marker=marker,
            markersize=3,
            # Thin out the markers so that long curves show roughly 25 of them.
            markevery=max(1, len(x) // 25),
            linewidth=1.8,
            label=f"{item['file_label']} (n={item['n']})",
        )

        # NOTE(review): the annotations are assumed to belong to the "tail"
        # branch together with the scatter markers -- confirm.
        if markers_mode == "tail":
            ax.scatter([item["p90"]], [0.90], marker="x", s=70, zorder=5)
            ax.scatter([item["p99"]], [0.99], marker="*", s=100, zorder=5)

            ax.annotate(
                "P90",
                xy=(item["p90"], 0.90),
                xytext=(5, 5),
                textcoords="offset points",
                fontsize=8,
            )
            ax.annotate(
                "P99",
                xy=(item["p99"], 0.99),
                xytext=(5, 5),
                textcoords="offset points",
                fontsize=8,
            )

    ax.set_xlabel(f"Latency ({unit})")
    ax.set_ylabel("CDF")
    ax.set_title(f"DMA Latency CDF - {size_label}")
    ax.grid(True)

    add_legend_and_save(
        fig,
        ax,
        out_path,
        "Experiment",
        legend_outside,
    )