summaryrefslogtreecommitdiff
path: root/decompress/analyze_time.py
diff options
context:
space:
mode:
Diffstat (limited to 'decompress/analyze_time.py')
-rwxr-xr-xdecompress/analyze_time.py332
1 files changed, 332 insertions, 0 deletions
diff --git a/decompress/analyze_time.py b/decompress/analyze_time.py
new file mode 100755
index 0000000..8f467e4
--- /dev/null
+++ b/decompress/analyze_time.py
@@ -0,0 +1,332 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
# Multiplicative factors that convert a duration given in microseconds
# into each supported output unit (value_in_unit = value_in_usec * factor).
USEC_TO_UNIT = dict(
    sec=1e-6,
    msec=1e-3,
    usec=1.0,
    nsec=1e3,
)


# Line styles and point markers for plotted series; callers index these
# modulo their length, so they repeat once exhausted.
LINESTYLES = [
    "-",
    "--",
    "-.",
    ":",
]
MARKERS = [
    "o",
    "s",
    "^",
    "D",
    "v",
    "P",
    "X",
    "*",
]
+
def format_bytes(n) -> str:
    """
    Format a byte count using the largest readable unit.

    Examples:
        246132K  -> 246M
        2097148K -> 2.0G

    Args:
        n: Byte count as an int (or anything ``int()`` accepts), or a string
            holding either a plain integer or a number followed by a
            K/M/G/T suffix (case-insensitive). Suffixed input is interpreted
            with decimal units (K = 1000 bytes, M = 1000**2, ...).

    Returns:
        The size followed by a one-letter unit:
        - T and G (thresholds and divisors are powers of 1024) with one
          decimal place,
        - M and K (powers of 1000) rounded to a whole number,
        - plain bytes with a "B" suffix below 1000.

    Raises:
        ValueError: If ``n`` is an empty/blank string or cannot be parsed
            as a number.
    """
    if isinstance(n, str):
        s = n.strip()
        if not s:
            # Without this guard, s[-1] below would raise IndexError.
            raise ValueError("empty size string")

        # Interpret suffix input as decimal-style units.
        scale = {
            "K": 1_000,
            "M": 1_000_000,
            "G": 1_000_000_000,
            "T": 1_000_000_000_000,
        }.get(s[-1].upper())

        if scale is not None:
            n = int(float(s[:-1]) * scale)
        else:
            n = int(s)

    n = int(n)

    # Use G/T when the value is large enough; one decimal place keeps the
    # label short and matches the documented examples.
    if n >= 1024 ** 4:
        return f"{n / (1024 ** 4):.1f}T"

    if n >= 1024 ** 3:
        return f"{n / (1024 ** 3):.1f}G"

    if n >= 1_000_000:
        return f"{round(n / 1_000_000)}M"

    if n >= 1_000:
        return f"{round(n / 1_000)}K"

    return f"{n}B"
+
def experiment_label(path: str) -> str:
    """
    Derive a short experiment label from an input file path.

    A file named ``times_<label>.csv`` yields ``<label>``; any other file
    name yields the name with its final suffix removed.
    """
    prefix, ext = "times_", ".csv"
    filename = Path(path).name

    follows_convention = filename.startswith(prefix) and filename.endswith(ext)
    if not follows_convention:
        return Path(path).stem

    # Drop the fixed prefix and extension, keeping only the middle part.
    return filename[len(prefix) : len(filename) - len(ext)]
+
+
def percentile(values, p: float) -> float:
    """Return the ``p``-th percentile of ``values`` as a built-in float."""
    samples = np.array(values, dtype=float)
    result = np.percentile(samples, p)
    return float(result)
+
+
def read_times_csv(path: str, unit: str):
    """
    Load latency samples from a ``bytes,time_usec`` CSV, grouped by byte count.

    Args:
        path: CSV file to read. Its header must be ``bytes,time_usec``;
            whitespace around the column names is ignored.
        unit: Key of ``USEC_TO_UNIT`` selecting the unit of the returned times.

    Returns:
        A ``defaultdict(list)`` mapping each byte count (int) to the list of
        its times (float), converted to ``unit``, in file order.

    Raises:
        ValueError: If the header is missing or unexpected, or a data row
            cannot be parsed.
        KeyError: If ``unit`` is not a key of ``USEC_TO_UNIT``.
        OSError: If the file cannot be opened.
    """
    grouped = defaultdict(list)

    with open(path, newline="") as f:
        reader = csv.DictReader(f)

        if reader.fieldnames is None:
            raise ValueError(f"{path}: empty CSV or missing header")

        fieldnames = [name.strip() for name in reader.fieldnames]
        if fieldnames != ["bytes", "time_usec"]:
            raise ValueError(
                f"{path}: expected header 'bytes,time_usec', got: {','.join(fieldnames)}"
            )

        # Key the rows by the stripped names; otherwise a header such as
        # "bytes, time_usec" passes validation but every row lookup fails.
        reader.fieldnames = fieldnames

        # Loop-invariant conversion factor.
        scale = USEC_TO_UNIT[unit]

        for row in reader:
            try:
                # A short row leaves the missing column as None, which
                # surfaces here as AttributeError on .strip().
                nbytes = int(row["bytes"].strip())
                time_usec = float(row["time_usec"].strip())
            except (AttributeError, ValueError) as e:
                # reader.line_num stays accurate even when blank lines
                # were skipped, unlike a simple row counter.
                raise ValueError(
                    f"{path}: invalid row at line {reader.line_num}: {row} ({e})"
                ) from e

            grouped[nbytes].append(time_usec * scale)

    return grouped
+
+
def add_legend_and_save(fig, ax, out_path: Path, legend_title: str, legend_outside: bool):
    """
    Add a legend to ``ax``, lay out ``fig``, save it to ``out_path`` and close it.

    When ``legend_outside`` is true the legend is anchored to the right of
    the axes and the layout is restricted to the left part of the figure so
    the legend has room; otherwise the default legend placement and layout
    are used. The figure is saved at 200 dpi.
    """
    legend_options = {"title": legend_title, "fontsize": 8}
    layout_options = {}

    if legend_outside:
        legend_options.update(loc="center left", bbox_to_anchor=(1.02, 0.5))
        layout_options["rect"] = [0, 0, 0.78, 1]

    ax.legend(**legend_options)
    fig.tight_layout(**layout_options)

    fig.savefig(out_path, dpi=200)
    plt.close(fig)
+
+
def plot_size_graph(
    nbytes: int,
    series,
    unit: str,
    out_path: Path,
    markers_mode: str,
    line_markers: bool,
    legend_outside: bool,
):
    """
    Plot the latency CDFs of all experiments for one byte count and save them.

    Args:
        nbytes: Byte count the graph is about; used for the title.
        series: Iterable of dicts, one per plotted line, providing the keys
            ``x``, ``y``, ``linestyle``, ``marker``, ``file_label``, ``n``,
            ``p90`` and ``p99``.
        unit: Time unit name shown in the x-axis label.
        out_path: Destination image file.
        markers_mode: ``"tail"`` adds P90/P99 markers and annotations to each
            line; any other value adds none.
        line_markers: Whether to draw each item's point marker on its line.
        legend_outside: Whether the legend is placed outside the axes.
    """
    size_label = format_bytes(nbytes)
    fig, ax = plt.subplots(figsize=(10, 6))

    # (annotation text, CDF level, scatter marker, scatter size)
    tail_points = (
        ("P90", 0.90, "x", 70),
        ("P99", 0.99, "*", 100),
    )

    for item in series:
        x = item["x"]
        y = item["y"]
        marker = item["marker"] if line_markers else None

        (line,) = ax.plot(
            x,
            y,
            linestyle=item["linestyle"],
            marker=marker,
            markersize=3,
            # Thin out the point markers so long series stay readable.
            markevery=max(1, len(x) // 25),
            linewidth=1.8,
            label=f"{item['file_label']} (n={item['n']})",
        )

        if markers_mode == "tail":
            # scatter() cycles colours independently of plot(); reuse the
            # line's colour so each marker is attributable to its series.
            color = line.get_color()

            for text, level, scatter_marker, size in tail_points:
                value = item[text.lower()]
                ax.scatter(
                    [value],
                    [level],
                    marker=scatter_marker,
                    s=size,
                    color=color,
                    zorder=5,
                )
                ax.annotate(
                    text,
                    xy=(value, level),
                    xytext=(5, 5),
                    textcoords="offset points",
                    fontsize=8,
                )

    ax.set_xlabel(f"Latency ({unit})")
    ax.set_ylabel("CDF")
    ax.set_title(f"DMA Latency CDF - {size_label}")
    ax.grid(True)

    add_legend_and_save(
        fig,
        ax,
        out_path,
        "Experiment",
        legend_outside,
    )
+
+
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the latency CDF script."""
    parser = argparse.ArgumentParser(
        description="Create separate latency CDF graphs per byte count from times_*.csv files."
    )
    parser.add_argument(
        "inputs",
        nargs="+",
        help="Input CSV files with header: bytes,time_usec. Recommended naming: times_<label>.csv",
    )
    parser.add_argument(
        "--unit",
        default="usec",
        choices=sorted(USEC_TO_UNIT.keys()),
        help="Output time unit. Input is always time_usec. Default: usec",
    )
    parser.add_argument(
        "--out",
        default="latency_cdf.png",
        help="Output base filename. Per-size graphs are generated from this name.",
    )
    parser.add_argument(
        "--markers",
        default="none",
        choices=["none", "tail"],
        help="Marker mode. 'none' disables percentile markers; 'tail' shows P90 and P99 markers. Default: none",
    )
    parser.add_argument(
        "--legend-outside",
        action="store_true",
        help="Place legend outside the plot.",
    )
    parser.add_argument(
        "--line-markers",
        action="store_true",
        help="Show point markers on CDF lines. Default: no line markers.",
    )
    return parser


def _unique_size_labels(sizes):
    """Map each byte count to a file-name label that is unique within ``sizes``."""
    labels = {nbytes: format_bytes(nbytes) for nbytes in sizes}

    occurrences = defaultdict(int)
    for label in labels.values():
        occurrences[label] += 1

    # format_bytes() rounds, so distinct sizes (e.g. 1000 and 1024) can share
    # a label; append the exact byte count only where that happens so the
    # common, non-colliding file names are unchanged.
    return {
        nbytes: label if occurrences[label] == 1 else f"{label}_{nbytes}B"
        for nbytes, label in labels.items()
    }


def main() -> int:
    """
    Command-line entry point.

    Reads every input CSV, prints a per-file, per-byte-count latency summary
    table to stdout, and writes one CDF graph per byte count. Graph files are
    named ``<stem>_<size label><suffix>`` after the ``--out`` argument
    (``.png`` when ``--out`` has no suffix).

    Returns:
        0 on success; 1 if an input file cannot be read, is malformed, or
        contains no data rows (the reason is printed to stderr).
    """
    args = _build_arg_parser().parse_args()

    all_data = []

    for path in args.inputs:
        try:
            grouped = read_times_csv(path, args.unit)
        except (OSError, ValueError) as e:
            # OSError covers missing/unreadable files, which would otherwise
            # escape as a traceback instead of a clean error exit.
            print(f"ERROR: {e}", file=sys.stderr)
            return 1

        if not grouped:
            print(f"ERROR: {path}: no data rows", file=sys.stderr)
            return 1

        all_data.append(
            {
                "path": path,
                "label": experiment_label(path),
                "grouped": grouped,
            }
        )

    print("Latency summary")
    print("================")
    print(f"input_files: {len(all_data)}")
    print("input_unit: usec")
    print(f"output_unit: {args.unit}")
    print()

    print(
        f"{'file_label':>18} {'bytes':>14} {'size':>8} {'n':>8} "
        f"{'min':>12} {'avg':>12} {'std':>12} "
        f"{'p50':>12} {'p90':>12} {'p99':>12} {'max':>12}"
    )

    # Byte count -> list of per-file series to draw on that size's graph.
    series_by_size = defaultdict(list)

    for file_idx, item in enumerate(all_data):
        file_label = item["label"]
        grouped = item["grouped"]
        linestyle = LINESTYLES[file_idx % len(LINESTYLES)]

        for size_idx, nbytes in enumerate(sorted(grouped)):
            times = np.array(grouped[nbytes], dtype=float)
            sorted_times = np.sort(times)
            # Empirical CDF: the i-th smallest sample sits at level i / n.
            cdf = np.arange(1, len(sorted_times) + 1) / len(sorted_times)

            min_v = float(np.min(times))
            avg = float(np.mean(times))
            std = float(np.std(times))
            p50 = percentile(times, 50)
            p90 = percentile(times, 90)
            p99 = percentile(times, 99)
            max_v = float(np.max(times))

            size_label = format_bytes(nbytes)

            print(
                f"{file_label:>18} {nbytes:14d} {size_label:>8} {len(times):8d} "
                f"{min_v:12.6f} {avg:12.6f} {std:12.6f} "
                f"{p50:12.6f} {p90:12.6f} {p99:12.6f} {max_v:12.6f}"
            )

            series_by_size[nbytes].append(
                {
                    "file_label": file_label,
                    "nbytes": nbytes,
                    "size_label": size_label,
                    "x": sorted_times,
                    "y": cdf,
                    "n": len(times),
                    "p90": p90,
                    "p99": p99,
                    "linestyle": linestyle,
                    "marker": MARKERS[size_idx % len(MARKERS)],
                }
            )

    out_base = Path(args.out)
    stem = out_base.stem
    suffix = out_base.suffix or ".png"

    print()

    file_labels = _unique_size_labels(series_by_size)

    for nbytes in sorted(series_by_size):
        out_path = out_base.with_name(f"{stem}_{file_labels[nbytes]}{suffix}")

        plot_size_graph(
            nbytes=nbytes,
            series=series_by_size[nbytes],
            unit=args.unit,
            out_path=out_path,
            markers_mode=args.markers,
            line_markers=args.line_markers,
            legend_outside=args.legend_outside,
        )

        print(f"Wrote CDF graph: {out_path}")

    return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())