diff options
| author | Siho Shin <victory8500@naver.com> | 2026-06-27 08:59:14 +0900 |
|---|---|---|
| committer | Siho Shin <victory8500@naver.com> | 2026-06-27 08:59:14 +0900 |
| commit | 5d2722de9290472bb8fbd120d1ec506f7765e209 (patch) | |
| tree | 1254b5f3542f528871bcfdd0356f81095e66d480 /decompress/combine.py | |
Diffstat (limited to 'decompress/combine.py')
| -rwxr-xr-x | decompress/combine.py | 169 |
1 files changed, 169 insertions, 0 deletions
#!/usr/bin/env python3
# Recovered from: diff --git a/decompress/combine.py b/decompress/combine.py
# (new file mode 100755, index 0000000..7397449, @@ -0,0 +1,169 @@)
"""Combine per-label benchmark CSVs into two summary tables.

Reads ``times_<label>.csv`` and ``bw_<label>.csv`` from the current working
directory and writes:

* ``times_all.CSV`` -- for every transfer size, the sorted time samples of
  each label together with their empirical CDF value.
* ``bw_all.CSV`` -- for every transfer size, the mean and population
  standard deviation of each label's bandwidth samples.

Only files whose label (file name without prefix, upper-cased) is listed in
``LABELS`` are used; other matching files are reported and skipped.
"""

import csv
import glob
import math
from collections import defaultdict
from pathlib import Path


# Labels that are combined, in the column order used by both output files.
LABELS = ["VANILLA", "FALCO", "TETRA", "TRACE"]


def label_from_path(path: str, prefix: str) -> str:
    """Return the upper-cased label encoded in a file name.

    The directory and extension are dropped, *prefix* is removed from the
    front of the remaining stem when present, and the rest is upper-cased,
    e.g. ``label_from_path("times_falco.csv", "times_") == "FALCO"``.
    """
    stem = Path(path).stem
    if stem.startswith(prefix):
        stem = stem[len(prefix):]
    return stem.upper()


def format_size(n: int) -> str:
    """Return a short human-readable label for a byte count.

    Sizes are bucketed with binary thresholds (1024, 1024**2, 1024**3).
    Within a bucket, a size that is within 0.03 of a whole number of binary
    units is labelled with that whole number (``65536 -> "64K"``,
    ``1048576 -> "1M"``).  Otherwise the K and M buckets fall back to the
    size rounded in decimal units (``64000 -> "64K"``,
    ``2000000 -> "2M"``) and the G bucket to one decimal place of GiB.
    Sizes below 1024 are returned as ``"<n>B"``.
    """
    gib = 1024 ** 3
    mib = 1024 ** 2
    kib = 1024

    if n >= gib:
        v = n / gib
        if abs(v - round(v)) < 0.03:
            return f"{int(round(v))}G"
        return f"{v:.1f}G"

    if n >= mib:
        v = n / mib
        if abs(v - round(v)) < 0.03:
            return f"{int(round(v))}M"
        return f"{round(n / 1_000_000)}M"

    if n >= kib:
        v = n / kib
        # Same near-integral check as the G and M buckets.  Without it a
        # power-of-two size was divided by 1000 and mislabelled
        # (65536 -> "66K" instead of "64K").
        if abs(v - round(v)) < 0.03:
            return f"{int(round(v))}K"
        return f"{round(n / 1000)}K"

    return f"{n}B"


def mean(xs):
    """Return the arithmetic mean of *xs*.

    Raises ZeroDivisionError when *xs* is empty.
    """
    return sum(xs) / len(xs)


def stddev(xs):
    """Return the population standard deviation of *xs* (divides by len(xs)).

    Raises ZeroDivisionError when *xs* is empty.
    """
    m = mean(xs)
    return math.sqrt(sum((x - m) ** 2 for x in xs) / len(xs))


def _load_samples(pattern, prefix, value_column, kind, divisor=1.0):
    """Read every CSV matching *pattern* and group its samples.

    Each file must have exactly the header ``bytes,<value_column>``.  Files
    whose label is not in ``LABELS`` are reported (using *kind* in the
    message) and skipped.

    Returns a dict mapping ``(label, nbytes)`` to the list of sample values,
    each divided by *divisor*, in file order.

    Raises ValueError when a file's header does not match.
    """
    groups = defaultdict(list)

    for path in sorted(glob.glob(pattern)):
        label = label_from_path(path, prefix)

        if label not in LABELS:
            print(f"Skipping unknown {kind} label: {path}")
            continue

        with open(path, newline="") as f:
            reader = csv.DictReader(f)
            if reader.fieldnames != ["bytes", value_column]:
                raise ValueError(
                    f"{path}: expected bytes,{value_column}, "
                    f"got {reader.fieldnames}"
                )

            for row in reader:
                nbytes = int(row["bytes"])
                groups[(label, nbytes)].append(
                    float(row[value_column]) / divisor
                )

    return groups


def prepare_times():
    """Merge ``times_*.csv`` into ``times_all.CSV``.

    For every size, each label's samples are sorted ascending and paired
    with their empirical CDF value ``(rank) / (sample count)``.  One output
    row is written per rank; labels with fewer samples than the longest
    label at that size get empty cells.
    """
    groups = _load_samples("times_*.csv", "times_", "time_usec", "latency")
    sizes = sorted({nbytes for (_, nbytes) in groups})

    fieldnames = ["bytes"]
    for label in LABELS:
        fieldnames += [f"{label}_time_usec", f"{label}_cdf"]

    # NOTE(review): the upper-case extension presumably keeps the output from
    # matching the lower-case input glob on a later run -- confirm.
    with open("times_all.CSV", "w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()

        for nbytes in sizes:
            per_label = {}
            for label in LABELS:
                # .get() so the defaultdict is not populated with empties.
                values = sorted(groups.get((label, nbytes), []))
                n = len(values)
                per_label[label] = [
                    (v, rank / n) for rank, v in enumerate(values, start=1)
                ]

            max_n = max(len(points) for points in per_label.values())

            for i in range(max_n):
                row = {"bytes": nbytes}

                for label in LABELS:
                    points = per_label[label]
                    if i < len(points):
                        t, cdf = points[i]
                        # NOTE(review): the value read from the input's
                        # "time_usec" column is divided by 1000 here but is
                        # still written under a "*_time_usec" header --
                        # confirm the intended unit with the consumers of
                        # this file before renaming anything.
                        row[f"{label}_time_usec"] = f"{t / 1000.0:.6f}"
                        row[f"{label}_cdf"] = f"{cdf:.9f}"
                    else:
                        row[f"{label}_time_usec"] = ""
                        row[f"{label}_cdf"] = ""

                w.writerow(row)

    print("Wrote times_all.CSV")


def prepare_bw():
    """Merge ``bw_*.csv`` into ``bw_all.CSV``.

    Input ``bw_KiBps`` values are divided by 1024 before aggregation.  One
    row is written per size with a human-readable size label and, for each
    label, the mean and population standard deviation of its samples (empty
    cells when the label has no samples at that size).
    """
    groups = _load_samples(
        "bw_*.csv", "bw_", "bw_KiBps", "bandwidth", divisor=1024.0
    )
    sizes = sorted({nbytes for (_, nbytes) in groups})

    fieldnames = ["bytes", "size"]
    for label in LABELS:
        fieldnames += [label, f"{label}_err"]

    with open("bw_all.CSV", "w", newline="") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()

        for nbytes in sizes:
            row = {
                "bytes": nbytes,
                "size": format_size(nbytes),
            }

            for label in LABELS:
                values = groups.get((label, nbytes), [])

                if values:
                    row[label] = f"{mean(values):.6f}"
                    row[f"{label}_err"] = f"{stddev(values):.6f}"
                else:
                    row[label] = ""
                    row[f"{label}_err"] = ""

            w.writerow(row)

    print("Wrote bw_all.CSV")


def main():
    """Generate both combined tables in the current working directory."""
    prepare_times()
    prepare_bw()


if __name__ == "__main__":
    main()
