summary refs log tree commit diff
path: root/decompress/combine.py
diff options
context:
space:
mode:
Diffstat (limited to 'decompress/combine.py')
-rwxr-xr-x decompress/combine.py 169
1 files changed, 169 insertions, 0 deletions
diff --git a/decompress/combine.py b/decompress/combine.py
new file mode 100755
index 0000000..7397449
--- /dev/null
+++ b/decompress/combine.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+
+import csv
+import glob
+import math
+from collections import defaultdict
+from pathlib import Path
+
+
# Configuration labels recognised in input file names.  Inputs whose label is
# not listed here are skipped, and this order fixes the column order of the
# generated CSV files.
LABELS = [
    "VANILLA",
    "FALCO",
    "TETRA",
    "TRACE",
]
+
+
def label_from_path(path: str, prefix: str) -> str:
    """Derive an upper-case label from a file path.

    The directory part and the final extension are discarded; if what is
    left begins with ``prefix`` that prefix is cut off.  The remainder is
    returned upper-cased.
    """
    base = Path(path).stem
    trimmed = base[len(prefix):] if base.startswith(prefix) else base
    return trimmed.upper()
+
+
def format_size(n: int) -> str:
    """Render a byte count as a short label such as ``"512B"`` or ``"4K"``.

    The unit is chosen with binary thresholds (1024, 1024**2, 1024**3), but
    the printed number is not always binary:

    * below 1024: the raw count followed by ``B``;
    * K range: ``n / 1000`` rounded to an integer;
    * M range: the MiB value when it lies within 0.03 of an integer,
      otherwise ``n / 1_000_000`` rounded to an integer;
    * G range: the GiB value, shown as an integer when within 0.03 of one
      and with one decimal otherwise.
    """
    kib = 1024
    mib = kib * kib
    gib = mib * kib
    tolerance = 0.03

    if n < kib:
        return f"{n}B"

    if n < mib:
        # Decimal divisor on purpose: mirrors the original labelling.
        return f"{round(n / 1000)}K"

    if n < gib:
        in_mib = n / mib
        nearest = round(in_mib)
        if abs(in_mib - nearest) < tolerance:
            return f"{nearest}M"
        # Not (almost) a whole number of MiB: fall back to decimal megabytes.
        return f"{round(n / 1_000_000)}M"

    in_gib = n / gib
    nearest = round(in_gib)
    if abs(in_gib - nearest) < tolerance:
        return f"{nearest}G"
    return f"{in_gib:.1f}G"
+
+
def mean(xs):
    """Return the arithmetic mean of ``xs``.

    ``xs`` must be non-empty; an empty sequence raises ZeroDivisionError.
    """
    total = sum(xs)
    count = len(xs)
    return total / count
+
+
def stddev(xs):
    """Return the population standard deviation of ``xs``.

    The squared deviations from the mean are averaged over ``len(xs)``
    (not ``len(xs) - 1``) before taking the square root.
    """
    centre = mean(xs)
    squared_deviations = [(x - centre) ** 2 for x in xs]
    variance = sum(squared_deviations) / len(xs)
    return math.sqrt(variance)
+
+
def prepare_times():
    """Merge every ``times_*.csv`` in the working directory into ``times_all.CSV``.

    Each input must have exactly the header ``bytes,time_usec``; its label is
    taken from the file name and files whose label is not in ``LABELS`` are
    skipped with a message.  For every (label, bytes) pair the samples are
    sorted and paired with their empirical CDF value ``rank / count``.

    The output has one ``<LABEL>_time_usec`` / ``<LABEL>_cdf`` column pair per
    label.  For a given byte size, labels with fewer samples than the longest
    one are padded with empty cells.

    Raises:
        ValueError: if an input file's header differs from the expected one.
    """
    samples = defaultdict(list)

    for csv_path in sorted(glob.glob("times_*.csv")):
        tag = label_from_path(csv_path, "times_")
        if tag not in LABELS:
            print(f"Skipping unknown latency label: {csv_path}")
            continue

        with open(csv_path, newline="") as src:
            reader = csv.DictReader(src)
            if reader.fieldnames != ["bytes", "time_usec"]:
                raise ValueError(
                    f"{csv_path}: expected bytes,time_usec, got {reader.fieldnames}"
                )

            for record in reader:
                size = int(record["bytes"])
                usec = float(record["time_usec"])
                samples[(tag, size)].append(usec)

    all_sizes = sorted({size for _, size in samples})

    header = ["bytes"]
    for tag in LABELS:
        header.extend((f"{tag}_time_usec", f"{tag}_cdf"))

    with open("times_all.CSV", "w", newline="") as dst:
        writer = csv.DictWriter(dst, fieldnames=header)
        writer.writeheader()

        for size in all_sizes:
            # Sorted samples with their cumulative fraction, per label.
            curves = {}
            for tag in LABELS:
                ordered = sorted(samples.get((tag, size), []))
                count = len(ordered)
                curves[tag] = [
                    (usec, rank / count)
                    for rank, usec in enumerate(ordered, start=1)
                ]

            depth = max((len(points) for points in curves.values()), default=0)

            for idx in range(depth):
                out = {"bytes": size}

                for tag in LABELS:
                    points = curves[tag]
                    if idx >= len(points):
                        # This label ran out of samples: pad with blanks.
                        out[f"{tag}_time_usec"] = ""
                        out[f"{tag}_cdf"] = ""
                        continue

                    usec, fraction = points[idx]
                    # NOTE(review): the value is divided by 1000 although the
                    # column keeps the "_time_usec" name - confirm the unit
                    # expected by whatever consumes this file.
                    out[f"{tag}_time_usec"] = f"{usec / 1000.0:.6f}"
                    out[f"{tag}_cdf"] = f"{fraction:.9f}"

                writer.writerow(out)

    print("Wrote times_all.CSV")
+
+
def prepare_bw():
    """Merge every ``bw_*.csv`` in the working directory into ``bw_all.CSV``.

    Each input must have exactly the header ``bytes,bw_KiBps``; its label is
    taken from the file name and files whose label is not in ``LABELS`` are
    skipped with a message.  Every ``bw_KiBps`` value is divided by 1024
    before being aggregated.

    The output holds one row per byte size with the raw size, its
    ``format_size`` label, and for each label the mean (column ``<LABEL>``)
    and standard deviation (column ``<LABEL>_err``) of its samples.  Labels
    without samples for a size get empty cells.

    Raises:
        ValueError: if an input file's header differs from the expected one.
    """
    samples = defaultdict(list)

    for csv_path in sorted(glob.glob("bw_*.csv")):
        tag = label_from_path(csv_path, "bw_")
        if tag not in LABELS:
            print(f"Skipping unknown bandwidth label: {csv_path}")
            continue

        with open(csv_path, newline="") as src:
            reader = csv.DictReader(src)
            if reader.fieldnames != ["bytes", "bw_KiBps"]:
                raise ValueError(
                    f"{csv_path}: expected bytes,bw_KiBps, got {reader.fieldnames}"
                )

            for record in reader:
                size = int(record["bytes"])
                scaled = float(record["bw_KiBps"]) / 1024.0
                samples[(tag, size)].append(scaled)

    all_sizes = sorted({size for _, size in samples})

    header = ["bytes", "size"]
    for tag in LABELS:
        header.extend((tag, f"{tag}_err"))

    with open("bw_all.CSV", "w", newline="") as dst:
        writer = csv.DictWriter(dst, fieldnames=header)
        writer.writeheader()

        for size in all_sizes:
            out = {"bytes": size, "size": format_size(size)}

            for tag in LABELS:
                measured = samples.get((tag, size), [])
                if not measured:
                    # No data for this label at this size: leave cells blank.
                    out[tag] = ""
                    out[f"{tag}_err"] = ""
                    continue

                out[tag] = f"{mean(measured):.6f}"
                out[f"{tag}_err"] = f"{stddev(measured):.6f}"

            writer.writerow(out)

    print("Wrote bw_all.CSV")
+
+
def main():
    """Build both combined CSV files: latency first, then bandwidth."""
    for step in (prepare_times, prepare_bw):
        step()


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()