#!/usr/bin/env python3 """ compare_sarb_two_runs.py Compare two SARB-enabled experiments by plotting side-by-side boxplots of residuals per hop (starting at hop=1). Residuals are per node vs the baseline node (first in hop order), centered by per-node median, with optional MAD outlier cleaning. You can plot signed centered residuals (default), or absolute values (--plot-abs). For table parity, metrics (p50/p95/p99/MAD) are by default computed on |residual|. Example: python compare_sarb_two_runs.py EXP_A EXP_B \ --label-a "PI E2E m3 (1.0,2.0)" \ --label-b "LinReg P2P 0 (64)" \ --sarb-skip 0 --clean-mad 6 \ --overlay-p95 --whis 5,95 \ --out winners_box.png """ import argparse import math import re from pathlib import Path from typing import Dict, List, Tuple import numpy as np import pandas as pd import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt from matplotlib.patches import Patch from matplotlib.ticker import MultipleLocator RE_NODES_IN_NAME = re.compile(r'nodes\(([\d\-]+)\)') def order_nodes_from_dirname(run_dir: Path) -> List[str]: m = RE_NODES_IN_NAME.search(run_dir.name) if m: order_ids = [int(x) for x in m.group(1).split('-')] return [f"apu{n:02d}" for n in order_ids] return [p.name for p in sorted(run_dir.glob("apu??")) if p.is_dir()] def pick_sarb_csv(tsdir: Path) -> Path | None: if not tsdir.exists(): return None eth0 = tsdir / "eth0.csv" if eth0.exists(): return eth0 csvs = sorted(tsdir.glob("*.csv"), key=lambda p: p.stat().st_mtime, reverse=True) return csvs[0] if csvs else None def parse_sarb_csv(csv_path: Path) -> pd.DataFrame: df = pd.read_csv(csv_path) if {"hw_raw_sec","hw_raw_nsec"}.issubset(df.columns): rx_ns = df["hw_raw_sec"].astype("int64")*1_000_000_000 + df["hw_raw_nsec"].astype("int64") elif {"hw_sys_sec","hw_sys_nsec"}.issubset(df.columns): rx_ns = df["hw_sys_sec"].astype("int64")*1_000_000_000 + df["hw_sys_nsec"].astype("int64") else: rx_ns = df["sw_sec"].astype("int64")*1_000_000_000 + df["sw_nsec"].astype("int64") return pd.DataFrame({"seq": df["seq"].astype(int), "rx_ns": rx_ns}) def robust_stats_abs(x) -> dict: x = np.asarray(x, dtype=float) x = x[~np.isnan(x)] if x.size == 0: return dict(n=0, median=math.nan, MAD=math.nan) med = float(np.median(x)) mad = float(np.median(np.abs(x - med))) return dict(n=int(x.size), median=med, MAD=mad) def load_experiment_residuals( run_dir: Path, sarb_skip: int = 0, clean_mad: float = 6.0, align_seq_with: set[int] | None = None, ) -> Tuple[Dict[int, np.ndarray], str]: """Return: hop_index (1..) -> centered residuals (SIGNED), baseline node name.""" nodes_order = order_nodes_from_dirname(run_dir) node_maps: Dict[str, Dict[int, int]] = {} for n in nodes_order: csv = pick_sarb_csv(run_dir / n / "tslogs") if csv is None: continue try: df = parse_sarb_csv(csv) except Exception: continue node_maps[n] = dict(zip(df["seq"].tolist(), df["rx_ns"].tolist())) if not node_maps: return {}, nodes_order[0] if nodes_order else "unknown" common_seq = None for m in node_maps.values(): s = set(m.keys()) common_seq = s if common_seq is None else (common_seq & s) common = sorted(common_seq) if common_seq else [] if sarb_skip > 0 and common: common = common[sarb_skip:] if align_seq_with is not None: common = [q for q in common if q in align_seq_with] if not common: return {}, nodes_order[0] if nodes_order else "unknown" baseline = nodes_order[0] if nodes_order[0] in node_maps else next(iter(node_maps.keys())) base_map = node_maps.get(baseline, {}) hop_residuals: Dict[int, np.ndarray] = {} for idx, node in enumerate(nodes_order): if node == baseline or node not in node_maps: continue vals = [] m_node = node_maps[node] for q in common: b = base_map.get(q); r = m_node.get(q) if b is None or r is None: continue vals.append(r - b) if not vals: continue v = np.asarray(vals, dtype=float) med = float(np.median(v)) centered = v - med if clean_mad and clean_mad > 0: st = robust_stats_abs(centered) mad = max(st["MAD"], 1.0) thr = clean_mad * mad centered = centered[np.abs(centered) <= thr] if centered.size == 0: continue hop_residuals[idx] = centered # hop index (baseline is 0) return hop_residuals, baseline def to_abs(arr: np.ndarray) -> np.ndarray: return np.abs(arr) if arr.size else arr def summarize_abs(arr: np.ndarray): if arr.size == 0: return dict(n=0, p50=np.nan, p95=np.nan, p99=np.nan, mad=np.nan) x = np.abs(arr.astype(float)) p50 = np.percentile(x, 50) p95 = np.percentile(x, 95) p99 = np.percentile(x, 99) med = np.median(x) mad = np.median(np.abs(x - med)) return dict(n=int(x.size), p50=p50, p95=p95, p99=p99, mad=mad) def parse_whis(s: str | None, plotting_abs: bool) -> tuple | float: """--whis: '5,95' => (5,95) percentiles; '1.5' => 1.5*IQR; default: (5,95) if abs else 1.5""" if not s: return (5.0, 95.0) if plotting_abs else 1.5 if "," in s: a, b = s.split(",", 1) return (float(a), float(b)) return float(s) def main(): ap = argparse.ArgumentParser(description="Compare two SARB experiments with side-by-side boxplots per hop (starting at 1).") ap.add_argument("exp_a", help="Experiment A folder") ap.add_argument("exp_b", help="Experiment B folder") ap.add_argument("--label-a", default=None, help="Legend label for experiment A") ap.add_argument("--label-b", default=None, help="Legend label for experiment B") ap.add_argument("--out", default=None, help="Output PNG path") ap.add_argument("--sarb-skip", type=int, default=0, help="Initial SARB sequences to skip") ap.add_argument("--clean-mad", type=float, default=6.0, help="MAD multiplier for outlier cleaning (<=0 disables)") ap.add_argument("--align-seq", action="store_true", help="Intersect sequences across BOTH experiments") ap.add_argument("--plot-abs", action="store_true", help="Plot |residual| (default: plot signed residuals).") ap.add_argument("--metrics-abs", action="store_true", default=True, help="Compute/print metrics on |residual| (default on).") ap.add_argument("--print-metrics", action="store_true", help="Print per-hop metrics for validation.") ap.add_argument("--overlay-p95", action="store_true", help="Overlay per-hop p95(|res|) markers/lines.") ap.add_argument("--whis", type=str, default=None, help='Whisker rule: float (IQR factor, e.g., "1.5") or percentiles "5,95".') args = ap.parse_args() run_a = Path(args.exp_a).expanduser().resolve() run_b = Path(args.exp_b).expanduser().resolve() if not run_a.is_dir() or not run_b.is_dir(): raise SystemExit("ERROR: both arguments must be experiment directories") label_a = args.label_a or run_a.name label_b = args.label_b or run_b.name align_set = None if args.align_seq: def experiment_common_seq(run_dir: Path) -> set[int]: nodes = order_nodes_from_dirname(run_dir) seq_common = None for n in nodes: csv = pick_sarb_csv(run_dir / n / "tslogs") if not csv: continue try: df = parse_sarb_csv(csv) except Exception: continue s = set(df["seq"].astype(int).tolist()) seq_common = s if seq_common is None else (seq_common & s) return seq_common or set() align_set = experiment_common_seq(run_a) & experiment_common_seq(run_b) hop_to_vals_a, baseline_a = load_experiment_residuals( run_a, sarb_skip=args.sarb_skip, clean_mad=args.clean_mad, align_seq_with=align_set ) hop_to_vals_b, baseline_b = load_experiment_residuals( run_b, sarb_skip=args.sarb_skip, clean_mad=args.clean_mad, align_seq_with=align_set ) if not hop_to_vals_a and not hop_to_vals_b: raise SystemExit("ERROR: No SARB residuals found in either experiment.") common_hops = sorted(set(hop_to_vals_a.keys()) & set(hop_to_vals_b.keys())) if not common_hops: common_hops = sorted(set(hop_to_vals_a.keys()) | set(hop_to_vals_b.keys())) print("WARNING: Experiments do not share the same hops.") def metrics_for(arr: np.ndarray): return summarize_abs(arr) if args.metrics_abs else summarize_abs(arr) # summarize_abs ignores sign if args.print_metrics: print("\nPer-hop metrics" f" ({'|residual|' if args.metrics_abs else 'signed'})") print(" hop | A_n A_p50 A_p95 A_p99 A_MAD | B_n B_p50 B_p95 B_p99 B_MAD") for h in common_hops: sa = metrics_for(hop_to_vals_a.get(h, np.array([]))) sb = metrics_for(hop_to_vals_b.get(h, np.array([]))) print(f"{h:>3} | {sa['n']:>5} {sa['p50']:>8.1f} {sa['p95']:>8.1f} {sa['p99']:>8.1f} {sa['mad']:>8.1f} " f"| {sb['n']:>5} {sb['p50']:>8.1f} {sb['p95']:>8.1f} {sb['p99']:>8.1f} {sb['mad']:>8.1f}") # Arrays for plotting (signed or abs) plot_vals_a = {h: (to_abs(v) if args.plot_abs else v) for h, v in hop_to_vals_a.items()} plot_vals_b = {h: (to_abs(v) if args.plot_abs else v) for h, v in hop_to_vals_b.items()} # Prepare boxplot data data, positions, xticks = [], [], [] step = 3.0 width = 0.8 for i, hop in enumerate(common_hops, start=1): base_pos = (i - 1) * step + 1.0 data.append(plot_vals_a.get(hop, np.array([]))); positions.append(base_pos) data.append(plot_vals_b.get(hop, np.array([]))); positions.append(base_pos + 1.0) xticks.append((base_pos + base_pos + 1.0) / 2.0) color_a, color_b, alpha = "tab:blue", "tab:orange", 0.6 plt.figure(figsize=(8, 6)) if not args.plot_abs: plt.axhline(0.0, linestyle="--", linewidth=1) whis = parse_whis(args.whis, plotting_abs=args.plot_abs) bp = plt.boxplot( data, positions=positions, widths=width, showfliers=False, manage_ticks=False, patch_artist=True, whis=whis, ) for i, box in enumerate(bp["boxes"]): is_a = (i % 2 == 0) c = color_a if is_a else color_b box.set(facecolor=c, edgecolor=c, alpha=alpha, linewidth=1.5) for med in bp["medians"]: med.set(color="black", linewidth=1.5) for whisk in bp["whiskers"]: whisk.set(color="black", linewidth=1.0) for cap in bp["caps"]: cap.set(color="black", linewidth=1.0) # Optional p95(|res|) overlay if args.overlay_p95: xs_a, ys_a, xs_b, ys_b = [], [], [], [] for i, hop in enumerate(common_hops, start=1): center_a = (i - 1) * step + 1.0 center_b = center_a + 1.0 sa = summarize_abs(hop_to_vals_a.get(hop, np.array([]))) sb = summarize_abs(hop_to_vals_b.get(hop, np.array([]))) if np.isfinite(sa["p95"]): xs_a.append(center_a); ys_a.append(sa["p95"]) if np.isfinite(sb["p95"]): xs_b.append(center_b); ys_b.append(sb["p95"]) if xs_a: plt.plot(xs_a, ys_a, marker="o", linestyle="-", linewidth=1.2, label=f"{label_a} p95(|res|)") if xs_b: plt.plot(xs_b, ys_b, marker="o", linestyle="-", linewidth=1.2, label=f"{label_b} p95(|res|)") plt.xticks(xticks, [f"{h}" for h in common_hops]) plt.xlabel("Hop count (1–8)") plt.ylabel("│Δ time│ vs baseline (ns) — centered, cleaned" if args.plot_abs else "Δ time vs baseline (ns) — centered, cleaned") plt.grid(True, axis="both", linestyle="-", alpha=0.3) legend_elems = [ Patch(facecolor=color_a, edgecolor=color_a, alpha=alpha, label=f"{label_a}"), Patch(facecolor=color_b, edgecolor=color_b, alpha=alpha, label=f"{label_b}"), ] plt.legend(handles=legend_elems, loc="best") plt.gca().yaxis.set_major_locator(MultipleLocator(250)) plt.tight_layout() out_path = args.out or str((Path.cwd() / f"comparison_{run_a.name}_vs_{run_b.name}.png").resolve()) plt.savefig(out_path, dpi=400) plt.close() print(f"Saved comparison plot to: {out_path}") if __name__ == "__main__": main()