#!/usr/bin/env python3
"""
compare_sarb_two_runs.py

Compare two SARB-enabled experiments by plotting side-by-side boxplots of
(centered) residuals per hop (starting from hop=1).

For each experiment, residuals are computed per node versus the baseline node
(first in hop order), then centered by per-node median. Optional outlier
cleaning with MAD.

Example:
    python compare_sarb_two_runs.py /path/to/EXP_A /path/to/EXP_B \
        --out comparison_A_vs_B.png --clean-mad 6 --sarb-skip 0

Inputs (same structure as your analyzer expects):
    <EXP_DIR>/apu00/, <EXP_DIR>/apu01/, ...
    Each node may contain tslogs/*.csv with columns including:
        - seq
    and one of:
        - hw_raw_sec, hw_raw_nsec
        - hw_sys_sec, hw_sys_nsec
        - sw_sec, sw_nsec

Options:
    --label-a, --label-b   Labels in the legend (default: folder names)
    --out                  Output PNG path (default built from folder names)
    --sarb-skip            Initial SARB sequences to skip (per experiment)
    --clean-mad            MAD multiplier for outlier cleaning (<=0 disables)
    --align-seq            If set, intersect sequences across BOTH experiments
                           (in addition to per-experiment common filtering)
"""

import argparse
import math
import re
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd

# Plotting
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from matplotlib.ticker import MultipleLocator

# --------- regex to infer hop order from directory name ----------
RE_NODES_IN_NAME = re.compile(r'nodes\(([\d\-]+)\)')


def order_nodes_from_dirname(run_dir: Path) -> List[str]:
    """Infer hop order from a 'nodes(a-b-c)' pattern in the directory name."""
    m = RE_NODES_IN_NAME.search(run_dir.name)
    if m:
        order_ids = [int(x) for x in m.group(1).split('-')]
        return [f"apu{n:02d}" for n in order_ids]
    # fallback: alphabetical apu??
    return [p.name for p in sorted(run_dir.glob("apu??")) if p.is_dir()]


# ---------- helpers to pick & parse SARB csv ----------
def pick_sarb_csv(tsdir: Path) -> Path | None:
    """Prefer tslogs/eth0.csv; otherwise the most recently modified CSV."""
    if not tsdir.exists():
        return None
    eth0 = tsdir / "eth0.csv"
    if eth0.exists():
        return eth0
    csvs = sorted(tsdir.glob("*.csv"), key=lambda p: p.stat().st_mtime, reverse=True)
    return csvs[0] if csvs else None


def parse_sarb_csv(csv_path: Path) -> pd.DataFrame:
    """Return a DataFrame with 'seq' and 'rx_ns' (receive timestamp in ns),
    preferring HW raw timestamps, then HW system, then SW timestamps."""
    df = pd.read_csv(csv_path)
    if {"hw_raw_sec", "hw_raw_nsec"}.issubset(df.columns):
        rx_ns = df["hw_raw_sec"].astype("int64") * 1_000_000_000 + df["hw_raw_nsec"].astype("int64")
    elif {"hw_sys_sec", "hw_sys_nsec"}.issubset(df.columns):
        rx_ns = df["hw_sys_sec"].astype("int64") * 1_000_000_000 + df["hw_sys_nsec"].astype("int64")
    else:
        rx_ns = df["sw_sec"].astype("int64") * 1_000_000_000 + df["sw_nsec"].astype("int64")
    return pd.DataFrame({"seq": df["seq"].astype(int), "rx_ns": rx_ns})


def robust_stats_abs(x) -> dict:
    """Median and MAD (median absolute deviation) of the non-NaN values of x."""
    x = np.asarray(x, dtype=float)
    x = x[~np.isnan(x)]
    if x.size == 0:
        return dict(n=0, median=math.nan, MAD=math.nan)
    med = float(np.median(x))
    mad = float(np.median(np.abs(x - med)))
    return dict(n=int(x.size), median=med, MAD=mad)
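
# Illustrative only (not executed): a worked example of the centering and
# MAD-based cleaning applied in load_experiment_residuals below, using
# hypothetical residuals (ns) for one node:
#
#   v        = [1.0, 2.0, 3.0, 100.0]            # raw residuals vs baseline
#   centered = v - median(v) = [-1.5, -0.5, 0.5, 97.5]
#   robust_stats_abs(centered)  # -> {'n': 4, 'median': 0.0, 'MAD': 1.0}
#
# With --clean-mad 6 the threshold is 6 * max(MAD, 1.0) = 6.0 ns, so the
# centered value 97.5 is dropped as an outlier and the other three are kept.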


# ---------- load one experiment's SARB residuals per hop ----------
def load_experiment_residuals(
    run_dir: Path,
    sarb_skip: int = 0,
    clean_mad: float = 6.0,
    align_seq_with: set[int] | None = None,
) -> Tuple[Dict[int, np.ndarray], str]:
    """
    Returns:
        hop_residuals: dict hop_index (int, starting 1) -> np.ndarray of centered residuals (ns)
        baseline_node: str node name used as baseline
    """
    nodes_order = order_nodes_from_dirname(run_dir)

    # Build per-node seq->rx_ns maps for nodes that have SARB data
    node_maps: Dict[str, Dict[int, int]] = {}
    for n in nodes_order:
        tsdir = run_dir / n / "tslogs"
        csv = pick_sarb_csv(tsdir)
        if csv is None:
            continue
        try:
            df = parse_sarb_csv(csv)
        except Exception:
            continue
        node_maps[n] = dict(zip(df["seq"].tolist(), df["rx_ns"].tolist()))

    if not node_maps:
        return {}, nodes_order[0] if nodes_order else "unknown"

    # Determine per-experiment common sequences across nodes that have data
    common_seq = None
    for m in node_maps.values():
        s = set(m.keys())
        common_seq = s if common_seq is None else (common_seq & s)
    common = sorted(common_seq) if common_seq else []
    if sarb_skip > 0 and common:
        common = common[sarb_skip:]

    # Optional: intersect with other experiment's sequences to align
    if align_seq_with is not None:
        common = [q for q in common if q in align_seq_with]

    if not common:
        return {}, nodes_order[0] if nodes_order else "unknown"

    baseline = nodes_order[0]
    if baseline not in node_maps:
        # choose first node that has SARB as baseline fallback
        baseline = next(iter(node_maps.keys()))

    hop_residuals: Dict[int, np.ndarray] = {}
    base_map = node_maps.get(baseline, {})
    for idx, node in enumerate(nodes_order):
        if node == baseline:
            continue  # hop 0 ignored
        if node not in node_maps:
            continue
        vals = []
        m_node = node_maps[node]
        for q in common:
            b = base_map.get(q)
            r = m_node.get(q)
            if b is None or r is None:
                continue
            vals.append(r - b)
        if not vals:
            continue
        v = np.asarray(vals, dtype=float)
        # center by median
        med = float(np.median(v))
        centered = v - med
        # optional MAD cleaning
        if clean_mad and clean_mad > 0:
            st = robust_stats_abs(centered)
            mad = max(st["MAD"], 1.0)
            thr = clean_mad * mad
            centered = centered[np.abs(centered) <= thr]
            if centered.size == 0:
                continue
        hop = idx  # baseline at 0, so this is >=1
        hop_residuals[hop] = centered

    return hop_residuals, baseline
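
# Minimal usage sketch for load_experiment_residuals (the path and parameter
# values below are placeholders, not part of any real experiment):
#
#   hops, baseline = load_experiment_residuals(
#       Path("/data/EXP_A_nodes(1-2-3)"), sarb_skip=10, clean_mad=6.0
#   )
#   # hops maps hop index (1, 2, ...) to an np.ndarray of centered residuals
#   # in ns; baseline is the reference node name, e.g. "apu01" for the
#   # directory name above.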


# ---------- main plotting ----------
def main():
    ap = argparse.ArgumentParser(
        description="Compare two SARB experiments with side-by-side boxplots per hop (starting at 1)."
    )
    ap.add_argument("exp_a", help="Experiment A folder")
    ap.add_argument("exp_b", help="Experiment B folder")
    ap.add_argument("--label-a", default=None, help="Legend label for experiment A (default: folder name)")
    ap.add_argument("--label-b", default=None, help="Legend label for experiment B (default: folder name)")
    ap.add_argument("--out", default=None, help="Output PNG path (default: comparison_<A>_vs_<B>.png)")
    ap.add_argument("--sarb-skip", type=int, default=0, help="Initial SARB sequences to skip per experiment")
    ap.add_argument("--clean-mad", type=float, default=6.0, help="MAD multiplier for outlier cleaning (<=0 disables)")
    ap.add_argument("--align-seq", action="store_true", help="Intersect sequences across BOTH experiments before residuals")
    args = ap.parse_args()

    run_a = Path(args.exp_a).expanduser().resolve()
    run_b = Path(args.exp_b).expanduser().resolve()
    if not run_a.is_dir() or not run_b.is_dir():
        raise SystemExit("ERROR: both arguments must be experiment directories")

    label_a = args.label_a or run_a.name
    label_b = args.label_b or run_b.name

    # If aligning sequences across experiments, we need the per-experiment common sets.
    align_set = None
    if args.align_seq:
        # Build a set of sequences that appear in ALL nodes (of each experiment),
        # then intersect across A and B.
        def experiment_common_seq(run_dir: Path) -> set[int]:
            nodes = order_nodes_from_dirname(run_dir)
            seq_common = None
            for n in nodes:
                csv = pick_sarb_csv(run_dir / n / "tslogs")
                if not csv:
                    continue
                try:
                    df = parse_sarb_csv(csv)
                except Exception:
                    continue
                s = set(df["seq"].astype(int).tolist())
                seq_common = s if seq_common is None else (seq_common & s)
            return seq_common or set()

        common_a = experiment_common_seq(run_a)
        common_b = experiment_common_seq(run_b)
        align_set = common_a & common_b  # per-experiment skip is applied later inside the loaders

    hop_to_vals_a, baseline_a = load_experiment_residuals(
        run_a, sarb_skip=args.sarb_skip, clean_mad=args.clean_mad, align_seq_with=align_set
    )
    hop_to_vals_b, baseline_b = load_experiment_residuals(
        run_b, sarb_skip=args.sarb_skip, clean_mad=args.clean_mad, align_seq_with=align_set
    )
    if not hop_to_vals_a and not hop_to_vals_b:
        raise SystemExit("ERROR: No SARB residuals found in either experiment.")

    # Only plot hops that exist in BOTH experiments so boxes appear side-by-side
    common_hops = sorted(set(hop_to_vals_a.keys()) & set(hop_to_vals_b.keys()))
    if not common_hops:
        # Fallback: plot whatever exists, but warn that some hops are missing
        common_hops = sorted(set(hop_to_vals_a.keys()) | set(hop_to_vals_b.keys()))
        print("WARNING: Experiments do not share the same hops; plotting available hops "
              "(some hop groups may contain only one box).")

    # Prepare boxplot data & positions
    data = []
    positions = []
    xticks = []
    step = 3.0   # spacing per hop group
    width = 0.8
    for i, hop in enumerate(common_hops, start=1):
        base_pos = (i - 1) * step + 1.0
        # A at base_pos, B at base_pos + 1
        if hop in hop_to_vals_a:
            data.append(hop_to_vals_a[hop])
            positions.append(base_pos)
        else:
            data.append(np.array([]))
            positions.append(base_pos)
        if hop in hop_to_vals_b:
            data.append(hop_to_vals_b[hop])
            positions.append(base_pos + 1.0)
        else:
            data.append(np.array([]))
            positions.append(base_pos + 1.0)
        xticks.append((base_pos + base_pos + 1.0) / 2.0)  # center tick between A and B
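
    # For reference, with three common hops and step = 3.0 the loop above
    # produces (values follow directly from the arithmetic in the loop):
    #   hop group 1: A at x=1.0, B at x=2.0, tick at 1.5
    #   hop group 2: A at x=4.0, B at x=5.0, tick at 4.5
    #   hop group 3: A at x=7.0, B at x=8.0, tick at 7.5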

    color_a = "tab:blue"
    color_b = "tab:orange"
    alpha = 0.6

    # plt.figure(figsize=(max(8, int(1.5 * len(common_hops) + 6)), 6))
    plt.figure(figsize=(8, 6))
    # plt.title("SARB residuals (centered) per hop: side-by-side comparison")
    plt.axhline(0.0, linestyle="--", linewidth=1)
    bp = plt.boxplot(
        data,
        positions=positions,
        widths=width,
        showfliers=False,
        manage_ticks=False,
        patch_artist=True,  # <-- allows facecolors
    )

    # Color boxes: A then B, repeating for each hop
    for i, box in enumerate(bp["boxes"]):
        is_a = (i % 2 == 0)
        c = color_a if is_a else color_b
        box.set(facecolor=c, edgecolor=c, alpha=alpha, linewidth=1.5)
    # Color medians/whiskers/caps consistently
    for med in bp["medians"]:
        med.set(color="black", linewidth=1.5)
    for whisk in bp["whiskers"]:
        whisk.set(color="black", linewidth=1.0)
    for cap in bp["caps"]:
        cap.set(color="black", linewidth=1.0)

    # X-axis ticks: one label per hop, centered between the A and B boxes
    plt.xticks(xticks, [f"{h}" for h in common_hops])
    plt.xlabel("Hop count (1-8)")
    # plt.ylabel("Residual vs baseline (ns) — centered by node median")
    plt.ylabel("Δ time (ns) — compared to T0")
    plt.grid(True)

    # Legend (colored patches)
    legend_elems = [
        # Patch(facecolor=color_a, edgecolor=color_a, alpha=alpha, label=f"{label_a} (baseline: {baseline_a})"),
        # Patch(facecolor=color_b, edgecolor=color_b, alpha=alpha, label=f"{label_b} (baseline: {baseline_b})"),
        Patch(facecolor=color_a, edgecolor=color_a, alpha=alpha, label=f"{label_a}"),
        Patch(facecolor=color_b, edgecolor=color_b, alpha=alpha, label=f"{label_b}"),
    ]
    plt.legend(handles=legend_elems, loc="best")
    plt.gca().yaxis.set_major_locator(MultipleLocator(250))
    plt.tight_layout()

    out_path = args.out
    if not out_path:
        out_name = f"comparison_{run_a.name}_vs_{run_b.name}.png"
        out_path = str((Path.cwd() / out_name).resolve())
    plt.savefig(out_path, dpi=400)
    plt.close()
    print(f"Saved comparison plot to: {out_path}")


if __name__ == "__main__":
    main()
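
# Example (directory names are placeholders): comparing runs stored in
# EXP_A_nodes(1-2-3) and EXP_B_nodes(1-2-3) without --out writes
# comparison_EXP_A_nodes(1-2-3)_vs_EXP_B_nodes(1-2-3).png to the current
# working directory, e.g.:
#   python compare_sarb_two_runs.py runs/EXP_A_nodes(1-2-3) runs/EXP_B_nodes(1-2-3) \
#       --label-a "Run A" --label-b "Run B" --align-seq --clean-mad 6 --sarb-skip 10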