#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Analyze a SARB clean LINREG sweep and render one all-in-one spaghetti figure.

Walks a sweep folder for per-run ``_analysis/sarb_clean_nodes.csv`` files,
maps nodes to hop counts, averages the clean |P95| residual per
(transport, sync_pow, lwin, hop) combination, plots every surviving curve
(hop-8 residual below a threshold) in a single figure, and writes combined
and summary CSVs plus a small text log.
"""

import argparse
import json
import re
import sys
from datetime import datetime
from pathlib import Path

import pandas as pd
import matplotlib

matplotlib.use("Agg")  # headless backend — must be selected before pyplot import
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

# ---- Global plot style (same as the PI script) ----
plt.rcParams.update({
    "font.size": 12,
    "axes.titlesize": 14,
    "axes.labelsize": 12,
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
    "legend.fontsize": 10,
})

# ---------- Config ----------
# Default node -> hop-count mapping for the test topology.
NODE_TO_HOP_DEFAULT = {
    "apu00": 0,
    "apu01": 1,
    "apu02": 2,
    "apu03": 3,
    "apu04": 4,
    "apu09": 5,
    "apu14": 6,
    "apu19": 7,
    "apu24": 8,
}
RES_COL = "clean_absP95_ns"  # keep identical
ANALYSIS_SUBPATH = Path("_analysis") / "sarb_clean_nodes.csv"
# ----------------------------


# --- Parsers tuned for LINREG folder names ---
def parse_transport(run_id: str):
    """Return 'E2E' or 'P2P' parsed from a run folder name, else None.

    Examples: ...__BC__BC_E2E_LINREG_E2E_... or ...BC_P2P_LINREG_P2P_...
    """
    # FIX: the old fallback used r"\bE2E\b", but '_' is a word character, so
    # \b never fires between '_' and 'E' — the fallback was dead for the
    # underscore-delimited run names this script targets. Match explicit
    # non-alphanumeric (or string-edge) delimiters instead.
    if "BC_E2E_LINREG" in run_id or re.search(r"(?:^|[^0-9A-Za-z])E2E(?:[^0-9A-Za-z]|$)", run_id):
        return "E2E"
    if "BC_P2P_LINREG" in run_id or re.search(r"(?:^|[^0-9A-Za-z])P2P(?:[^0-9A-Za-z]|$)", run_id):
        return "P2P"
    return None


def parse_sync_pow(run_id: str):
    """Parse the sync-interval exponent from a run name, else None.

    Matches tokens like ``_sync0_``, ``_sync1_``, ``_syncm3_`` (the leading
    'm' means minus): sync1 -> 1, syncm3 -> -3.
    """
    # FIX: was r"_sync(m?\d)_", which only accepted a single digit and
    # silently returned None for e.g. _sync10_ or _syncm12_.
    m = re.search(r"_sync(m?\d+)_", run_id)
    if not m:
        return None
    token = m.group(1)
    return -int(token[1:]) if token.startswith("m") else int(token)


def parse_lwin(run_id: str):
    """Parse the linreg window length (e.g. lwin64, lwin128) from a run name, else None."""
    m = re.search(r"lwin(\d+)", run_id)
    return int(m.group(1)) if m else None


def path_has_excluded(path: Path, excluded_names: set) -> bool:
    """True when any component of *path* equals an excluded folder name (case-insensitive)."""
    lowered = {p.lower() for p in path.parts}
    return any(name.lower() in lowered for name in excluded_names)


def load_all_sarb_clean(sweep_dir: Path, exclude_dirs: set) -> pd.DataFrame:
    """Collect every sarb_clean_nodes.csv under *sweep_dir* into one DataFrame.

    Each row is tagged with ``run_id``, ``transport``, ``sync_pow`` and
    ``lwin`` parsed from the run folder name. Files inside excluded folders
    or lacking the required columns are skipped with a warning on stderr.

    Raises:
        RuntimeError: when no usable CSV is found after exclusions.
    """
    records = []
    for csv_path in sweep_dir.rglob(ANALYSIS_SUBPATH.as_posix()):
        if path_has_excluded(csv_path, exclude_dirs) or path_has_excluded(csv_path.parent, exclude_dirs):
            continue
        run_dir = csv_path.parent.parent  # .../<run_id>/_analysis/sarb_clean_nodes.csv
        run_id = run_dir.name
        try:
            df = pd.read_csv(csv_path)
        except Exception as e:
            print(f"[WARN] Failed to read {csv_path}: {e}", file=sys.stderr)
            continue
        if "node" not in df.columns or RES_COL not in df.columns:
            print(f"[WARN] Missing 'node' or '{RES_COL}' in {csv_path}, skipping.", file=sys.stderr)
            continue
        df["run_id"] = run_id
        df["transport"] = parse_transport(run_id)
        df["sync_pow"] = parse_sync_pow(run_id)
        df["lwin"] = parse_lwin(run_id)
        records.append(df)
    if not records:
        raise RuntimeError("No sarb_clean_nodes.csv files found (after exclusions).")
    return pd.concat(records, ignore_index=True)


def map_nodes_to_hops(df: pd.DataFrame, node_to_hop: dict) -> pd.DataFrame:
    """Attach an integer ``hop`` column via *node_to_hop*; drop unmapped nodes."""
    df = df.copy()
    df["hop"] = df["node"].map(node_to_hop)
    df = df[df["hop"].notna()].copy()
    df["hop"] = df["hop"].astype(int)
    return df


def spaghetti_all_linreg(out_dir: Path, avg_df: pd.DataFrame, threshold_ns: int):
    """Plot every surviving (transport, sync_pow, lwin) curve in one figure.

    A curve survives when its averaged hop-8 residual is <= *threshold_ns*.
    Saves ``spaghetti_ALL_LINREG.png`` into *out_dir*.

    Returns:
        (best_key, best_val): the (transport, sync_pow, lwin) tuple and hop-8
        residual of the minimal survivor, or (None, None) when nothing survives.
    """
    fig = plt.figure(figsize=(10, 6))

    # All parameter combos across transport × sync_pow × lwin.
    # Keep only survivors with hop-8 residual <= threshold.
    kept_curves = []
    last = avg_df[avg_df["hop"] == 8].copy()
    for (transport, sync_pow, lwin), g in avg_df.groupby(["transport", "sync_pow", "lwin"]):
        if transport is None or sync_pow is None or lwin is None:
            continue
        hop8 = last[(last["transport"] == transport)
                    & (last["sync_pow"] == sync_pow)
                    & (last["lwin"] == lwin)]
        if hop8.empty or pd.isna(hop8[RES_COL].iloc[0]):
            continue
        hop8_val = float(hop8[RES_COL].iloc[0])
        if hop8_val > threshold_ns:
            continue
        kept_curves.append(((transport, int(sync_pow), int(lwin)), hop8_val, g.sort_values("hop")))

    if not kept_curves:
        # Nothing survived: emit a placeholder figure so the output file always exists.
        plt.text(0.5, 0.5, f"No curves ≤ {threshold_ns} ns at hop 8",
                 ha="center", va="center", fontsize=12)
        plt.title("LINREG — SARB Clean P95")
        plt.axis("off")
        fig.savefig(out_dir / "spaghetti_ALL_LINREG.png", dpi=200, bbox_inches="tight")
        plt.close(fig)
        return None, None

    # Best = minimal hop8 residual among survivors
    best_key, best_val, _ = min(kept_curves, key=lambda t: t[1])

    kept_count = 0
    for (transport, sync_pow, lwin), hop8_val, g_sorted in kept_curves:
        kept_count += 1
        tau_sec = 2.0 ** float(sync_pow)  # sync interval in seconds (2^sync_pow)
        label = f"{transport}, $\\tau_{{sync}}$={tau_sec:g} s @8={hop8_val:.0f} ns"
        if (transport, sync_pow, lwin) == best_key:
            # Highlight the best survivor with a thick dotted line.
            plt.plot(g_sorted["hop"], g_sorted[RES_COL],
                     linestyle=":", linewidth=3, label=f"BEST {label}")
        else:
            plt.plot(g_sorted["hop"], g_sorted[RES_COL], alpha=0.9, label=label)

    plt.xlabel("Hop count (1–8)")
    plt.ylabel("Clean Abs P95 (ns)")
    plt.grid(True)
    ncol = 2 if kept_count > 10 else 1  # two legend columns once it gets crowded
    plt.legend(ncol=ncol, framealpha=0.9)
    plt.gca().yaxis.set_major_locator(MultipleLocator(250))  # match the PI script's tick density
    fig.savefig(out_dir / "spaghetti_ALL_LINREG.png", dpi=200, bbox_inches="tight")
    plt.close(fig)
    return best_key, best_val


def main():
    parser = argparse.ArgumentParser(description="Analyze SARB clean LINREG sweep (all-in-one figure).")
    parser.add_argument("sweep_dir", type=Path, help="Path to the unzipped sweep folder.")
    parser.add_argument("--threshold-ns", type=int, default=10_000,
                        help="Residual threshold at hop 8 (ns) for survivor curves. Default 10000 (10 µs).")
    parser.add_argument("--hop-map-json", type=Path, default=None,
                        help="Optional path to a JSON mapping node->hop.")
    parser.add_argument("--exclude-dir", action="append", default=["unused"],
                        help="Folder name to exclude (can be given multiple times). Default: unused")
    args = parser.parse_args()

    sweep_dir = args.sweep_dir.resolve()
    if not sweep_dir.exists():
        print(f"ERROR: sweep_dir '{sweep_dir}' does not exist.", file=sys.stderr)
        sys.exit(1)

    out_dir = sweep_dir / "sweep_analysis"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Hop map: default topology unless a JSON override is given and readable.
    node_to_hop = NODE_TO_HOP_DEFAULT
    if args.hop_map_json:
        try:
            node_to_hop = json.loads(Path(args.hop_map_json).read_text())
        except Exception as e:
            print(f"[WARN] Failed to read hop map JSON, using default mapping: {e}", file=sys.stderr)

    exclude_set = set(args.exclude_dir or [])

    # Logging
    log_path = out_dir / "analysis_log.txt"
    with log_path.open("w") as log:
        log.write(f"Analysis started: {datetime.now().isoformat()}\n")
        log.write(f"Sweep dir: {sweep_dir}\n")
        log.write(f"Threshold (ns): {args.threshold_ns}\n")
        log.write(f"Hop map: {node_to_hop}\n")
        log.write(f"Excluded dirs: {sorted(exclude_set)}\n")

    # 1) Load & filter
    df = load_all_sarb_clean(sweep_dir, exclude_set)

    # 2) Hop mapping (and drop hop 0)
    df = map_nodes_to_hops(df, node_to_hop)
    df = df[df["hop"].between(1, 8)]

    # 3) Persist combined rows (for traceability)
    combined_csv = out_dir / "combined_sarb_clean_linreg.csv"
    df.to_csv(combined_csv, index=False)

    # 4) Average per (transport, sync_pow, lwin, hop)
    avg = (
        df.groupby(["transport", "sync_pow", "lwin", "hop"], dropna=False)[RES_COL]
        .mean()
        .reset_index()
    )

    # 5) Single-figure spaghetti & summary
    best_key, best_val = spaghetti_all_linreg(out_dir, avg, args.threshold_ns)

    # Build a compact summary (all survivors + best)
    survivors_rows = []
    last = avg[avg["hop"] == 8]
    for (transport, sync_pow, lwin), g in avg.groupby(["transport", "sync_pow", "lwin"]):
        hop8 = last[(last["transport"] == transport)
                    & (last["sync_pow"] == sync_pow)
                    & (last["lwin"] == lwin)]
        if hop8.empty or pd.isna(hop8[RES_COL].iloc[0]):
            continue
        hop8_val = float(hop8[RES_COL].iloc[0])
        # FIX: the old is_best expression called int(sync_pow)/int(lwin)
        # unguarded, which raises on NaN keys; guard with pd.notna like the
        # adjacent fields.
        key = ((transport, int(sync_pow), int(lwin))
               if pd.notna(sync_pow) and pd.notna(lwin) else None)
        survivors_rows.append({
            "transport": transport,
            "sync_pow": int(sync_pow) if pd.notna(sync_pow) else None,
            "lwin": int(lwin) if pd.notna(lwin) else None,
            "hop8_p95_ns": hop8_val,
            "survives_threshold": hop8_val <= args.threshold_ns,
            "is_best": best_key is not None and key == best_key,
        })
    summary_df = pd.DataFrame(sorted(survivors_rows, key=lambda r: r["hop8_p95_ns"]))
    summary_csv = out_dir / "linreg_all_in_one_summary.csv"
    summary_df.to_csv(summary_csv, index=False)

    with log_path.open("a") as log:
        log.write(f"Analysis finished: {datetime.now().isoformat()}\n")
        log.write(f"Wrote: {combined_csv}\n")
        log.write(f"Wrote: {summary_csv}\n")
        log.write(f"Wrote: {out_dir / 'spaghetti_ALL_LINREG.png'}\n")

    print(f"✓ LINREG analysis complete. Results in: {out_dir}")
    print(f" - combined CSV: {combined_csv}")
    print(f" - summary CSV: {summary_csv}")
    print(f" - plot: spaghetti_ALL_LINREG.png")
    print(f" - log: {log_path}")


if __name__ == "__main__":
    main()