#!/usr/bin/env python3
"""
compare_sarb_two_runs.py

Compare two SARB-enabled experiments by plotting side-by-side boxplots of
residuals per hop (starting at hop=1). Residuals are per node vs the baseline
node (first in hop order), centered by per-node median, with optional MAD
outlier cleaning.

You can plot signed centered residuals (default), or absolute values
(--plot-abs). For table parity, metrics (p50/p95/p99/MAD) are by default
computed on |residual|.

Example:
  python compare_sarb_two_runs.py EXP_A EXP_B \
    --label-a "PI E2E m3 (1.0,2.0)" \
    --label-b "LinReg P2P 0 (64)" \
    --sarb-skip 0 --clean-mad 6 \
    --overlay-p95 --whis 5,95 \
    --out winners_box.png
"""

import argparse
import math
import re
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.ticker import MultipleLocator

RE_NODES_IN_NAME = re.compile(r'nodes\(([\d\-]+)\)')

def order_nodes_from_dirname(run_dir: Path) -> List[str]:
    m = RE_NODES_IN_NAME.search(run_dir.name)
    if m:
        order_ids = [int(x) for x in m.group(1).split('-')]
        return [f"apu{n:02d}" for n in order_ids]
    return [p.name for p in sorted(run_dir.glob("apu??")) if p.is_dir()]

def pick_sarb_csv(tsdir: Path) -> Path | None:
    if not tsdir.exists():
        return None
    eth0 = tsdir / "eth0.csv"
    if eth0.exists():
        return eth0
    csvs = sorted(tsdir.glob("*.csv"), key=lambda p: p.stat().st_mtime, reverse=True)
    return csvs[0] if csvs else None

def parse_sarb_csv(csv_path: Path) -> pd.DataFrame:
    df = pd.read_csv(csv_path)
    if {"hw_raw_sec","hw_raw_nsec"}.issubset(df.columns):
        rx_ns = df["hw_raw_sec"].astype("int64")*1_000_000_000 + df["hw_raw_nsec"].astype("int64")
    elif {"hw_sys_sec","hw_sys_nsec"}.issubset(df.columns):
        rx_ns = df["hw_sys_sec"].astype("int64")*1_000_000_000 + df["hw_sys_nsec"].astype("int64")
    else:
        rx_ns = df["sw_sec"].astype("int64")*1_000_000_000 + df["sw_nsec"].astype("int64")
    return pd.DataFrame({"seq": df["seq"].astype(int), "rx_ns": rx_ns})

def robust_stats_abs(x) -> dict:
    x = np.asarray(x, dtype=float)
    x = x[~np.isnan(x)]
    if x.size == 0:
        return dict(n=0, median=math.nan, MAD=math.nan)
    med = float(np.median(x))
    mad = float(np.median(np.abs(x - med)))
    return dict(n=int(x.size), median=med, MAD=mad)

def load_experiment_residuals(
    run_dir: Path,
    sarb_skip: int = 0,
    clean_mad: float = 6.0,
    align_seq_with: set[int] | None = None,
) -> Tuple[Dict[int, np.ndarray], str]:
    """Return: hop_index (1..) -> centered residuals (SIGNED), baseline node name."""
    nodes_order = order_nodes_from_dirname(run_dir)
    node_maps: Dict[str, Dict[int, int]] = {}
    for n in nodes_order:
        csv = pick_sarb_csv(run_dir / n / "tslogs")
        if csv is None:
            continue
        try:
            df = parse_sarb_csv(csv)
        except Exception:
            continue
        node_maps[n] = dict(zip(df["seq"].tolist(), df["rx_ns"].tolist()))
    if not node_maps:
        return {}, nodes_order[0] if nodes_order else "unknown"

    common_seq = None
    for m in node_maps.values():
        s = set(m.keys())
        common_seq = s if common_seq is None else (common_seq & s)
    common = sorted(common_seq) if common_seq else []
    if sarb_skip > 0 and common:
        common = common[sarb_skip:]
    if align_seq_with is not None:
        common = [q for q in common if q in align_seq_with]
    if not common:
        return {}, nodes_order[0] if nodes_order else "unknown"

    baseline = nodes_order[0] if nodes_order[0] in node_maps else next(iter(node_maps.keys()))
    base_map = node_maps.get(baseline, {})

    hop_residuals: Dict[int, np.ndarray] = {}
    for idx, node in enumerate(nodes_order):
        if node == baseline or node not in node_maps:
            continue
        vals = []
        m_node = node_maps[node]
        for q in common:
            b = base_map.get(q); r = m_node.get(q)
            if b is None or r is None:
                continue
            vals.append(r - b)
        if not vals:
            continue
        v = np.asarray(vals, dtype=float)
        med = float(np.median(v))
        centered = v - med
        if clean_mad and clean_mad > 0:
            st = robust_stats_abs(centered)
            mad = max(st["MAD"], 1.0)
            thr = clean_mad * mad
            centered = centered[np.abs(centered) <= thr]
        if centered.size == 0:
            continue
        hop_residuals[idx] = centered  # hop index (baseline is 0)
    return hop_residuals, baseline

def to_abs(arr: np.ndarray) -> np.ndarray:
    return np.abs(arr) if arr.size else arr

def summarize_abs(arr: np.ndarray):
    if arr.size == 0:
        return dict(n=0, p50=np.nan, p95=np.nan, p99=np.nan, mad=np.nan)
    x = np.abs(arr.astype(float))
    p50 = np.percentile(x, 50)
    p95 = np.percentile(x, 95)
    p99 = np.percentile(x, 99)
    med = np.median(x)
    mad = np.median(np.abs(x - med))
    return dict(n=int(x.size), p50=p50, p95=p95, p99=p99, mad=mad)

def parse_whis(s: str | None, plotting_abs: bool) -> tuple | float:
    """--whis: '5,95' => (5,95) percentiles; '1.5' => 1.5*IQR; default: (5,95) if abs else 1.5"""
    if not s:
        return (5.0, 95.0) if plotting_abs else 1.5
    if "," in s:
        a, b = s.split(",", 1)
        return (float(a), float(b))
    return float(s)

def main():
    ap = argparse.ArgumentParser(description="Compare two SARB experiments with side-by-side boxplots per hop (starting at 1).")
    ap.add_argument("exp_a", help="Experiment A folder")
    ap.add_argument("exp_b", help="Experiment B folder")
    ap.add_argument("--label-a", default=None, help="Legend label for experiment A")
    ap.add_argument("--label-b", default=None, help="Legend label for experiment B")
    ap.add_argument("--out", default=None, help="Output PNG path")
    ap.add_argument("--sarb-skip", type=int, default=0, help="Initial SARB sequences to skip")
    ap.add_argument("--clean-mad", type=float, default=6.0, help="MAD multiplier for outlier cleaning (<=0 disables)")
    ap.add_argument("--align-seq", action="store_true", help="Intersect sequences across BOTH experiments")
    ap.add_argument("--plot-abs", action="store_true", help="Plot |residual| (default: plot signed residuals).")
    ap.add_argument("--metrics-abs", action="store_true", default=True, help="Compute/print metrics on |residual| (default on).")
    ap.add_argument("--print-metrics", action="store_true", help="Print per-hop metrics for validation.")
    ap.add_argument("--overlay-p95", action="store_true", help="Overlay per-hop p95(|res|) markers/lines.")
    ap.add_argument("--whis", type=str, default=None, help='Whisker rule: float (IQR factor, e.g., "1.5") or percentiles "5,95".')
    args = ap.parse_args()

    run_a = Path(args.exp_a).expanduser().resolve()
    run_b = Path(args.exp_b).expanduser().resolve()
    if not run_a.is_dir() or not run_b.is_dir():
        raise SystemExit("ERROR: both arguments must be experiment directories")

    label_a = args.label_a or run_a.name
    label_b = args.label_b or run_b.name

    align_set = None
    if args.align_seq:
        def experiment_common_seq(run_dir: Path) -> set[int]:
            nodes = order_nodes_from_dirname(run_dir)
            seq_common = None
            for n in nodes:
                csv = pick_sarb_csv(run_dir / n / "tslogs")
                if not csv: continue
                try:
                    df = parse_sarb_csv(csv)
                except Exception:
                    continue
                s = set(df["seq"].astype(int).tolist())
                seq_common = s if seq_common is None else (seq_common & s)
            return seq_common or set()
        align_set = experiment_common_seq(run_a) & experiment_common_seq(run_b)

    hop_to_vals_a, baseline_a = load_experiment_residuals(
        run_a, sarb_skip=args.sarb_skip, clean_mad=args.clean_mad, align_seq_with=align_set
    )
    hop_to_vals_b, baseline_b = load_experiment_residuals(
        run_b, sarb_skip=args.sarb_skip, clean_mad=args.clean_mad, align_seq_with=align_set
    )
    if not hop_to_vals_a and not hop_to_vals_b:
        raise SystemExit("ERROR: No SARB residuals found in either experiment.")

    common_hops = sorted(set(hop_to_vals_a.keys()) & set(hop_to_vals_b.keys()))
    if not common_hops:
        common_hops = sorted(set(hop_to_vals_a.keys()) | set(hop_to_vals_b.keys()))
        print("WARNING: Experiments do not share the same hops.")

    def metrics_for(arr: np.ndarray):
        return summarize_abs(arr) if args.metrics_abs else summarize_abs(arr)  # summarize_abs ignores sign

    if args.print_metrics:
        print("\nPer-hop metrics"
              f" ({'|residual|' if args.metrics_abs else 'signed'})")
        print(" hop |   A_n    A_p50    A_p95    A_p99    A_MAD  |   B_n    B_p50    B_p95    B_p99    B_MAD")
        for h in common_hops:
            sa = metrics_for(hop_to_vals_a.get(h, np.array([])))
            sb = metrics_for(hop_to_vals_b.get(h, np.array([])))
            print(f"{h:>3} | {sa['n']:>5} {sa['p50']:>8.1f} {sa['p95']:>8.1f} {sa['p99']:>8.1f} {sa['mad']:>8.1f} "
                  f"| {sb['n']:>5} {sb['p50']:>8.1f} {sb['p95']:>8.1f} {sb['p99']:>8.1f} {sb['mad']:>8.1f}")

    # Arrays for plotting (signed or abs)
    plot_vals_a = {h: (to_abs(v) if args.plot_abs else v) for h, v in hop_to_vals_a.items()}
    plot_vals_b = {h: (to_abs(v) if args.plot_abs else v) for h, v in hop_to_vals_b.items()}

    # Prepare boxplot data
    data, positions, xticks = [], [], []
    step = 3.0
    width = 0.8
    for i, hop in enumerate(common_hops, start=1):
        base_pos = (i - 1) * step + 1.0
        data.append(plot_vals_a.get(hop, np.array([]))); positions.append(base_pos)
        data.append(plot_vals_b.get(hop, np.array([]))); positions.append(base_pos + 1.0)
        xticks.append((base_pos + base_pos + 1.0) / 2.0)

    color_a, color_b, alpha = "tab:blue", "tab:orange", 0.6
    plt.figure(figsize=(8, 6))
    if not args.plot_abs:
        plt.axhline(0.0, linestyle="--", linewidth=1)

    whis = parse_whis(args.whis, plotting_abs=args.plot_abs)
    bp = plt.boxplot(
        data,
        positions=positions,
        widths=width,
        showfliers=False,
        manage_ticks=False,
        patch_artist=True,
        whis=whis,
    )

    for i, box in enumerate(bp["boxes"]):
        is_a = (i % 2 == 0)
        c = color_a if is_a else color_b
        box.set(facecolor=c, edgecolor=c, alpha=alpha, linewidth=1.5)
    for med in bp["medians"]:
        med.set(color="black", linewidth=1.5)
    for whisk in bp["whiskers"]:
        whisk.set(color="black", linewidth=1.0)
    for cap in bp["caps"]:
        cap.set(color="black", linewidth=1.0)

    # Optional p95(|res|) overlay
    if args.overlay_p95:
        xs_a, ys_a, xs_b, ys_b = [], [], [], []
        for i, hop in enumerate(common_hops, start=1):
            center_a = (i - 1) * step + 1.0
            center_b = center_a + 1.0
            sa = summarize_abs(hop_to_vals_a.get(hop, np.array([])))
            sb = summarize_abs(hop_to_vals_b.get(hop, np.array([])))
            if np.isfinite(sa["p95"]): xs_a.append(center_a); ys_a.append(sa["p95"])
            if np.isfinite(sb["p95"]): xs_b.append(center_b); ys_b.append(sb["p95"])
        if xs_a:
            plt.plot(xs_a, ys_a, marker="o", linestyle="-", linewidth=1.2, label=f"{label_a} p95(|res|)")
        if xs_b:
            plt.plot(xs_b, ys_b, marker="o", linestyle="-", linewidth=1.2, label=f"{label_b} p95(|res|)")

    plt.xticks(xticks, [f"{h}" for h in common_hops])
    plt.xlabel("Hop count (1–8)")
    plt.ylabel("│Δ time│ vs baseline (ns) — centered, cleaned" if args.plot_abs
               else "Δ time vs baseline (ns) — centered, cleaned")
    plt.grid(True, axis="both", linestyle="-", alpha=0.3)
    legend_elems = [
        Patch(facecolor=color_a, edgecolor=color_a, alpha=alpha, label=f"{label_a}"),
        Patch(facecolor=color_b, edgecolor=color_b, alpha=alpha, label=f"{label_b}"),
    ]
    plt.legend(handles=legend_elems, loc="best")
    plt.gca().yaxis.set_major_locator(MultipleLocator(250))
    plt.tight_layout()

    out_path = args.out or str((Path.cwd() / f"comparison_{run_a.name}_vs_{run_b.name}.png").resolve())
    plt.savefig(out_path, dpi=400)
    plt.close()
    print(f"Saved comparison plot to: {out_path}")

if __name__ == "__main__":
    main()