#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import sys
import re
import json
from pathlib import Path
from datetime import datetime

import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

plt.rcParams.update({
    "font.size": 12,        # base font size
    "axes.titlesize": 14,   # plot titles
    "axes.labelsize": 12,   # x/y labels
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
    "legend.fontsize": 12,
})

# ---------- Config ----------
NODE_TO_HOP_DEFAULT = {
    "apu00": 0, "apu01": 1, "apu02": 2, "apu03": 3,
    "apu04": 4, "apu09": 5, "apu14": 6, "apu19": 7, "apu24": 8,
}
# RES_COL = "clean_absP99_ns"
RES_COL = "clean_absP95_ns"
ANALYSIS_SUBPATH = Path("_analysis") / "sarb_clean_nodes.csv"
# ----------------------------

def parse_mode(run_id: str):
    if "BC_E2E_PI" in run_id: return "E2E"
    if "BC_P2P_PI" in run_id: return "P2P"
    return None

def parse_gain(run_id: str, key: str):
    m = re.search(rf"{key}(\d+p\d+)", run_id)
    return float(m.group(1).replace("p", ".")) if m else None

def parse_sync_pow(run_id: str):
    m = re.search(r"_sync(m?\d)_", run_id)
    if not m: return None
    token = m.group(1)
    return -int(token[1:]) if token.startswith("m") else int(token)

def path_has_excluded(path: Path, excluded_names: set) -> bool:
    """Return True if any directory part matches an excluded name (case-insensitive)."""
    lowered = {p.lower() for p in path.parts}
    return any(name.lower() in lowered for name in excluded_names)

def load_all_sarb_clean(sweep_dir: Path, exclude_dirs: set) -> pd.DataFrame:
    records = []
    for csv_path in sweep_dir.rglob(ANALYSIS_SUBPATH.as_posix()):
        # Skip if any parent folder is excluded (e.g., "unused")
        if path_has_excluded(csv_path, exclude_dirs) or path_has_excluded(csv_path.parent, exclude_dirs):
            continue

        run_dir = csv_path.parent.parent  # .../<run_id>/_analysis/sarb_clean_nodes.csv
        run_id = run_dir.name
        try:
            df = pd.read_csv(csv_path)
        except Exception as e:
            print(f"[WARN] Failed to read {csv_path}: {e}", file=sys.stderr)
            continue

        if "node" not in df.columns:
            print(f"[WARN] Missing 'node' in {csv_path}, skipping.", file=sys.stderr)
            continue
        if RES_COL not in df.columns:
            print(f"[WARN] Missing '{RES_COL}' in {csv_path}, skipping.", file=sys.stderr)
            continue

        df["run_id"] = run_id
        df["mode"] = parse_mode(run_id)
        df["kp"] = parse_gain(run_id, "kp")
        df["ki"] = parse_gain(run_id, "ki")
        df["sync_pow"] = parse_sync_pow(run_id)
        records.append(df)

    if not records:
        raise RuntimeError("No sarb_clean_nodes.csv files found (after exclusions).")
    return pd.concat(records, ignore_index=True)

def map_nodes_to_hops(df: pd.DataFrame, node_to_hop: dict) -> pd.DataFrame:
    df = df.copy()
    df["hop"] = df["node"].map(node_to_hop)
    df = df[df["hop"].notna()].copy()
    df["hop"] = df["hop"].astype(int)
    return df

def spaghetti_mark_best_with_full_legend(out_dir: Path, avg_df: pd.DataFrame,
                                         mode_label: str, sync_pow: int, threshold_ns: int):
    sub = avg_df[(avg_df["mode"] == mode_label) & (avg_df["sync_pow"] == sync_pow)].copy()
    fig = plt.figure(figsize=(10, 6))
    if sub.empty:
        plt.text(0.5, 0.5, "No data for this panel", ha="center", va="center", fontsize=12)
        plt.title(f"{mode_label} — sync=2^{sync_pow}")
        plt.axis("off")
        fig.savefig(out_dir / f"spaghetti_{mode_label}_sync2^{sync_pow}.png", dpi=200, bbox_inches="tight")
        plt.close(fig)
        return None, None

    last = sub[sub["hop"] == 8].copy()
    if last.empty:
        plt.text(0.5, 0.5, "No hop=8 rows present", ha="center", va="center", fontsize=12)
        plt.title(f"{mode_label} — sync=2^{sync_pow}")
        plt.axis("off")
        fig.savefig(out_dir / f"spaghetti_{mode_label}_sync2^{sync_pow}.png", dpi=200, bbox_inches="tight")
        plt.close(fig)
        return None, None

    survivors = {(row["kp"], row["ki"]) for _, row in last.iterrows()
                 if pd.notna(row[RES_COL]) and float(row[RES_COL]) <= threshold_ns}

    if not survivors:
        plt.text(0.5, 0.5, f"No curves ≤ {threshold_ns} ns at hop 8", ha="center", va="center", fontsize=12)
        plt.title(f"{mode_label} — SARB Clean P95 (sync=2^{sync_pow})")
        plt.axis("off")
        fig.savefig(out_dir / f"spaghetti_{mode_label}_sync2^{sync_pow}.png", dpi=200, bbox_inches="tight")
        plt.close(fig)
        return None, None

    def hop8_val(pair):
        kp, ki = pair
        return float(last[(last["kp"] == kp) & (last["ki"] == ki)][RES_COL].iloc[0])

    best_pair = min(survivors, key=hop8_val)
    best_val = hop8_val(best_pair)

    kept_count = 0
    for (kp, ki), g in sub.groupby(["kp", "ki"]):
        if (kp, ki) not in survivors:
            continue
        kept_count += 1
        g_sorted = g.sort_values("hop")
        label = f"kp={kp}, ki={ki}"
        if (kp, ki) == best_pair:
            plt.plot(g_sorted["hop"], g_sorted[RES_COL], linestyle=":", linewidth=3,
                     label=f"BEST {label} @ hop8={best_val:.0f} ns")
        else:
            plt.plot(g_sorted["hop"], g_sorted[RES_COL], alpha=0.9, label=label)

    plt.xlabel("Hop count (1–8)")
    plt.ylabel("Clean Abs P95 (ns)")
    # plt.title(f"{mode_label} — SARB Clean P95 by hop (sync=2^{sync_pow}, ≤{threshold_ns/1000:.1f} µs filter) | kept {kept_count}")
    plt.grid(True)
    ncol = 2 if kept_count > 10 else 1
    # plt.legend(fontsize=8, ncol=ncol, framealpha=0.9)
    plt.legend(ncol=ncol, framealpha=0.9)
    plt.gca().yaxis.set_major_locator(MultipleLocator(500))
    fig.savefig(out_dir / f"spaghetti_{mode_label}_sync2^{sync_pow}.png", dpi=200, bbox_inches="tight")
    plt.close(fig)
    return best_pair, best_val

def main():
    parser = argparse.ArgumentParser(description="Analyze SARB clean results across a PTP sweep.")
    parser.add_argument("sweep_dir", type=Path, help="Path to the unzipped sweep folder.")
    parser.add_argument("--threshold-ns", type=int, default=10_000,
                        help="Residual threshold at hop 8 (ns) for survivor curves. Default 10000 (10 µs).")
    parser.add_argument("--hop-map-json", type=Path, default=None,
                        help="Optional path to a JSON file mapping node->hop.")
    parser.add_argument("--exclude-dir", action="append", default=["unused"],
                        help="Folder name to exclude (can be given multiple times). Default: unused")
    args = parser.parse_args()

    sweep_dir = args.sweep_dir.resolve()
    if not sweep_dir.exists():
        print(f"ERROR: sweep_dir '{sweep_dir}' does not exist.", file=sys.stderr)
        sys.exit(1)

    out_dir = sweep_dir / "sweep_analysis"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Hop map
    node_to_hop = NODE_TO_HOP_DEFAULT
    if args.hop_map_json:
        try:
            node_to_hop = json.loads(Path(args.hop_map_json).read_text())
        except Exception as e:
            print(f"[WARN] Failed to read hop map JSON, using default mapping: {e}", file=sys.stderr)

    exclude_set = set(args.exclude_dir or [])

    # Logging
    log_path = out_dir / "analysis_log.txt"
    with log_path.open("w") as log:
        log.write(f"Analysis started: {datetime.now().isoformat()}\n")
        log.write(f"Sweep dir: {sweep_dir}\n")
        log.write(f"Threshold (ns): {args.threshold_ns}\n")
        log.write(f"Hop map: {node_to_hop}\n")
        log.write(f"Excluded dirs: {sorted(exclude_set)}\n")

    # 1) Load & filter
    df = load_all_sarb_clean(sweep_dir, exclude_set)

    # 2) Hop mapping
    df = map_nodes_to_hops(df, node_to_hop)
    
    # >>> Start at first hop: ignore hop 0 <<<
    df = df[df["hop"].between(1, 8)]

    # 3) Persist combined rows
    combined_csv = out_dir / "combined_sarb_clean.csv"
    df.to_csv(combined_csv, index=False)

    # 4) Average per (mode, sync_pow, kp, ki, hop)
    avg = (
        df.groupby(["mode", "sync_pow", "kp", "ki", "hop"], dropna=False)[RES_COL]
        .mean()
        .reset_index()
    )

    # 5) Plots & summary
    rows = []
    for mode_label in sorted(avg["mode"].dropna().unique()):
        sync_values = sorted(avg[avg["mode"] == mode_label]["sync_pow"].dropna().unique())
        for sync_pow in sync_values:
            best_pair, best_val = spaghetti_mark_best_with_full_legend(
                out_dir, avg, mode_label, int(sync_pow), args.threshold_ns
            )
            sub = avg[(avg["mode"] == mode_label) & (avg["sync_pow"] == sync_pow)]
            last = sub[sub["hop"] == 8]
            kept = (last[RES_COL] <= args.threshold_ns).sum() if not last.empty else 0
            rows.append({
                "mode": mode_label,
                "sync_pow": int(sync_pow),
                "kept_params": int(kept),
                "best_kp": None if best_pair is None else float(best_pair[0]),
                "best_ki": None if best_pair is None else float(best_pair[1]),
                "best_hop8_p95_ns": None if best_val is None else float(best_val),
            })

    summary_df = pd.DataFrame(rows).sort_values(["mode", "sync_pow"])
    summary_csv = out_dir / "best_by_mode_sync.csv"
    summary_df.to_csv(summary_csv, index=False)

    with log_path.open("a") as log:
        log.write(f"Analysis finished: {datetime.now().isoformat()}\n")
        log.write(f"Wrote: {combined_csv}\n")
        log.write(f"Wrote: {summary_csv}\n")
        for png in sorted(out_dir.glob("spaghetti_*_sync2^*.png")):
            log.write(f"Wrote: {png}\n")

    print(f"✓ Analysis complete. Results in: {out_dir}")
    print(f"  - combined CSV: {combined_csv}")
    print(f"  - summary CSV:  {summary_csv}")
    print(f"  - plots:        spaghetti_<MODE>_sync2^<p>.png")
    print(f"  - log:          {log_path}")

if __name__ == "__main__":
    main()