#!/usr/bin/env python3
"""
Backfill NBA player game logs from NBA.com (via swar/nba_api) into our snapshot format,
mapped onto existing BallDontLie player ids (by normalized full name).

Why: Until a new props/odds provider is integrated, this gives us *real* historical NBA stats
to ingest into SportsDB (no simulations).

References:
- https://github.com/swar/nba_api

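Output:
  data/player_stats/nba_player_stats_nbaapi_<season>.json
  (<season> is the NBA.com season label, e.g. "2023-24"; override the path with --out)
  Each row shaped like:
    { date, season, player:{id,name,team}, game:{id,home_team:{abbreviation},visitor_team:{abbreviation}}, stats:{...}, source:"nba_api", raw:{...} }
  where "season" is the season END year and "raw" is the unmodified NBA.com record.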

Usage:
  python3 scripts/fetch_nba_api_gamelogs.py --season-end 2024
  python3 scripts/fetch_nba_api_gamelogs.py --season-end 2024 --limit 200
"""

from __future__ import annotations

import argparse
import json
import re
import sys
import time
import unicodedata
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


# Directory two levels above this file's resolved location
# (the repository root when the script lives in scripts/).
ROOT = Path(__file__).resolve().parent.parent
# Default home of the player list that is read and the snapshots that are written.
DATA_DIR = ROOT.joinpath("data")


def normalize_name(s: str) -> str:
    """Return a provider-independent matching key for a player's full name.

    The key is lower-case ASCII-ish text with single spaces, built so that the
    spellings used by different data providers collapse to the same value:

    * accents are folded ("Jokić" -> "jokic");
    * periods and apostrophes are removed without leaving a gap, so dotted
      initials match their undotted form ("P.J." and "PJ" -> "pj");
    * any other punctuation (e.g. hyphens) becomes a space;
    * generational suffixes (jr, sr, ii, iii, iv, v) are dropped, but only at
      the END of the name, so a leading "JR" in "JR Smith" is preserved.

    Args:
        s: Full name as given by a provider; ``None`` or "" is accepted.

    Returns:
        The normalized key, or "" when there is nothing to normalize.
    """
    text = (s or "").strip().lower()

    # Decompose accented letters and throw away the combining marks. This must
    # happen before the punctuation pass: a combining mark is not a word
    # character and would otherwise split the name in the middle.
    decomposed = unicodedata.normalize("NFKD", text)
    text = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    # Periods / apostrophes (straight or curly) vanish; everything else that
    # is not a word character or whitespace turns into a separator.
    text = re.sub(r"[.'\u2019]", "", text)
    text = re.sub(r"[^\w\s]", " ", text)

    # Drop common suffixes that vary between providers (trailing only, and
    # never the sole remaining token).
    suffixes = {"jr", "sr", "ii", "iii", "iv", "v"}
    parts = text.split()
    while len(parts) > 1 and parts[-1] in suffixes:
        parts.pop()
    return " ".join(parts)


def season_str_from_end_year(end_year: int) -> str:
    """Turn a season END year into the NBA.com "YYYY-YY" label (2024 -> "2023-24")."""
    start_year = end_year - 1
    # Only the last two digits of the end year appear in the label.
    end_suffix = str(end_year)[-2:]
    return "-".join((str(start_year), end_suffix))


def parse_matchup(team: str, matchup: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Split an NBA.com MATCHUP string into (homeAbbr, awayAbbr).

    MATCHUP looks like:
      "DEN vs. LAL" (DEN home)
      "DEN @ LAL"   (DEN away)

    Args:
        team: Abbreviation of the player's team. Kept for interface
            compatibility; the result is derived from ``matchup`` alone.
        matchup: The MATCHUP text; case-insensitive, ``None``/"" accepted.

    Returns:
        (home, away) as upper-case three-letter codes, or (None, None) when
        fewer than two three-letter codes can be found.
    """
    text = (matchup or "").strip().upper()
    # Only standalone three-letter tokens count as team codes, so the
    # two-letter "VS" separator is never picked up.
    codes = re.findall(r"\b[A-Z]{3}\b", text)
    if len(codes) < 2:
        return None, None
    first, second = codes[:2]
    if "@" in text:
        # "<team> @ <opponent>": the first-listed team is the visitor.
        return second, first
    # "<team> vs. <opponent>": the first-listed team is at home.
    return first, second


def load_bdl_players() -> Dict[str, int]:
    """
    Load BallDontLie player ids from data/nba/players.json into a name->id map.

    The file may be either a bare list of player objects or an object whose
    "data" key holds that list. Each player's id is read from "id",
    "player_id" or "playerId"; the name from "full_name", "name", or
    "first_name" + "last_name". Entries that are not objects, or that lack an
    id or a usable name, are skipped. When two players normalize to the same
    name, the one appearing later in the file wins.

    Returns:
        Mapping of normalize_name(<player name>) -> integer player id.

    Raises:
        RuntimeError: if the file is missing or does not contain a player list.
    """
    p = DATA_DIR / "nba" / "players.json"
    if not p.exists():
        raise RuntimeError(f"Missing {p}. Run the BDL updater first to fetch nba players.")
    # JSON is UTF-8 by specification; do not depend on the platform default.
    raw = json.loads(p.read_text(encoding="utf-8"))
    rows = raw["data"] if isinstance(raw, dict) and isinstance(raw.get("data"), list) else raw
    if not isinstance(rows, list):
        raise RuntimeError(f"Unexpected format in {p}: expected a list of players or an object with a 'data' list.")

    out: Dict[str, int] = {}
    for r in rows:
        if not isinstance(r, dict):
            continue
        pid = r.get("id") or r.get("player_id") or r.get("playerId")
        if pid is None:
            continue
        name = r.get("full_name") or r.get("name")
        if not name:
            # Join only the parts that are present so a null first/last name
            # does not leak into the key as the literal text "None".
            name = " ".join(str(part) for part in (r.get("first_name"), r.get("last_name")) if part).strip()
        if not name:
            continue
        key = normalize_name(str(name))
        if not key:
            # Nothing left after normalization (e.g. punctuation only).
            continue
        out[key] = int(pid)
    return out


def _game_date_iso(value: Any) -> str:
    """Return a GAME_DATE value as "YYYY-MM-DD".

    Accepts ISO-style values ("2024-04-14", "2024-04-14T00:00:00") as well as
    the "Apr 14, 2024" style. An unrecognised value is returned stripped but
    otherwise unchanged rather than being truncated into something misleading.
    """
    text = str(value or "").strip()
    if not text:
        return ""
    if re.match(r"\d{4}-\d{2}-\d{2}", text):
        return text[:10]
    # NOTE: %b / %B month names follow the process locale (English by default).
    for fmt in ("%b %d, %Y", "%B %d, %Y"):
        try:
            return datetime.strptime(text, fmt).date().isoformat()
        except ValueError:
            continue
    return text


def _team_from_matchup(matchup: str) -> str:
    """Return the leading three-letter code of a MATCHUP string, upper-cased, or ""."""
    found = re.match(r"\s*([A-Za-z]{3})\b", matchup or "")
    return found.group(1).upper() if found else ""


def main() -> int:
    """Fetch regular-season game logs for every mappable player and write the snapshot.

    Steps:
      1. Parse CLI arguments and import nba_api lazily (so a missing install
         produces a readable message instead of a traceback).
      2. Match BallDontLie players to NBA.com players by normalized full name.
      3. Fetch each matched player's game log, skipping players already present
         in an existing output file (resume support).
      4. Write the output atomically, both periodically and on exit.

    Returns:
        Process exit code: 0 on completion, 2 if nba_api cannot be imported.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--season-end", type=int, required=True, help="NBA season END year (e.g., 2024 means 2023-24)")
    ap.add_argument("--limit", type=int, default=0, help="Limit number of players (0 = all)")
    ap.add_argument("--sleep", type=float, default=0.6, help="Delay between player requests (seconds)")
    ap.add_argument("--out", type=str, default="", help="Output path (defaults under data/player_stats/)")
    args = ap.parse_args()

    try:
        from nba_api.stats.endpoints import playergamelog
        from nba_api.stats.static import players as static_players
    except Exception as e:
        print("Missing dependency: nba_api. Install it first:", file=sys.stderr)
        print("  pip install nba_api", file=sys.stderr)
        print(f"Details: {e}", file=sys.stderr)
        return 2

    season_end = int(args.season_end)
    season_str = season_str_from_end_year(season_end)

    bdl_by_name = load_bdl_players()
    nba_players = static_players.get_players()
    nba_by_name: Dict[str, Dict[str, Any]] = {}
    for p in nba_players:
        key = normalize_name(p.get("full_name") or "")
        if not key:
            continue
        current = nba_by_name.get(key)
        # The NBA.com list can contain several players with the same name.
        # Later entries win, except that a non-active entry never displaces
        # one flagged "is_active".
        if current is None or p.get("is_active") or not current.get("is_active"):
            nba_by_name[key] = p

    # Choose the intersection (only players we can map to BDL ids)
    mapped: List[Tuple[int, str, int]] = []
    for n, bdl_id in bdl_by_name.items():
        nba = nba_by_name.get(n)
        if not nba:
            continue
        mapped.append((bdl_id, nba.get("full_name") or "", int(nba.get("id"))))

    mapped.sort(key=lambda x: x[1])
    if args.limit and args.limit > 0:
        mapped = mapped[: int(args.limit)]

    print(f"[nba_api] season={season_str} (endYear={season_end})")
    print(f"[nba_api] BDL players loaded: {len(bdl_by_name)}")
    print(f"[nba_api] NBA.com players loaded: {len(nba_players)}")
    print(f"[nba_api] mapped players: {len(mapped)}")

    out_path = Path(args.out) if args.out else (DATA_DIR / "player_stats" / f"nba_player_stats_nbaapi_{season_str}.json")
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Resume support: if a prior run wrote a file, keep it and skip already-processed players.
    # A player counts as processed only if at least one of their rows is in the
    # file, so players with no games this season are simply queried again.
    rows: List[Dict[str, Any]] = []
    done_player_ids = set()
    if out_path.exists():
        try:
            existing = json.loads(out_path.read_text(encoding="utf-8"))
            if isinstance(existing, list):
                rows = existing
                for r in rows:
                    pid = ((r or {}).get("player") or {}).get("id")
                    if pid is not None:
                        done_player_ids.add(int(pid))
        except Exception as e:
            # If file is corrupt/partial, ignore and rewrite.
            print(f"[nba_api] warning: ignoring unreadable {out_path}: {e}", file=sys.stderr)
            rows = []
            done_player_ids = set()

    def checkpoint() -> None:
        # Write to a sibling temp file and rename over the target so readers
        # never observe a half-written snapshot.
        tmp = out_path.with_suffix(out_path.suffix + ".tmp")
        tmp.write_text(json.dumps(rows, indent=2, default=str), encoding="utf-8")
        tmp.replace(out_path)

    pause = float(args.sleep)
    processed = 0
    misses = 0
    try:
        for i, (bdl_id, name, nba_id) in enumerate(mapped, start=1):
            if int(bdl_id) in done_player_ids:
                continue
            try:
                gl = playergamelog.PlayerGameLog(
                    player_id=str(nba_id),
                    season=season_str,
                    season_type_all_star="Regular Season",
                    timeout=30,
                )
                df = gl.get_data_frames()[0]
            except Exception as e:
                # Best effort: record the miss and move on, but still pause so
                # a rate-limited API is not hit again immediately.
                misses += 1
                print(f"[nba_api] fetch failed for {name} (nba_id={nba_id}): {e}", file=sys.stderr)
                time.sleep(pause)
                continue

            # DataFrame -> records
            recs = df.to_dict("records") if hasattr(df, "to_dict") else []
            # Collect the player's rows separately and add them in one step, so
            # an interrupt cannot leave a partially recorded player that a
            # later resume would treat as complete.
            player_rows: List[Dict[str, Any]] = []
            for r in recs:
                game_id = str(r.get("Game_ID") or r.get("GAME_ID") or "").strip()
                game_date = _game_date_iso(r.get("GAME_DATE"))
                matchup = str(r.get("MATCHUP") or "").strip()
                team = str(r.get("TEAM_ABBREVIATION") or "").strip().upper()
                if not team:
                    # No team column in the record: MATCHUP always lists the
                    # player's own team first ("DEN vs. LAL" / "DEN @ LAL").
                    team = _team_from_matchup(matchup)
                home, away = parse_matchup(team, matchup)

                stats = {
                    "points": r.get("PTS"),
                    "rebounds": r.get("REB"),
                    "assists": r.get("AST"),
                    "three_pointers_made": r.get("FG3M"),
                    "steals": r.get("STL"),
                    "blocks": r.get("BLK"),
                }

                player_rows.append(
                    {
                        "date": game_date,
                        "season": season_end,
                        "player": {"id": bdl_id, "name": name, "team": team},
                        "game": {
                            "id": game_id if game_id else None,
                            "home_team": {"abbreviation": home} if home else None,
                            "visitor_team": {"abbreviation": away} if away else None,
                        },
                        "stats": stats,
                        "source": "nba_api",
                        "raw": r,
                    }
                )
            rows.extend(player_rows)
            processed += 1

            # Count successfully fetched players (not list positions) so the
            # periodic write still happens when some players are skipped or fail.
            if processed % 25 == 0:
                print(f"[nba_api] fetched {i}/{len(mapped)} players (rows={len(rows)})")
                # Prevent "no output file until the very end" behavior.
                checkpoint()

            time.sleep(pause)
    finally:
        # Runs on normal completion and on Ctrl-C / unexpected errors alike, so
        # everything fetched so far is kept for the next (resumed) run.
        checkpoint()

    print(f"[nba_api] wrote {len(rows)} rows to {out_path}")
    if misses:
        print(f"[nba_api] warning: failed to fetch {misses} players (likely rate-limits/network)")
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())

