#!/usr/bin/env python3
"""
Ingest NBA referee impact metrics (avg total, avg FTA, avg fouls) into TeamGameMetric
using ESPN public endpoints to avoid NBA.com rate limits.
"""
import json
import time
from datetime import datetime, timezone
from urllib.parse import urlencode
from urllib.request import Request, urlopen

import psycopg2


SCOREBOARD_URL = "https://site.api.espn.com/apis/site/v2/sports/basketball/nba/scoreboard"
SUMMARY_URL = "https://site.api.espn.com/apis/site/v2/sports/basketball/nba/summary"


def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''


def fetch_json(url: str) -> dict:
    req = Request(url, headers={
        "User-Agent": "Mozilla/5.0",
        "Accept": "application/json",
        "Referer": "https://www.espn.com/"
    })
    with urlopen(req, timeout=30) as resp:
        return json.loads(resp.read().decode())


def season_date_range(season_end: int) -> tuple[str, str]:
    # NBA season runs roughly Oct -> Jun; we use Oct 1 to Jun 30
    start = f"{season_end-1}-10-01"
    end = f"{season_end}-06-30"
    return start, end


def iter_game_dates(conn, season_end: int, limit_dates: int = 0):
    cur = conn.cursor()
    start, end = season_date_range(season_end)
    cur.execute(
        """
        SELECT DISTINCT "gameDate"::date
        FROM "SportsGame"
        WHERE league='nba' AND season=%s
          AND "homeScore" IS NOT NULL AND "awayScore" IS NOT NULL
          AND "gameDate"::date BETWEEN %s::date AND %s::date
        ORDER BY "gameDate"::date DESC
        """,
        (season_end, start, end)
    )
    rows = [r[0] for r in cur.fetchall()]
    cur.close()
    if limit_dates and limit_dates > 0:
        rows = rows[:limit_dates]
    return rows


def ingest_referees(season_end: int, limit_dates: int = 0):
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')

    conn = psycopg2.connect(db_url)
    game_dates = iter_game_dates(conn, season_end, limit_dates=limit_dates)

    ref_stats = {}
    for dt in game_dates:
        date_str = dt.strftime('%Y%m%d')
        qs = urlencode({"dates": date_str})
        scoreboard = fetch_json(f"{SCOREBOARD_URL}?{qs}")
        events = scoreboard.get("events", [])
        for ev in events:
            comps = ev.get("competitions", [])
            if not comps:
                continue
            comp = comps[0]
            competitors = comp.get("competitors", [])
            if len(competitors) < 2:
                continue
            home = next((c for c in competitors if c.get("homeAway") == "home"), None)
            away = next((c for c in competitors if c.get("homeAway") == "away"), None)
            if not home or not away:
                continue

            # Pull summary for officials and team stats
            event_id = ev.get("id")
            if not event_id:
                continue
            summary = fetch_json(f"{SUMMARY_URL}?event={event_id}")
            boxscore = summary.get("boxscore", {})
            teams = boxscore.get("teams", [])

            total_pts = 0.0
            total_fta = 0.0
            total_pf = 0.0
            for t in teams:
                stats = t.get("statistics", [])
                # Map stats by name
                for s in stats:
                    name = s.get("name")
                    val = s.get("value")
                    if val is None:
                        continue
                    if name == "points":
                        total_pts += float(val)
                    elif name == "freeThrowsAttempted":
                        total_fta += float(val)
                    elif name == "fouls":
                        total_pf += float(val)

            officials = summary.get("gameInfo", {}).get("officials", [])
            if not officials:
                continue
            for off in officials:
                ref = off.get("displayName")
                if not ref:
                    continue
                ref_stats.setdefault(ref, {"games": 0, "total_pts": 0.0, "total_fta": 0.0, "total_pf": 0.0})
                ref_stats[ref]["games"] += 1
                ref_stats[ref]["total_pts"] += total_pts
                ref_stats[ref]["total_fta"] += total_fta
                ref_stats[ref]["total_pf"] += total_pf

            time.sleep(0.2)

    cur = conn.cursor()
    game_key = f"{season_end}_NBA_REF"
    game_date = datetime(season_end, 10, 1, tzinfo=timezone.utc)
    sql_upsert = """
        INSERT INTO "TeamGameMetric"
        (league, season, "gameKey", "gameDate", team, "statKey", value)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (league, season, "gameKey", team, "statKey")
        DO UPDATE SET value = EXCLUDED.value
    """

    written = 0
    for ref, data in ref_stats.items():
        games = data["games"]
        if games <= 0:
            continue
        avg_total = data["total_pts"] / games
        avg_fta = data["total_fta"] / games
        avg_fouls = data["total_pf"] / games

        for stat_key, val in [
            ("nba_ref_avg_total", avg_total),
            ("nba_ref_avg_fta", avg_fta),
            ("nba_ref_avg_fouls", avg_fouls),
            ("nba_ref_games", games),
        ]:
            cur.execute(sql_upsert, ("nba", season_end, game_key, game_date, ref, stat_key, float(val)))
            written += 1

    conn.commit()
    cur.close()
    conn.close()
    print(f"✅ NBA referee metrics ingested (ESPN): {written}")


if __name__ == "__main__":
    import sys
    season = int(sys.argv[1]) if len(sys.argv) > 1 else 2025
    limit = int(sys.argv[2]) if len(sys.argv) > 2 else 0
    ingest_referees(season, limit_dates=limit)
