#!/usr/bin/env python3
"""
Backfill 2 weeks of historical data from TheSportsDB v2.
Fetches full season schedules for priority leagues and filters to last 14 days.
Also fetches event details (lineups, stats, timelines, highlights) for all finished games.

Run once manually, then the cron jobs take over.
"""
import json
import hashlib
import requests
import psycopg2
import time as _time
from datetime import datetime, timezone, timedelta

# TheSportsDB v2 API credentials and endpoint root.
API_KEY = "428892"
BASE = "https://www.thesportsdb.com/api/v2/json"
# Sent with every API request (v2 key goes in this header).
HEADERS = {"X-API-KEY": API_KEY}

# TheSportsDB league id -> (internal league code, display name, season string).
# Season format varies by sport: leagues spanning two calendar years use
# "YYYY-YYYY", single-calendar-year leagues use "YYYY".
PRIORITY_LEAGUES = {
    "4387": ("nba", "NBA", "2025-2026"),
    "4391": ("nfl", "NFL", "2025"),
    "4380": ("nhl", "NHL", "2025-2026"),
    "4424": ("mlb", "MLB", "2025"),
    "4607": ("ncaab", "NCAAB", "2025-2026"),
    "4479": ("ncaaf", "NCAAF", "2025"),
    "4328": ("epl", "English Premier League", "2025-2026"),
    "4335": ("laliga", "Spanish La Liga", "2025-2026"),
    "4331": ("bundesliga", "German Bundesliga", "2025-2026"),
    "4332": ("seriea", "Italian Serie A", "2025-2026"),
    "4334": ("ligue1", "French Ligue 1", "2025-2026"),
    "4346": ("mls", "American MLS", "2026"),
    "4480": ("ucl", "UEFA Champions League", "2025-2026"),
    "4443": ("ufc", "UFC", "2026"),
}

# Feed-record kind -> v2 lookup endpoint used to enrich finished events.
DETAIL_ENDPOINTS = {
    "lineup": "lookup/event_lineup",
    "stats": "lookup/event_stats",
    "timeline": "lookup/event_timeline",
    "highlights": "lookup/event_highlights",
}


def load_db_url():
    """Read SPORTS_DATABASE_URL from the app's .env file.

    Strips surrounding double quotes and any '?...' query-string suffix
    from the value. Returns '' when the variable is not present.
    """
    prefix = 'SPORTS_DATABASE_URL='
    with open('/var/www/html/eventheodds/.env', 'r') as env_file:
        for raw_line in env_file:
            if not raw_line.startswith(prefix):
                continue
            value = raw_line.split('=', 1)[1].strip().strip('"')
            return value.split('?')[0]
    return ''


def sha256(data):
    """Return the SHA-256 hex digest of *data* serialized as canonical JSON.

    Keys are sorted before hashing so logically-equal dicts always produce
    the same digest (used for content-based dedup in ExternalFeedRecord).
    """
    canonical = json.dumps(data, sort_keys=True).encode()
    return hashlib.sha256(canonical).hexdigest()


def _int(value):
    """Best-effort int conversion: None in, None out; None for bad values.

    The API returns scores as strings or nulls, so both missing and
    unparseable inputs map to None rather than raising.
    """
    try:
        return None if value is None else int(value)
    except (ValueError, TypeError):
        return None


def fetch_season(league_id, season):
    """Download one league's full-season schedule from TheSportsDB v2.

    Returns a list of event dicts; [] on HTTP error, network failure, or
    an empty/unexpected payload (failures are logged, never raised).
    """
    endpoint = f"{BASE}/schedule/league/{league_id}/{season}"
    try:
        response = requests.get(endpoint, headers=HEADERS, timeout=30)
        if response.status_code != 200:
            print(f"    HTTP {response.status_code} for {league_id}/{season}")
            return []
        payload = response.json()
        # The events list appears under different keys depending on endpoint
        # variant; take the first non-empty one.
        for key in ("schedule", "events", "table"):
            if payload.get(key):
                return payload[key]
        return []
    except Exception as e:
        print(f"    Error: {e}")
        return []


def fetch_detail(endpoint, event_id):
    """Fetch one enrichment payload (lineup/stats/timeline/highlights).

    Returns the first non-empty value in the JSON response, normalized to
    a list; None on any failure or when every value is empty.
    """
    try:
        resp = requests.get(f"{BASE}/{endpoint}/{event_id}",
                            headers=HEADERS, timeout=15)
        if resp.status_code != 200:
            return None
        payload = resp.json()
        for value in payload.values():
            if value:
                return value if isinstance(value, list) else [value]
    except Exception:
        # Best-effort enrichment: a failed detail fetch is not fatal.
        pass
    return None


def main():
    """Backfill the last 14 days of games and event details into Postgres.

    Flow:
      1. For each priority league, fetch the full season schedule and keep
         only events dated within the last 14 days.
      2. Upsert each event into "SportsGame" and archive the raw payload
         in "ExternalFeedRecord" (deduped by content hash).
      3. For every finished game, fetch lineup/stats/timeline/highlights
         payloads and archive those too.

    Commits once per league during the schedule pass, then in batches of
    20 events (and once per endpoint) during the detail pass.
    """
    db_url = load_db_url()
    if not db_url:
        print("ERROR: No SPORTS_DATABASE_URL")
        return

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    now = datetime.now(timezone.utc)
    # ISO date strings compare correctly lexicographically, so the 14-day
    # filter below is a plain string comparison against this cutoff.
    cutoff = now - timedelta(days=14)
    cutoff_str = cutoff.strftime("%Y-%m-%d")

    total_games = 0
    total_feed = 0
    total_details = 0
    finished_event_ids = []  # events with both scores set; enriched in pass 2

    print(f"Backfilling events from {cutoff_str} to today")
    print("=" * 60)

    for league_id, (code, name, season) in PRIORITY_LEAGUES.items():
        print(f"\n--- {name} ({season}) ---")
        events = fetch_season(league_id, season)
        if not events:
            print(f"  No events found")
            continue

        # Filter to last 14 days
        recent = [e for e in events if (e.get("dateEvent") or "") >= cutoff_str]
        print(f"  {len(events)} total events, {len(recent)} in last 14 days")

        for ev in recent:
            event_id = ev.get("idEvent", "")
            if not event_id:
                continue

            # Prefixed external id keeps TheSportsDB ids distinct from
            # ids sourced from other providers.
            ext_id = f"tsdb_{event_id}"
            date_str = ev.get("dateEvent", "")
            time_str = ev.get("strTime") or ev.get("strTimeLocal") or "00:00:00"
            # Slice to 8 chars ("HH:MM:SS"); the len >= 5 guard also admits
            # bare "HH:MM" values, which pass through un-padded.
            game_date = f"{date_str} {time_str[:8]}" if time_str and len(time_str) >= 5 else f"{date_str} 00:00:00"

            home = ev.get("strHomeTeam") or ev.get("strEvent", "Unknown")
            away = ev.get("strAwayTeam") or "TBD"
            home_score = _int(ev.get("intHomeScore"))
            away_score = _int(ev.get("intAwayScore"))
            # Both scores present => the game is treated as finished.
            status = "final" if home_score is not None and away_score is not None else "scheduled"

            # "2025-2026" -> 2025; "2025" -> 2025; anything odd falls back
            # to the current year.
            try:
                season_year = int(season.split("-")[0])
            except (ValueError, IndexError):
                season_year = now.year

            # Upsert SportsGame
            # On conflict: never overwrite an existing score with NULL,
            # never demote a 'final' game, and only update at all when the
            # new row carries a score or the stored row has none (WHERE).
            cur.execute("""
                INSERT INTO "SportsGame" (
                    league, season, "gameDate", "homeTeam", "awayTeam",
                    "externalGameId", "homeScore", "awayScore", status, raw,
                    "createdAt", "updatedAt"
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                ON CONFLICT (league, "externalGameId") DO UPDATE SET
                    "homeScore" = COALESCE(EXCLUDED."homeScore", "SportsGame"."homeScore"),
                    "awayScore" = COALESCE(EXCLUDED."awayScore", "SportsGame"."awayScore"),
                    status = CASE WHEN EXCLUDED.status = 'final' THEN 'final' ELSE "SportsGame".status END,
                    raw = EXCLUDED.raw,
                    "updatedAt" = EXCLUDED."updatedAt"
                WHERE EXCLUDED."homeScore" IS NOT NULL OR "SportsGame"."homeScore" IS NULL
            """, (
                code, season_year, game_date, home, away,
                ext_id, home_score, away_score, status,
                json.dumps(ev), now, now,
            ))
            total_games += 1

            # Store raw in ExternalFeedRecord too
            # DO NOTHING + rowcount means total_feed counts only new rows.
            content_hash = sha256(ev)
            cur.execute("""
                INSERT INTO "ExternalFeedRecord"
                    (source, kind, league, season, "contentHash", "fetchedAt", raw, "createdAt")
                VALUES ('thesportsdb', 'schedule', %s, %s, %s, %s, %s, %s)
                ON CONFLICT (source, "contentHash") DO NOTHING
            """, (code, season, content_hash, now, json.dumps(ev), now))
            total_feed += cur.rowcount

            if status == "final":
                finished_event_ids.append(event_id)

        conn.commit()
        _time.sleep(0.5)  # Rate limiting

    print(f"\n{'=' * 60}")
    print(f"Games upserted: {total_games}")
    print(f"Feed records: {total_feed}")
    print(f"Finished events to enrich: {len(finished_event_ids)}")

    # Fetch event details for finished games
    print(f"\nFetching event details...")
    for kind, endpoint in DETAIL_ENDPOINTS.items():
        print(f"\n  --- {kind} ---")
        for i, eid in enumerate(finished_event_ids):
            records = fetch_detail(endpoint, eid)
            if records:
                for rec in records:
                    if not isinstance(rec, dict):
                        continue
                    # Stamp the event id so the record can be joined back
                    # to its game later.
                    rec["idEvent"] = eid
                    ch = sha256(rec)
                    try:
                        cur.execute("""
                            INSERT INTO "ExternalFeedRecord"
                                (source, kind, league, season, "contentHash", "fetchedAt", raw, "createdAt")
                            VALUES ('thesportsdb', %s, NULL, NULL, %s, %s, %s, %s)
                            ON CONFLICT (source, "contentHash") DO NOTHING
                        """, (kind, ch, now, json.dumps(rec), now))
                        total_details += cur.rowcount
                    except Exception:
                        # NOTE(review): rollback also discards any earlier
                        # uncommitted inserts in this batch, so total_details
                        # may slightly overcount on failure — confirm intent.
                        conn.rollback()

            # Commit in batches of 20 events, with a short pause for rate
            # limiting.
            if (i + 1) % 20 == 0:
                conn.commit()
                print(f"    {i+1}/{len(finished_event_ids)} processed")
                _time.sleep(0.3)

        conn.commit()

    conn.close()
    print(f"\nBACKFILL COMPLETE: {total_games} games, {total_feed} feed records, {total_details} detail records")


# Script entry point: intended to be run once manually (see module docstring).
if __name__ == "__main__":
    main()
