#!/usr/bin/env python3
"""
Backfill NCAAB SportsGame rows from orphaned OddsSnapshot records.

Steps:
  1. Find all OddsSnapshot rows where league='ncaab' and gameId IS NULL.
  2. Get distinct (homeTeam, awayTeam, gameDate::date) combinations.
  3. For each combination not already covered by a SportsGame row, insert one.
  4. Link OddsSnapshot rows to their newly created (or pre-existing) SportsGame rows.

Usage:
    python3 ncaab_backfill_games_from_odds.py [--dry-run]
"""

import os
import sys
import argparse
import psycopg2
import psycopg2.extras
from datetime import datetime, date, timezone


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

ENV_FILE = "/var/www/html/eventheodds/.env"


def load_env(path: str) -> dict:
    """Parse a simple KEY=VALUE .env file, ignoring comments and blank lines."""
    env = {}
    with open(path) as fh:
        for line in fh:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            if "=" not in line:
                continue
            key, _, value = line.partition("=")
            key = key.strip()
            # Strip surrounding quotes if present
            value = value.strip()
            if len(value) >= 2 and value[0] in ('"', "'") and value[-1] == value[0]:
                value = value[1:-1]
            env[key] = value
    return env


def get_db_url() -> str:
    env = load_env(ENV_FILE)
    url = env.get("SPORTS_DATABASE_URL", "")
    if not url:
        raise RuntimeError("SPORTS_DATABASE_URL not found in " + ENV_FILE)
    return url


def connect(url: str):
    """Return a psycopg2 connection, stripping any ?schema=... suffix."""
    clean_url = url.split("?")[0]
    return psycopg2.connect(clean_url)


# ---------------------------------------------------------------------------
# Season calculation
# ---------------------------------------------------------------------------

def compute_season(game_date: date) -> int:
    """
    Return the academic-year season label for a given date.

    Convention observed in the DB:
      - Aug–Dec of year Y  -> season Y      (fall semester of Y/Y+1)
      - Jan–Jul of year Y  -> season Y-1    (spring semester of Y-1/Y)

    Examples:
      Nov 2025 -> 2025
      Jan 2026 -> 2025
      Apr 2026 -> 2025
      Nov 2024 -> 2024
      Jan 2025 -> 2024
    """
    if game_date.month >= 8:
        return game_date.year
    else:
        return game_date.year - 1


# ---------------------------------------------------------------------------
# Main logic
# ---------------------------------------------------------------------------

def run(dry_run: bool) -> None:
    today = date.today()
    print(f"[{datetime.now():%Y-%m-%d %H:%M:%S}] Starting NCAAB backfill from OddsSnapshot")
    print(f"  dry_run={dry_run}  today={today}")

    url = get_db_url()
    conn = connect(url)
    conn.autocommit = False
    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)

    # ------------------------------------------------------------------
    # Step 1: Count orphaned snapshots
    # ------------------------------------------------------------------
    cur.execute("""
        SELECT COUNT(*) AS cnt
        FROM "OddsSnapshot"
        WHERE league = 'ncaab'
          AND "gameId" IS NULL
    """)
    orphan_count = cur.fetchone()["cnt"]
    print(f"\nOrphaned OddsSnapshot rows (ncaab, gameId IS NULL): {orphan_count}")

    if orphan_count == 0:
        print("Nothing to do.")
        conn.close()
        return

    # ------------------------------------------------------------------
    # Step 2: Distinct (homeTeam, awayTeam, gameDate::date) combos
    # ------------------------------------------------------------------
    cur.execute("""
        SELECT
            "homeTeam",
            "awayTeam",
            "gameDate"::date AS game_date,
            COUNT(*) AS snapshot_count
        FROM "OddsSnapshot"
        WHERE league = 'ncaab'
          AND "gameId" IS NULL
        GROUP BY "homeTeam", "awayTeam", "gameDate"::date
        ORDER BY "gameDate"::date, "homeTeam"
    """)
    combos = cur.fetchall()
    print(f"Distinct (homeTeam, awayTeam, date) combos: {len(combos)}")

    # ------------------------------------------------------------------
    # Step 3: For each combo, check whether a SportsGame already exists
    # ------------------------------------------------------------------
    to_insert = []

    for row in combos:
        home = row["homeTeam"]
        away = row["awayTeam"]
        gdate = row["game_date"]          # Python date object
        n_snaps = row["snapshot_count"]

        cur.execute("""
            SELECT id
            FROM "SportsGame"
            WHERE league = 'ncaab'
              AND "homeTeam" = %s
              AND "awayTeam" = %s
              AND "gameDate"::date = %s
            LIMIT 1
        """, (home, away, gdate))
        existing = cur.fetchone()

        if existing:
            print(f"  SKIP  {gdate}  {away} @ {home}  -> already exists (id={existing['id']}, {n_snaps} snapshots)")
        else:
            season = compute_season(gdate)
            status = "final" if gdate < today else "scheduled"
            to_insert.append({
                "home": home,
                "away": away,
                "game_date": gdate,
                "season": season,
                "status": status,
                "n_snaps": n_snaps,
            })
            print(f"  QUEUE {gdate}  {away} @ {home}  season={season}  status={status}  ({n_snaps} snapshots)")

    print(f"\nGames to insert: {len(to_insert)}")

    if not to_insert:
        print("No new SportsGame rows needed.")
        conn.close()
        return

    # ------------------------------------------------------------------
    # Step 4: Insert new SportsGame rows
    # ------------------------------------------------------------------
    inserted = 0
    skipped_conflict = 0
    now_ts = datetime.now(timezone.utc)

    for g in to_insert:
        # gameDate: use midnight UTC on the game's calendar date
        game_dt = datetime(g["game_date"].year, g["game_date"].month, g["game_date"].day,
                           tzinfo=timezone.utc)

        if dry_run:
            print(f"  DRY-RUN INSERT: {g['game_date']}  {g['away']} @ {g['home']}"
                  f"  season={g['season']}  status={g['status']}")
            inserted += 1
            continue

        cur.execute("""
            INSERT INTO "SportsGame"
                (league, season, "gameDate", "homeTeam", "awayTeam", status, "updatedAt")
            VALUES
                ('ncaab', %s, %s, %s, %s, %s, %s)
            ON CONFLICT (league, season, "gameDate", "homeTeam", "awayTeam")
            DO NOTHING
            RETURNING id
        """, (
            g["season"],
            game_dt,
            g["home"],
            g["away"],
            g["status"],
            now_ts,
        ))
        result = cur.fetchone()
        if result:
            inserted += 1
            print(f"  INSERTED id={result['id']}  {g['game_date']}  {g['away']} @ {g['home']}"
                  f"  season={g['season']}  status={g['status']}")
        else:
            skipped_conflict += 1
            print(f"  CONFLICT (no-op)  {g['game_date']}  {g['away']} @ {g['home']}")

    print(f"\nInserted: {inserted}  Conflicts/skipped: {skipped_conflict}")

    # ------------------------------------------------------------------
    # Step 5: Link orphaned OddsSnapshots to their SportsGame rows
    # ------------------------------------------------------------------
    if dry_run:
        print("\nDRY-RUN: would now run UPDATE to link OddsSnapshot rows to SportsGame rows.")
        print("No changes committed.")
        conn.rollback()
        conn.close()
        return

    print("\nLinking orphaned OddsSnapshot rows to SportsGame rows...")
    cur.execute("""
        UPDATE "OddsSnapshot" os
        SET "gameId" = g.id
        FROM "SportsGame" g
        WHERE os.league = 'ncaab'
          AND os."gameId" IS NULL
          AND g.league = 'ncaab'
          AND g."homeTeam" = os."homeTeam"
          AND g."awayTeam" = os."awayTeam"
          AND g."gameDate"::date = os."gameDate"::date
    """)
    linked = cur.rowcount
    print(f"OddsSnapshot rows linked: {linked}")

    # Verify remaining orphans
    cur.execute("""
        SELECT COUNT(*) AS cnt
        FROM "OddsSnapshot"
        WHERE league = 'ncaab'
          AND "gameId" IS NULL
    """)
    remaining = cur.fetchone()["cnt"]
    print(f"Remaining orphaned OddsSnapshot rows: {remaining}")

    conn.commit()
    print(f"\n[{datetime.now():%Y-%m-%d %H:%M:%S}] Done. Committed.")

    cur.close()
    conn.close()


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def main():
    parser = argparse.ArgumentParser(
        description="Backfill NCAAB SportsGame rows from orphaned OddsSnapshot records."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print what would be done without making any DB changes.",
    )
    args = parser.parse_args()

    try:
        run(dry_run=args.dry_run)
    except KeyboardInterrupt:
        print("\nInterrupted.")
        sys.exit(1)
    except Exception as exc:
        print(f"\nERROR: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
