#!/usr/bin/env python3
"""
ETL: Populate Opening/Closing Lines from OddsSnapshot to SportsGame
Unlocks: Q2 (overnight moves), Q16 (book efficiency)

Run: Daily after games complete, or on-demand for backfill
"""
import psycopg2
import os
from datetime import datetime, timezone

def load_db_url():
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().split('?')[0]
    except FileNotFoundError:
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def run_etl():
    print("=" * 60)
    print("ETL: SNAPSHOT TO GAME LINES")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    db_url = load_db_url()
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()

    # Ensure columns exist
    columns_to_add = [
        ('openingSpreadHome', 'NUMERIC'),
        ('openingSpreadAway', 'NUMERIC'),
        ('openingMoneylineHome', 'INTEGER'),
        ('openingMoneylineAway', 'INTEGER'),
        ('openingTotal', 'NUMERIC'),
        ('openingCapturedAt', 'TIMESTAMP'),
        ('closingSpreadHome', 'NUMERIC'),
        ('closingSpreadAway', 'NUMERIC'),
        ('closingMoneylineHome', 'INTEGER'),
        ('closingMoneylineAway', 'INTEGER'),
        ('closingTotal', 'NUMERIC'),
        ('closingCapturedAt', 'TIMESTAMP'),
    ]

    for col_name, col_type in columns_to_add:
        try:
            cur.execute(f'''
                ALTER TABLE "SportsGame" ADD COLUMN IF NOT EXISTS "{col_name}" {col_type}
            ''')
        except Exception as e:
            pass  # Column may already exist
    conn.commit()

    # Step 1: Populate opening lines (earliest snapshot per game)
    # First by gameId, then by externalGameId for unlinked snapshots
    print("\nPopulating opening lines...")

    # 1a: Match by gameId
    cur.execute('''
        WITH earliest_snapshots AS (
            SELECT DISTINCT ON ("gameId")
                "gameId",
                "spreadHome",
                "spreadAway",
                "moneylineHome",
                "moneylineAway",
                "total",
                "snapshotAt"
            FROM "OddsSnapshot"
            WHERE "gameId" IS NOT NULL
            ORDER BY "gameId", "snapshotAt" ASC
        )
        UPDATE "SportsGame" g SET
            "openingSpreadHome" = e."spreadHome",
            "openingSpreadAway" = e."spreadAway",
            "openingMoneylineHome" = e."moneylineHome",
            "openingMoneylineAway" = e."moneylineAway",
            "openingTotal" = e."total",
            "openingCapturedAt" = e."snapshotAt"
        FROM earliest_snapshots e
        WHERE g.id = e."gameId"
          AND (g."openingSpreadHome" IS NULL OR g."openingCapturedAt" > e."snapshotAt")
    ''')
    opening_by_id = cur.rowcount

    # 1b: Match by externalGameId for snapshots without gameId
    cur.execute('''
        WITH earliest_ext_snapshots AS (
            SELECT DISTINCT ON ("externalGameId")
                "externalGameId",
                "spreadHome",
                "spreadAway",
                "moneylineHome",
                "moneylineAway",
                "total",
                "snapshotAt"
            FROM "OddsSnapshot"
            WHERE "gameId" IS NULL AND "externalGameId" IS NOT NULL
            ORDER BY "externalGameId", "snapshotAt" ASC
        )
        UPDATE "SportsGame" g SET
            "openingSpreadHome" = e."spreadHome",
            "openingSpreadAway" = e."spreadAway",
            "openingMoneylineHome" = e."moneylineHome",
            "openingMoneylineAway" = e."moneylineAway",
            "openingTotal" = e."total",
            "openingCapturedAt" = e."snapshotAt"
        FROM earliest_ext_snapshots e
        WHERE g."externalGameId" = e."externalGameId"
          AND (g."openingSpreadHome" IS NULL OR g."openingCapturedAt" > e."snapshotAt")
    ''')
    opening_by_ext = cur.rowcount
    opening_updated = opening_by_id + opening_by_ext
    print(f"  Opening lines updated: {opening_updated} ({opening_by_id} by gameId, {opening_by_ext} by externalGameId)")

    # Step 2: Populate closing lines (latest snapshot before game time)
    print("\nPopulating closing lines...")

    # 2a: Match by gameId
    cur.execute('''
        WITH latest_pregame_snapshots AS (
            SELECT DISTINCT ON (s."gameId")
                s."gameId",
                s."spreadHome",
                s."spreadAway",
                s."moneylineHome",
                s."moneylineAway",
                s."total",
                s."snapshotAt"
            FROM "OddsSnapshot" s
            JOIN "SportsGame" g ON s."gameId" = g.id
            WHERE s."gameId" IS NOT NULL
              AND s."snapshotAt" < g."gameDate"
            ORDER BY s."gameId", s."snapshotAt" DESC
        )
        UPDATE "SportsGame" g SET
            "closingSpreadHome" = l."spreadHome",
            "closingSpreadAway" = l."spreadAway",
            "closingMoneylineHome" = l."moneylineHome",
            "closingMoneylineAway" = l."moneylineAway",
            "closingTotal" = l."total",
            "closingCapturedAt" = l."snapshotAt"
        FROM latest_pregame_snapshots l
        WHERE g.id = l."gameId"
          AND (g."closingSpreadHome" IS NULL OR g."closingCapturedAt" < l."snapshotAt")
    ''')
    closing_by_id = cur.rowcount

    # 2b: Match by externalGameId
    cur.execute('''
        WITH latest_ext_pregame AS (
            SELECT DISTINCT ON (s."externalGameId")
                s."externalGameId",
                s."spreadHome",
                s."spreadAway",
                s."moneylineHome",
                s."moneylineAway",
                s."total",
                s."snapshotAt"
            FROM "OddsSnapshot" s
            JOIN "SportsGame" g ON s."externalGameId" = g."externalGameId"
            WHERE s."gameId" IS NULL
              AND s."externalGameId" IS NOT NULL
              AND s."snapshotAt" < g."gameDate"
            ORDER BY s."externalGameId", s."snapshotAt" DESC
        )
        UPDATE "SportsGame" g SET
            "closingSpreadHome" = l."spreadHome",
            "closingSpreadAway" = l."spreadAway",
            "closingMoneylineHome" = l."moneylineHome",
            "closingMoneylineAway" = l."moneylineAway",
            "closingTotal" = l."total",
            "closingCapturedAt" = l."snapshotAt"
        FROM latest_ext_pregame l
        WHERE g."externalGameId" = l."externalGameId"
          AND (g."closingSpreadHome" IS NULL OR g."closingCapturedAt" < l."snapshotAt")
    ''')
    closing_by_ext = cur.rowcount
    closing_updated = closing_by_id + closing_by_ext
    print(f"  Closing lines updated: {closing_updated} ({closing_by_id} by gameId, {closing_by_ext} by externalGameId)")

    conn.commit()

    # Report coverage
    print("\nCoverage Report:")
    cur.execute('''
        SELECT
            league,
            COUNT(*) as total_games,
            COUNT(*) FILTER (WHERE "openingSpreadHome" IS NOT NULL) as with_opening,
            COUNT(*) FILTER (WHERE "closingSpreadHome" IS NOT NULL) as with_closing
        FROM "SportsGame"
        WHERE "homeScore" IS NOT NULL
          AND league IN ('nba', 'nfl', 'nhl', 'mlb')
        GROUP BY league
        ORDER BY league
    ''')
    for row in cur.fetchall():
        league, total, opening, closing = row
        print(f"  {league}: {opening}/{total} opening ({100*opening/total:.1f}%), {closing}/{total} closing ({100*closing/total:.1f}%)")

    cur.close()
    conn.close()

    print("\n" + "=" * 60)
    print(f"ETL Complete: {opening_updated} opening, {closing_updated} closing")
    print("=" * 60)

    return opening_updated, closing_updated


if __name__ == '__main__':
    run_etl()
