#!/usr/bin/env python3
"""
Comprehensive Data Backfill Runner
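Runs the available backfill scripts for maximum data coverage.

With no argument, every phase except the historical odds backfill is run,
followed by the data summary. Pass one phase name (historical, period, sgo,
theodds, espn, etl, summary) to run only that phase.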

APIs Used:
- The Odds API: Historical odds (from June 2020)
- SportsGameOdds: Current half-lines, bookmaker data, props
- ESPN: Period scoring, referees, broadcasts
- Ball Don't Lie: NBA stats
"""

import subprocess
import sys
import os
from datetime import datetime, timedelta

# Change to project directory. The commands in this file reference scripts by
# paths relative to it (scripts/...). This runs at import time and raises if
# the directory does not exist.
os.chdir('/var/www/html/eventheodds')

# League codes iterated by the SportsGameOdds phase.
LEAGUES = ['nba', 'nfl', 'nhl', 'mlb', 'ncaab', 'ncaaf']

def run_cmd(cmd, description, timeout=600, cwd='/var/www/html/eventheodds'):
    """Run a shell command, streaming its output, and report success.

    The command's stdout/stderr are not captured; they go straight to this
    process's own streams.

    Args:
        cmd: Command line handed to the shell (``shell=True``). Nothing is
            escaped here, so only pass trusted, internally-built strings.
        description: Human-readable label printed in the banner.
        timeout: Seconds to wait before the command is abandoned.
        cwd: Working directory for the command. Defaults to the project
            directory.

    Returns:
        True if the command exited with status 0. False on a non-zero exit
        status, on timeout, or if the command could not be launched.
    """
    print(f"\n{'='*60}")
    print(f"RUNNING: {description}")
    print(f"CMD: {cmd}")
    # Flush before the child starts: it writes to the same stdout directly,
    # so a still-buffered banner would otherwise show up after its output
    # whenever stdout is redirected to a file or pipe.
    print(f"{'='*60}", flush=True)

    try:
        result = subprocess.run(
            cmd,
            shell=True,
            capture_output=False,
            timeout=timeout,
            cwd=cwd,
        )
    except subprocess.TimeoutExpired:
        print(f"TIMEOUT after {timeout}s")
        return False
    except Exception as e:
        # Deliberately broad: one failing step must not abort the whole run.
        print(f"ERROR: {e}")
        return False

    if result.returncode != 0:
        # Callers ignore the return value, so make the failure visible here.
        print(f"FAILED: exit code {result.returncode}")
        return False
    return True


def backfill_historical_odds():
    """Phase 1: run the Odds API historical backfill in --missing mode per league."""
    rule = "=" * 60
    print(f"\n{rule}")
    print("PHASE 1: HISTORICAL ODDS BACKFILL (The Odds API)")
    print(rule)

    # One pass per league, each allowed up to 30 minutes.
    script = 'python3 scripts/historical_backfill_theoddsapi.py'
    for code in ('nba', 'nfl', 'nhl', 'mlb'):
        command = f'{script} --missing --league {code}'
        label = f"Historical odds backfill - {code.upper()}"
        run_cmd(command, label, timeout=1800)


def backfill_period_scoring():
    """Phase 2: run the ESPN period-scoring backfill script once."""
    rule = "=" * 60
    print(f"\n{rule}")
    print("PHASE 2: PERIOD SCORING BACKFILL (ESPN)")
    print(rule)

    # Single invocation covering nba/nfl/nhl; allowed up to 30 minutes.
    command = 'python3 scripts/backfill_period_scoring.py --leagues=nba,nfl,nhl --limit=500 --days=90'
    run_cmd(command, "Period scoring backfill (last 90 days)", timeout=1800)


def backfill_sportsgameodds():
    """Phase 3: run the SportsGameOdds fetcher for every league in LEAGUES.

    Each league gets the ``half`` and ``books`` subcommands; ``props`` is
    additionally run for nba, nfl, nhl and mlb only.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print("PHASE 3: SPORTSGAMEODDS DATA (Half-lines, Props, Bookmakers)")
    print(rule)

    fetcher = 'python3 scripts/sportsgameodds_fetcher.py'
    prop_leagues = ('nba', 'nfl', 'nhl', 'mlb')

    for league in LEAGUES:
        tag = league.upper()
        # (subcommand, banner label) pairs, in the order they are executed.
        jobs = [
            ('half', f"Half/quarter lines - {tag}"),
            ('books', f"Bookmaker comparison - {tag}"),
        ]
        if league in prop_leagues:
            jobs.append(('props', f"Player props - {tag}"))

        for mode, label in jobs:
            run_cmd(f'{fetcher} {mode} -l {league}', label, timeout=120)


def backfill_theoddsapi_live():
    """Phase 4: run the enhanced Odds API fetcher with --live, then --props, per league."""
    rule = "=" * 60
    print(f"\n{rule}")
    print("PHASE 4: THE ODDS API LIVE DATA")
    print(rule)

    fetcher = 'python3 scripts/theoddsapi_enhanced_fetcher.py'
    for league in ('nba', 'nfl', 'nhl', 'mlb'):
        tag = league.upper()
        # Live odds first, then props, for the same league.
        passes = (
            ('--live', f"Live odds - {tag}"),
            ('--props', f"Props - {tag}"),
        )
        for flag, label in passes:
            run_cmd(f'{fetcher} {flag} --league {league}', label, timeout=120)


def backfill_espn_data():
    """Phase 5: run the ESPN referee script, then the broadcast-info script."""
    rule = "=" * 60
    print(f"\n{rule}")
    print("PHASE 5: ESPN DATA BACKFILL")
    print(rule)

    # (command, banner label) pairs, executed in order; 10 minutes each.
    jobs = (
        (
            'python3 scripts/fetch_espn_referees.py --leagues=nba,nfl,nhl --days=30',
            "ESPN referees (last 30 days)",
        ),
        (
            'python3 scripts/fetch_broadcast_info.py --leagues=nba,nfl,nhl --days-back=30 --days-forward=7',
            "ESPN broadcasts",
        ),
    )
    for command, label in jobs:
        run_cmd(command, label, timeout=600)


def backfill_opening_closing():
    """Phase 6: run the opening/closing-line ETL, then the GameOdds backfill script."""
    rule = "=" * 60
    print(f"\n{rule}")
    print("PHASE 6: ETL OPENING/CLOSING LINES")
    print(rule)

    # (command, banner label) pairs, executed in order; 5 minutes each.
    jobs = (
        ('python3 scripts/etl_opening_closing_lines.py', "ETL snapshots to game lines"),
        ('python3 scripts/backfill_opening_closing_from_gameodds.py', "Backfill from GameOdds table"),
    )
    for command, label in jobs:
        run_cmd(command, label, timeout=300)


def show_summary():
    """Print row counts for the odds tables and opening-line coverage per league.

    Runs two read-only queries against the PostgreSQL database and prints the
    results. The connection is always closed, even if a query or the
    formatting of a row fails.

    Raises:
        ImportError: If psycopg2 is not installed.
        psycopg2.Error: If connecting or querying fails.
    """
    print("\n" + "="*60)
    print("BACKFILL COMPLETE - DATA SUMMARY")
    print("="*60)

    # Imported here so the other phases do not require psycopg2.
    import psycopg2
    # NOTE(review): credentials are hard-coded in this DSN; consider loading
    # them from configuration or the environment instead.
    conn = psycopg2.connect('postgresql://eventheodds:eventheodds_dev_password@127.0.0.1:5433/eventheodds_sports')
    try:
        with conn.cursor() as cur:
            # Row count per snapshot table, largest first.
            cur.execute("""
        SELECT 'OddsSnapshot' as tbl, COUNT(*) FROM "OddsSnapshot"
        UNION ALL SELECT 'PropSnapshot', COUNT(*) FROM "PropSnapshot"
        UNION ALL SELECT 'GameHalfLine', COUNT(*) FROM "GameHalfLine"
        UNION ALL SELECT 'BookmakerOddsSnapshot', COUNT(*) FROM "BookmakerOddsSnapshot"
        UNION ALL SELECT 'LiveOddsSnapshot', COUNT(*) FROM "LiveOddsSnapshot"
        ORDER BY 2 DESC
    """)

            print("\nOdds Data Counts:")
            for row in cur.fetchall():
                print(f"  {row[0]:25} {row[1]:,}")

            # Per league: total games, games with a non-NULL opening home
            # spread, and that share as a percentage.
            cur.execute("""
        SELECT league,
               COUNT(*) as total,
               COUNT("openingSpreadHome") as has_opening,
               ROUND(100.0 * COUNT("openingSpreadHome") / NULLIF(COUNT(*), 0), 1) as pct
        FROM "SportsGame"
        WHERE "gameDate" > '2024-01-01'
        AND league IN ('nba', 'nfl', 'nhl', 'mlb', 'ncaab', 'ncaaf')
        GROUP BY league
        ORDER BY total DESC
    """)

            print("\nOpening Lines Coverage:")
            for row in cur.fetchall():
                print(f"  {row[0]:10} {row[1]:6,} games, {row[2]:5,} with opening ({row[3]}%)")
    finally:
        # Release the connection even when a query or print raises.
        conn.close()


def main():
    """Entry point: run one named phase, or the default sequence of phases.

    The first command-line argument, if present, selects a single phase.
    An unrecognised name prints the list of valid phases. With no argument
    the default sequence runs and ends with the data summary.
    """
    rule = "=" * 60
    print(rule)
    print("COMPREHENSIVE DATA BACKFILL")
    print(f"Started: {datetime.now()}")
    print(rule)

    cli_args = sys.argv[1:]
    if not cli_args:
        # Default run. The historical odds phase is left out on purpose: it
        # is expensive and is meant to be run separately via "historical".
        default_steps = (
            backfill_sportsgameodds,    # Current half-lines, props, bookmakers
            backfill_theoddsapi_live,   # Current live odds
            backfill_opening_closing,   # ETL to game lines
            backfill_period_scoring,    # ESPN period scores
            backfill_espn_data,         # ESPN refs & broadcasts
            show_summary,
        )
        for step in default_steps:
            step()
    else:
        phase = cli_args[0]
        handlers = {
            'historical': backfill_historical_odds,
            'period': backfill_period_scoring,
            'sgo': backfill_sportsgameodds,
            'theodds': backfill_theoddsapi_live,
            'espn': backfill_espn_data,
            'etl': backfill_opening_closing,
            'summary': show_summary,
        }
        handler = handlers.get(phase)
        if handler is None:
            print(f"Unknown phase: {phase}")
            print("Available: historical, period, sgo, theodds, espn, etl, summary")
        else:
            handler()

    print(f"\nCompleted: {datetime.now()}")


# Run the backfill only when this file is executed as a script.
if __name__ == '__main__':
    main()
