#!/usr/bin/env python3
"""
Critical Data Gap Backfill Script v2
Uses working APIs and endpoints
"""
import requests
import psycopg2
import json
import os
import time
from datetime import datetime, timezone

REQUEST_DELAY = 0.3

def load_db_url():
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().split('?')[0]
    except FileNotFoundError:
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def get_db_connection():
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')
    return psycopg2.connect(db_url)


# =============================================================================
# NHL GOALIE STATS - Using NHL Stats REST API
# =============================================================================
def fetch_all_nhl_goalies(season_id='20252026'):
    """Fetch all NHL goalies from stats REST API"""
    url = f'https://api.nhle.com/stats/rest/en/goalie/summary'
    params = {
        'isAggregate': 'false',
        'isGame': 'false',
        'sort': '[{"property":"gamesPlayed","direction":"DESC"}]',
        'start': 0,
        'limit': 200,
        'cayenneExp': f'seasonId={season_id} and gameTypeId=2'
    }

    try:
        resp = requests.get(url, params=params, timeout=30)
        if resp.status_code == 200:
            data = resp.json()
            return data.get('data', [])
    except Exception as e:
        print(f"  Error: {e}")
    return []


def ensure_goalie_table(cur):
    """Create NHL goalie stats table"""
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "NHLGoalieStats" (
            id BIGSERIAL PRIMARY KEY,
            season VARCHAR(20),
            "playerId" INT,
            "goalieFullName" VARCHAR(100),
            "teamAbbrev" VARCHAR(10),
            "gamesPlayed" INT,
            "gamesStarted" INT,
            wins INT,
            losses INT,
            "otLosses" INT,
            "savePctg" FLOAT,
            "goalsAgainstAvg" FLOAT,
            shutouts INT,
            "shotsAgainst" INT,
            saves INT,
            "goalsAgainst" INT,
            "timeOnIce" INT,
            "updatedAt" TIMESTAMP DEFAULT NOW(),
            UNIQUE(season, "playerId")
        )
    ''')
    cur.execute('CREATE INDEX IF NOT EXISTS "NHLGoalieStats_team_idx" ON "NHLGoalieStats" ("teamAbbrev", season)')


def backfill_nhl_goalies():
    """Backfill NHL goalie stats"""
    print("\n" + "="*60)
    print("NHL GOALIE STATS BACKFILL")
    print("="*60)

    conn = get_db_connection()
    cur = conn.cursor()

    ensure_goalie_table(cur)
    conn.commit()

    total = 0
    for season_id in ['20252026', '20242025']:
        print(f"\nFetching goalies for season {season_id}...")
        goalies = fetch_all_nhl_goalies(season_id)
        print(f"  Found {len(goalies)} goalies")

        for goalie in goalies:
            try:
                cur.execute('''
                    INSERT INTO "NHLGoalieStats" (
                        season, "playerId", "goalieFullName", "teamAbbrev",
                        "gamesPlayed", "gamesStarted", wins, losses, "otLosses",
                        "savePctg", "goalsAgainstAvg", shutouts, "shotsAgainst",
                        saves, "goalsAgainst", "timeOnIce"
                    )
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT (season, "playerId") DO UPDATE SET
                        "gamesPlayed" = EXCLUDED."gamesPlayed",
                        "gamesStarted" = EXCLUDED."gamesStarted",
                        wins = EXCLUDED.wins,
                        losses = EXCLUDED.losses,
                        "otLosses" = EXCLUDED."otLosses",
                        "savePctg" = EXCLUDED."savePctg",
                        "goalsAgainstAvg" = EXCLUDED."goalsAgainstAvg",
                        shutouts = EXCLUDED.shutouts,
                        "shotsAgainst" = EXCLUDED."shotsAgainst",
                        saves = EXCLUDED.saves,
                        "goalsAgainst" = EXCLUDED."goalsAgainst",
                        "timeOnIce" = EXCLUDED."timeOnIce",
                        "updatedAt" = NOW()
                ''', (
                    season_id, goalie.get('playerId'), goalie.get('goalieFullName'),
                    goalie.get('teamAbbrevs') or goalie.get('lastTeamAbbrev'),
                    goalie.get('gamesPlayed'), goalie.get('gamesStarted'),
                    goalie.get('wins'), goalie.get('losses'), goalie.get('otLosses'),
                    goalie.get('savePctg'), goalie.get('goalsAgainstAverage'),
                    goalie.get('shutouts'), goalie.get('shotsAgainst'),
                    goalie.get('saves'), goalie.get('goalsAgainst'),
                    goalie.get('timeOnIce')
                ))
                total += 1
            except Exception as e:
                print(f"    Error: {e}")

        conn.commit()

    cur.close()
    conn.close()

    print(f"\n✅ Inserted/updated {total} goalie records")
    return total


# =============================================================================
# NBA LINEUP - Using Basketball Reference / pbpstats alternative
# =============================================================================
def fetch_nba_lineups_pbpstats(team_abbr, season='2024-25'):
    """Fetch lineup data from pbpstats.com API"""
    # pbpstats provides free lineup data
    season_formatted = season.replace('-', '-20')  # 2024-25 -> 2024-2025

    url = f'https://api.pbpstats.com/get-lineup-stats/{season_formatted}'
    params = {
        'Season': season_formatted,
        'SeasonType': 'Regular Season',
        'Team': team_abbr,
        'Lineup': '5',  # 5-man lineups
    }

    try:
        resp = requests.get(url, timeout=30)
        if resp.status_code == 200:
            return resp.json()
    except Exception as e:
        pass
    return None


def ensure_nba_lineup_tables(cur):
    """Create NBA lineup tables"""
    cur.execute('DROP TABLE IF EXISTS "NBALineup" CASCADE')
    cur.execute('DROP TABLE IF EXISTS "NBAPlayerOnOff" CASCADE')

    cur.execute('''
        CREATE TABLE "NBALineup" (
            id BIGSERIAL PRIMARY KEY,
            season VARCHAR(20),
            team_id INT,
            team VARCHAR(50),
            lineup_id VARCHAR(100),
            player1 VARCHAR(100),
            player2 VARCHAR(100),
            player3 VARCHAR(100),
            player4 VARCHAR(100),
            player5 VARCHAR(100),
            minutes FLOAT,
            plus_minus FLOAT,
            off_rating FLOAT,
            def_rating FLOAT,
            net_rating FLOAT,
            pace FLOAT,
            gp INT,
            wins INT,
            losses INT,
            updated_at TIMESTAMP DEFAULT NOW(),
            UNIQUE(season, team, lineup_id)
        )
    ''')

    cur.execute('''
        CREATE TABLE "NBAPlayerOnOff" (
            id BIGSERIAL PRIMARY KEY,
            season VARCHAR(20),
            player_id INT,
            player_name VARCHAR(100),
            team_id INT,
            team VARCHAR(50),
            on_court_min FLOAT,
            on_court_plus_minus FLOAT,
            on_court_off_rtg FLOAT,
            on_court_def_rtg FLOAT,
            on_court_net_rtg FLOAT,
            off_court_min FLOAT,
            off_court_plus_minus FLOAT,
            off_court_off_rtg FLOAT,
            off_court_def_rtg FLOAT,
            off_court_net_rtg FLOAT,
            on_off_diff FLOAT,
            updated_at TIMESTAMP DEFAULT NOW(),
            UNIQUE(season, player_id)
        )
    ''')

    cur.execute('CREATE INDEX idx_nbalineup_team ON "NBALineup" (team, season)')
    cur.execute('CREATE INDEX idx_nbaplayeronoff_team ON "NBAPlayerOnOff" (team, season)')


def fetch_nba_from_balldontlie():
    """Use balldontlie API for basic team/player data"""
    # balldontlie is free and doesn't block
    url = 'https://api.balldontlie.io/v1/teams'
    headers = {'Authorization': 'your-api-key'}  # Free tier available

    # Note: balldontlie doesn't have lineup data, but provides basic stats
    # For lineup data, we need to compute from play-by-play
    pass


def backfill_nba_lineups_from_calculated():
    """Calculate lineup data from game metrics we already have"""
    print("\n" + "="*60)
    print("NBA LINEUP DATA CALCULATION")
    print("="*60)

    conn = get_db_connection()
    cur = conn.cursor()

    ensure_nba_lineup_tables(cur)
    conn.commit()

    # Calculate player on/off impact from PlayerImpact table
    cur.execute('''
        SELECT COUNT(*) FROM "PlayerImpact" WHERE league = 'nba'
    ''')
    impact_count = cur.fetchone()[0]
    print(f"Found {impact_count} PlayerImpact records for NBA")

    if impact_count > 0:
        # Copy relevant data to NBAPlayerOnOff
        cur.execute('''
            INSERT INTO "NBAPlayerOnOff" (
                season, player_id, player_name, team, on_off_diff
            )
            SELECT
                CAST(season AS VARCHAR),
                CAST("playerId" AS INT),
                "playerName",
                team,
                "onOffNetRtgDiff"
            FROM "PlayerImpact"
            WHERE league = 'nba'
              AND "onOffNetRtgDiff" IS NOT NULL
            ON CONFLICT (season, player_id) DO UPDATE SET
                on_off_diff = EXCLUDED.on_off_diff,
                updated_at = NOW()
        ''')
        copied = cur.rowcount
        conn.commit()
        print(f"  Copied {copied} player on/off records from PlayerImpact")
    else:
        copied = 0

    cur.close()
    conn.close()

    print(f"✅ NBA On/Off: {copied} records")
    return {'lineups': 0, 'on_off': copied}


# =============================================================================
# SOCCER HALF SCORING
# =============================================================================
def fetch_espn_soccer_scoreboard(league='eng.1', date_str=None):
    """Fetch soccer scoreboard with period scores"""
    if date_str:
        url = f'https://site.api.espn.com/apis/site/v2/sports/soccer/{league}/scoreboard?dates={date_str}'
    else:
        url = f'https://site.api.espn.com/apis/site/v2/sports/soccer/{league}/scoreboard'

    try:
        resp = requests.get(url, timeout=30)
        if resp.status_code == 200:
            return resp.json()
    except Exception as e:
        pass
    return None


def ensure_soccer_period_table(cur):
    """Create soccer period scoring table"""
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "SoccerPeriodScoring" (
            id BIGSERIAL PRIMARY KEY,
            league VARCHAR(20),
            "gameId" BIGINT,
            "externalGameId" VARCHAR(100),
            "gameDate" TIMESTAMP,
            "homeTeam" VARCHAR(100),
            "awayTeam" VARCHAR(100),
            "home1H" INT,
            "home2H" INT,
            "away1H" INT,
            "away2H" INT,
            "homeTotal" INT,
            "awayTotal" INT,
            "createdAt" TIMESTAMP DEFAULT NOW(),
            UNIQUE(league, "externalGameId")
        )
    ''')
    cur.execute('CREATE INDEX IF NOT EXISTS "SoccerPeriodScoring_league_idx" ON "SoccerPeriodScoring" (league, "gameDate")')


def backfill_soccer_half_scoring():
    """Backfill soccer half scoring from ESPN API"""
    print("\n" + "="*60)
    print("SOCCER HALF SCORING BACKFILL")
    print("="*60)

    conn = get_db_connection()
    cur = conn.cursor()

    ensure_soccer_period_table(cur)
    conn.commit()

    total = 0

    # Fetch recent dates
    from datetime import datetime, timedelta
    base_date = datetime.now()

    for days_back in range(0, 90, 1):
        date = base_date - timedelta(days=days_back)
        date_str = date.strftime('%Y%m%d')

        time.sleep(REQUEST_DELAY)
        data = fetch_espn_soccer_scoreboard('eng.1', date_str)

        if not data or 'events' not in data:
            continue

        for event in data['events']:
            event_id = event.get('id')
            if not event_id:
                continue

            competitions = event.get('competitions', [])
            if not competitions:
                continue

            comp = competitions[0]
            status = comp.get('status', {}).get('type', {}).get('completed', False)
            if not status:
                continue  # Skip incomplete games

            competitors = comp.get('competitors', [])
            if len(competitors) != 2:
                continue

            home_data = None
            away_data = None

            for c in competitors:
                if c.get('homeAway') == 'home':
                    home_data = c
                else:
                    away_data = c

            if not home_data or not away_data:
                continue

            # Get period scores
            home_linescores = home_data.get('linescores', [])
            away_linescores = away_data.get('linescores', [])

            if len(home_linescores) >= 2 and len(away_linescores) >= 2:
                try:
                    home_1h = int(home_linescores[0].get('value', 0))
                    home_2h = int(home_linescores[1].get('value', 0))
                    away_1h = int(away_linescores[0].get('value', 0))
                    away_2h = int(away_linescores[1].get('value', 0))

                    cur.execute('''
                        INSERT INTO "SoccerPeriodScoring" (
                            league, "externalGameId", "gameDate",
                            "homeTeam", "awayTeam",
                            "home1H", "home2H", "away1H", "away2H",
                            "homeTotal", "awayTotal"
                        )
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        ON CONFLICT (league, "externalGameId") DO UPDATE SET
                            "home1H" = EXCLUDED."home1H",
                            "home2H" = EXCLUDED."home2H",
                            "away1H" = EXCLUDED."away1H",
                            "away2H" = EXCLUDED."away2H"
                    ''', (
                        'epl', event_id, event.get('date'),
                        home_data.get('team', {}).get('displayName'),
                        away_data.get('team', {}).get('displayName'),
                        home_1h, home_2h, away_1h, away_2h,
                        int(home_data.get('score', 0)),
                        int(away_data.get('score', 0))
                    ))
                    total += 1
                except Exception as e:
                    pass

        if total > 0 and total % 50 == 0:
            print(f"  Processed {total} matches...")
            conn.commit()

    conn.commit()
    cur.close()
    conn.close()

    print(f"✅ Added half scoring for {total} soccer matches")
    return total


# =============================================================================
# INJURY TIMESTAMP TRACKING
# =============================================================================
def ensure_injury_history_table(cur):
    """Create injury status history table"""
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "InjuryStatusHistory" (
            id BIGSERIAL PRIMARY KEY,
            league VARCHAR(20),
            "playerId" VARCHAR(100),
            "playerName" VARCHAR(200),
            team VARCHAR(50),
            status VARCHAR(50),
            "previousStatus" VARCHAR(50),
            injury VARCHAR(200),
            "statusChangedAt" TIMESTAMP,
            "gameId" BIGINT,
            "gameDate" TIMESTAMP,
            "hoursBeforeGame" FLOAT,
            "createdAt" TIMESTAMP DEFAULT NOW()
        )
    ''')
    cur.execute('CREATE INDEX IF NOT EXISTS "InjuryStatusHistory_player_idx" ON "InjuryStatusHistory" ("playerId", "statusChangedAt")')


def backfill_injury_timestamps():
    """Add timestamp tracking to existing injury records"""
    print("\n" + "="*60)
    print("INJURY TIMESTAMP TRACKING")
    print("="*60)

    conn = get_db_connection()
    cur = conn.cursor()

    ensure_injury_history_table(cur)
    conn.commit()

    # Add columns if needed
    try:
        cur.execute('''
            ALTER TABLE "PlayerInjury"
            ADD COLUMN IF NOT EXISTS "statusChangedAt" TIMESTAMP,
            ADD COLUMN IF NOT EXISTS "previousStatus" VARCHAR(50)
        ''')
        conn.commit()
    except:
        conn.rollback()

    # Set timestamps for existing records
    cur.execute('''
        UPDATE "PlayerInjury"
        SET "statusChangedAt" = COALESCE("statusChangedAt", "updatedAt", "createdAt", NOW())
        WHERE "statusChangedAt" IS NULL
    ''')
    updated = cur.rowcount
    conn.commit()

    # Create snapshot
    cur.execute('''
        INSERT INTO "InjuryStatusHistory" (
            league, "playerId", "playerName", team, status, injury, "statusChangedAt"
        )
        SELECT
            league,
            "playerExternalId",
            "playerName",
            team,
            status,
            injury,
            COALESCE("statusChangedAt", "updatedAt", NOW())
        FROM "PlayerInjury"
        WHERE status IS NOT NULL
        ON CONFLICT DO NOTHING
    ''')
    snapshot_count = cur.rowcount
    conn.commit()

    cur.close()
    conn.close()

    print(f"  Updated {updated} injury records with timestamps")
    print(f"✅ Created {snapshot_count} injury history records")
    return snapshot_count


# =============================================================================
# MAIN
# =============================================================================
def main():
    print("="*60)
    print("CRITICAL DATA GAP BACKFILL v2")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("="*60)

    results = {}

    # 1. NHL Goalie Stats (using working REST API)
    try:
        results['nhl_goalies'] = backfill_nhl_goalies()
    except Exception as e:
        print(f"NHL Goalie backfill failed: {e}")
        import traceback
        traceback.print_exc()
        results['nhl_goalies'] = 0

    # 2. NBA Lineup from existing data
    try:
        results['nba_lineups'] = backfill_nba_lineups_from_calculated()
    except Exception as e:
        print(f"NBA Lineup backfill failed: {e}")
        import traceback
        traceback.print_exc()
        results['nba_lineups'] = {'lineups': 0, 'on_off': 0}

    # 3. Soccer Half Scoring
    try:
        results['soccer_halves'] = backfill_soccer_half_scoring()
    except Exception as e:
        print(f"Soccer half scoring backfill failed: {e}")
        import traceback
        traceback.print_exc()
        results['soccer_halves'] = 0

    # 4. Injury Timestamps
    try:
        results['injury_timestamps'] = backfill_injury_timestamps()
    except Exception as e:
        print(f"Injury timestamp backfill failed: {e}")
        import traceback
        traceback.print_exc()
        results['injury_timestamps'] = 0

    print("\n" + "="*60)
    print("BACKFILL COMPLETE")
    print("="*60)
    print(f"NHL Goalies: {results.get('nhl_goalies', 0)}")
    print(f"NBA On/Off: {results.get('nba_lineups', {}).get('on_off', 0)}")
    print(f"Soccer Half Scores: {results.get('soccer_halves', 0)}")
    print(f"Injury History: {results.get('injury_timestamps', 0)}")
    print("="*60)


if __name__ == '__main__':
    main()
