#!/usr/bin/env python3
"""
Critical Data Gap Backfill - Final Working Version
"""
import requests
import psycopg2
import os
import time
from datetime import datetime, timezone, timedelta

REQUEST_DELAY = 0.3

def load_db_url():
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().split('?')[0]
    except FileNotFoundError:
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def get_db_connection():
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')
    return psycopg2.connect(db_url)


# =============================================================================
# 1. NHL GOALIE STATS
# =============================================================================
def backfill_nhl_goalies():
    """Backfill NHL goalie stats from NHL Stats REST API"""
    print("\n" + "="*60)
    print("1. NHL GOALIE STATS BACKFILL")
    print("="*60)

    conn = get_db_connection()
    cur = conn.cursor()

    # Create table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "NHLGoalieStats" (
            id BIGSERIAL PRIMARY KEY,
            season VARCHAR(20),
            player_id INT,
            goalie_name VARCHAR(100),
            team_abbrev VARCHAR(10),
            games_played INT,
            games_started INT,
            wins INT,
            losses INT,
            ot_losses INT,
            save_pctg FLOAT,
            goals_against_avg FLOAT,
            shutouts INT,
            shots_against INT,
            saves INT,
            goals_against INT,
            time_on_ice INT,
            updated_at TIMESTAMP DEFAULT NOW(),
            UNIQUE(season, player_id)
        )
    ''')
    cur.execute('CREATE INDEX IF NOT EXISTS idx_nhlgoalie_team ON "NHLGoalieStats" (team_abbrev, season)')
    conn.commit()

    total = 0

    for season_id in ['20252026', '20242025']:
        print(f"\nFetching goalies for season {season_id}...")

        url = 'https://api.nhle.com/stats/rest/en/goalie/summary'
        params = {
            'isAggregate': 'false',
            'isGame': 'false',
            'sort': '[{"property":"gamesPlayed","direction":"DESC"}]',
            'start': 0,
            'limit': 200,
            'cayenneExp': f'seasonId={season_id} and gameTypeId=2'
        }

        try:
            resp = requests.get(url, params=params, timeout=30)
            if resp.status_code != 200:
                print(f"  API returned {resp.status_code}")
                continue

            data = resp.json()
            goalies = data.get('data', [])
            print(f"  Found {len(goalies)} goalies")

            for goalie in goalies:
                try:
                    cur.execute('''
                        INSERT INTO "NHLGoalieStats" (
                            season, player_id, goalie_name, team_abbrev,
                            games_played, games_started, wins, losses, ot_losses,
                            save_pctg, goals_against_avg, shutouts, shots_against,
                            saves, goals_against, time_on_ice
                        )
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        ON CONFLICT (season, player_id) DO UPDATE SET
                            games_played = EXCLUDED.games_played,
                            wins = EXCLUDED.wins,
                            losses = EXCLUDED.losses,
                            save_pctg = EXCLUDED.save_pctg,
                            goals_against_avg = EXCLUDED.goals_against_avg,
                            shutouts = EXCLUDED.shutouts,
                            saves = EXCLUDED.saves,
                            updated_at = NOW()
                    ''', (
                        season_id, goalie.get('playerId'), goalie.get('goalieFullName'),
                        goalie.get('teamAbbrevs') or goalie.get('lastTeamAbbrev'),
                        goalie.get('gamesPlayed'), goalie.get('gamesStarted'),
                        goalie.get('wins'), goalie.get('losses'), goalie.get('otLosses'),
                        goalie.get('savePctg'), goalie.get('goalsAgainstAverage'),
                        goalie.get('shutouts'), goalie.get('shotsAgainst'),
                        goalie.get('saves'), goalie.get('goalsAgainst'),
                        goalie.get('timeOnIce')
                    ))
                    total += 1
                except Exception as e:
                    print(f"    Insert error: {e}")
                    conn.rollback()

            conn.commit()

        except Exception as e:
            print(f"  Fetch error: {e}")
            conn.rollback()

    cur.close()
    conn.close()

    print(f"\n✅ Inserted {total} goalie records")
    return total


# =============================================================================
# 2. NBA PLAYER ON/OFF (from PlayerImpact)
# =============================================================================
def backfill_nba_on_off():
    """Copy NBA player on/off impact from existing PlayerImpact table"""
    print("\n" + "="*60)
    print("2. NBA PLAYER ON/OFF BACKFILL")
    print("="*60)

    conn = get_db_connection()
    cur = conn.cursor()

    # Create table
    cur.execute('DROP TABLE IF EXISTS "NBAPlayerOnOff" CASCADE')
    cur.execute('''
        CREATE TABLE "NBAPlayerOnOff" (
            id BIGSERIAL PRIMARY KEY,
            season VARCHAR(20),
            player_id VARCHAR(100),
            player_name VARCHAR(100),
            team VARCHAR(50),
            games_played INT,
            games_missed INT,
            team_wins_with INT,
            team_losses_with INT,
            team_wins_without INT,
            team_losses_without INT,
            win_pct_with FLOAT,
            win_pct_without FLOAT,
            win_pct_diff FLOAT,
            avg_pts_with FLOAT,
            avg_pts_without FLOAT,
            net_impact FLOAT,
            updated_at TIMESTAMP DEFAULT NOW(),
            UNIQUE(season, player_id)
        )
    ''')
    cur.execute('CREATE INDEX idx_nbaonoff_team ON "NBAPlayerOnOff" (team, season)')
    cur.execute('CREATE INDEX idx_nbaonoff_impact ON "NBAPlayerOnOff" (win_pct_diff DESC)')
    conn.commit()

    # Copy from PlayerImpact
    cur.execute('''
        INSERT INTO "NBAPlayerOnOff" (
            season, player_id, player_name, team,
            games_played, games_missed,
            team_wins_with, team_losses_with,
            team_wins_without, team_losses_without,
            win_pct_with, win_pct_without, win_pct_diff,
            avg_pts_with, avg_pts_without, net_impact
        )
        SELECT
            CAST(season AS VARCHAR),
            "playerId",
            "playerName",
            team,
            "gamesPlayed",
            "gamesMissed",
            "teamWinsWithPlayer",
            "teamLossesWithPlayer",
            "teamWinsWithoutPlayer",
            "teamLossesWithoutPlayer",
            "winPctWithPlayer",
            "winPctWithoutPlayer",
            "winPctDiff",
            "avgPointsWithPlayer",
            "avgPointsWithoutPlayer",
            "netImpact"
        FROM "PlayerImpact"
        WHERE league = 'nba'
        ON CONFLICT (season, player_id) DO NOTHING
    ''')
    copied = cur.rowcount
    conn.commit()

    cur.close()
    conn.close()

    print(f"✅ Created {copied} NBA player on/off records from PlayerImpact")
    return copied


# =============================================================================
# 3. SOCCER HALF SCORING
# =============================================================================
def backfill_soccer_half_scoring():
    """Backfill soccer half scoring from ESPN API using summary endpoint"""
    print("\n" + "="*60)
    print("3. SOCCER HALF SCORING BACKFILL")
    print("="*60)

    conn = get_db_connection()
    cur = conn.cursor()

    # Create table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "SoccerPeriodScoring" (
            id BIGSERIAL PRIMARY KEY,
            league VARCHAR(20),
            external_game_id VARCHAR(100),
            game_date TIMESTAMP,
            home_team VARCHAR(100),
            away_team VARCHAR(100),
            home_1h INT,
            home_2h INT,
            away_1h INT,
            away_2h INT,
            home_total INT,
            away_total INT,
            created_at TIMESTAMP DEFAULT NOW(),
            UNIQUE(league, external_game_id)
        )
    ''')
    cur.execute('CREATE INDEX IF NOT EXISTS idx_soccerperiod_date ON "SoccerPeriodScoring" (league, game_date)')
    conn.commit()

    total = 0
    base_date = datetime.now()

    for days_back in range(0, 90):
        date = base_date - timedelta(days=days_back)
        date_str = date.strftime('%Y%m%d')

        time.sleep(REQUEST_DELAY)

        # Get scoreboard for event IDs
        url = f'https://site.api.espn.com/apis/site/v2/sports/soccer/eng.1/scoreboard?dates={date_str}'
        try:
            resp = requests.get(url, timeout=30)
            if resp.status_code != 200:
                continue

            data = resp.json()
            events = data.get('events', [])

            for event in events:
                event_id = event.get('id')
                if not event_id:
                    continue

                # Check if completed
                competitions = event.get('competitions', [])
                if not competitions:
                    continue
                comp = competitions[0]
                if not comp.get('status', {}).get('type', {}).get('completed', False):
                    continue

                # Get summary for half scores
                time.sleep(REQUEST_DELAY)
                summary_url = f'https://site.api.espn.com/apis/site/v2/sports/soccer/eng.1/summary?event={event_id}'
                try:
                    summary_resp = requests.get(summary_url, timeout=30)
                    if summary_resp.status_code != 200:
                        continue

                    summary_data = summary_resp.json()
                    header = summary_data.get('header', {})
                    comps = header.get('competitions', [])
                    if not comps:
                        continue

                    home_data = away_data = None
                    for c in comps[0].get('competitors', []):
                        if c.get('homeAway') == 'home':
                            home_data = c
                        else:
                            away_data = c

                    if not home_data or not away_data:
                        continue

                    home_ls = home_data.get('linescores', [])
                    away_ls = away_data.get('linescores', [])

                    if len(home_ls) >= 2 and len(away_ls) >= 2:
                        cur.execute('''
                            INSERT INTO "SoccerPeriodScoring" (
                                league, external_game_id, game_date,
                                home_team, away_team,
                                home_1h, home_2h, away_1h, away_2h,
                                home_total, away_total
                            )
                            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT (league, external_game_id) DO NOTHING
                        ''', (
                            'epl', event_id, event.get('date'),
                            home_data.get('team', {}).get('displayName'),
                            away_data.get('team', {}).get('displayName'),
                            int(home_ls[0].get('displayValue', 0)),
                            int(home_ls[1].get('displayValue', 0)),
                            int(away_ls[0].get('displayValue', 0)),
                            int(away_ls[1].get('displayValue', 0)),
                            int(home_data.get('score', 0)),
                            int(away_data.get('score', 0))
                        ))
                        if cur.rowcount > 0:
                            total += 1
                except Exception as e:
                    pass

        except Exception as e:
            continue

        if total > 0 and total % 10 == 0:
            print(f"  Progress: {total} matches with half scoring...")
            conn.commit()

    conn.commit()
    cur.close()
    conn.close()

    print(f"✅ Added {total} soccer matches with half scoring")
    return total


# =============================================================================
# 4. INJURY TIMESTAMP HISTORY
# =============================================================================
def backfill_injury_timestamps():
    """Create injury status history tracking"""
    print("\n" + "="*60)
    print("4. INJURY TIMESTAMP HISTORY")
    print("="*60)

    conn = get_db_connection()
    cur = conn.cursor()

    # Create history table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "InjuryStatusHistory" (
            id BIGSERIAL PRIMARY KEY,
            league VARCHAR(20),
            player_external_id VARCHAR(100),
            player_name VARCHAR(200),
            team VARCHAR(50),
            status VARCHAR(50),
            previous_status VARCHAR(50),
            injury VARCHAR(200),
            status_changed_at TIMESTAMP,
            created_at TIMESTAMP DEFAULT NOW()
        )
    ''')
    cur.execute('CREATE INDEX IF NOT EXISTS idx_injuryhistory_player ON "InjuryStatusHistory" (player_external_id, status_changed_at)')
    conn.commit()

    # Add timestamp column to PlayerInjury if missing
    try:
        cur.execute('ALTER TABLE "PlayerInjury" ADD COLUMN IF NOT EXISTS status_changed_at TIMESTAMP')
        conn.commit()
    except:
        conn.rollback()

    # Set initial timestamps
    cur.execute('''
        UPDATE "PlayerInjury"
        SET status_changed_at = COALESCE(status_changed_at, "updatedAt", "createdAt", NOW())
        WHERE status_changed_at IS NULL
    ''')
    updated = cur.rowcount
    conn.commit()
    print(f"  Set timestamps for {updated} injury records")

    # Create history snapshot
    cur.execute('''
        INSERT INTO "InjuryStatusHistory" (
            league, player_external_id, player_name, team, status, injury, status_changed_at
        )
        SELECT
            league,
            "playerExternalId",
            "playerName",
            team,
            status,
            "injuryType",
            COALESCE(status_changed_at, "updatedAt", NOW())
        FROM "PlayerInjury"
        WHERE status IS NOT NULL
    ''')
    snapshot = cur.rowcount
    conn.commit()

    cur.close()
    conn.close()

    print(f"✅ Created {snapshot} injury history records")
    return snapshot


# =============================================================================
# MAIN
# =============================================================================
def main():
    print("="*60)
    print("CRITICAL DATA GAP BACKFILL - FINAL")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("="*60)

    results = {}

    # 1. NHL Goalies
    try:
        results['nhl_goalies'] = backfill_nhl_goalies()
    except Exception as e:
        print(f"NHL Goalie error: {e}")
        import traceback
        traceback.print_exc()
        results['nhl_goalies'] = 0

    # 2. NBA On/Off
    try:
        results['nba_on_off'] = backfill_nba_on_off()
    except Exception as e:
        print(f"NBA On/Off error: {e}")
        import traceback
        traceback.print_exc()
        results['nba_on_off'] = 0

    # 3. Soccer Half Scoring
    try:
        results['soccer_halves'] = backfill_soccer_half_scoring()
    except Exception as e:
        print(f"Soccer error: {e}")
        import traceback
        traceback.print_exc()
        results['soccer_halves'] = 0

    # 4. Injury Timestamps
    try:
        results['injury_history'] = backfill_injury_timestamps()
    except Exception as e:
        print(f"Injury error: {e}")
        import traceback
        traceback.print_exc()
        results['injury_history'] = 0

    print("\n" + "="*60)
    print("BACKFILL COMPLETE")
    print("="*60)
    print(f"NHL Goalies: {results.get('nhl_goalies', 0)}")
    print(f"NBA On/Off: {results.get('nba_on_off', 0)}")
    print(f"Soccer Half Scores: {results.get('soccer_halves', 0)}")
    print(f"Injury History: {results.get('injury_history', 0)}")
    print("="*60)

    return results


if __name__ == '__main__':
    main()
