#!/usr/bin/env python3
"""
Compute Player Performance Splits
Links player performance thresholds to team win rates and game outcomes
Answers: "Celtics win rate when Tatum scores under 25?" "Team performance when star in foul trouble?"
Supports: NBA, NFL, NHL
"""
import psycopg2
from datetime import datetime

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def compute_player_performance_splits():
    conn = psycopg2.connect(load_db_url())
    cur = conn.cursor()

    # Create PlayerPerformanceSplits table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "PlayerPerformanceSplits" (
            id SERIAL PRIMARY KEY,
            league VARCHAR(20) NOT NULL,
            season VARCHAR(20),
            "playerExternalId" TEXT,
            "playerName" VARCHAR(200),
            team VARCHAR(100) NOT NULL,
            -- Scoring threshold splits
            "totalGames" INTEGER DEFAULT 0,
            "avgPoints" NUMERIC,
            "scoringThreshold" NUMERIC,
            "gamesAboveThreshold" INTEGER DEFAULT 0,
            "gamesAtOrBelowThreshold" INTEGER DEFAULT 0,
            -- Win rates by player performance
            "teamWinRateAboveThreshold" NUMERIC,
            "teamWinRateBelowThreshold" NUMERIC,
            "winRateDifferential" NUMERIC,
            -- High/low game analysis
            "avgPointsInWins" NUMERIC,
            "avgPointsInLosses" NUMERIC,
            "avgPointsInCloseGames" NUMERIC,
            "avgPointsInBlowouts" NUMERIC,
            -- Foul trouble analysis (NBA)
            "gamesWithFoulTrouble" INTEGER DEFAULT 0,
            "teamWinRateInFoulTrouble" NUMERIC,
            "teamWinRateNoFoulTrouble" NUMERIC,
            -- Impact classification
            "performanceImpact" VARCHAR(30),
            "updatedAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
            UNIQUE(league, "playerExternalId", team, season)
        )
    ''')
    conn.commit()
    print("PlayerPerformanceSplits table created/verified")

    # NBA: Compute player performance splits
    print("\nComputing NBA player performance splits...")
    cur.execute('''
        WITH player_game_stats AS (
            SELECT
                pgm.league,
                pgm."playerExternalId",
                pgm.team,
                pgm."gameDate",
                MAX(CASE WHEN pgm."statKey" IN ('espn_pts', 'nba_pts', 'points') THEN pgm.value END) as points,
                MAX(CASE WHEN pgm."statKey" = 'espn_pf' THEN pgm.value END) as fouls
            FROM "PlayerGameMetric" pgm
            WHERE pgm.league = 'nba'
              AND pgm."statKey" IN ('espn_pts', 'nba_pts', 'points', 'espn_pf')
            GROUP BY pgm.league, pgm."playerExternalId", pgm.team, pgm."gameDate"
            HAVING MAX(CASE WHEN pgm."statKey" IN ('espn_pts', 'nba_pts', 'points') THEN pgm.value END) IS NOT NULL
        ),
        player_averages AS (
            SELECT
                league,
                "playerExternalId",
                team,
                AVG(points) as avg_points,
                COUNT(*) as total_games
            FROM player_game_stats
            WHERE points > 5  -- Filter out garbage time appearances
            GROUP BY league, "playerExternalId", team
            HAVING COUNT(*) >= 5
        ),
        player_with_outcomes AS (
            SELECT
                pgs.league,
                pgs."playerExternalId",
                pgs.team,
                pgs."gameDate",
                pgs.points,
                pgs.fouls,
                pa.avg_points,
                pa.total_games,
                -- Get game outcomes from SportsGame
                CASE
                    WHEN pgs.team = sg."homeTeam" THEN
                        CASE WHEN sg."homeScore" > sg."awayScore" THEN 1 ELSE 0 END
                    ELSE
                        CASE WHEN sg."awayScore" > sg."homeScore" THEN 1 ELSE 0 END
                END as won,
                -- Get game type from GameStateTracking
                gst."closeGame",
                gst.blowout
            FROM player_game_stats pgs
            JOIN player_averages pa ON pgs."playerExternalId" = pa."playerExternalId" AND pgs.team = pa.team
            JOIN "SportsGame" sg ON
                sg.league = pgs.league
                AND sg."gameDate" = pgs."gameDate"
                AND (sg."homeTeam" = pgs.team OR sg."awayTeam" = pgs.team)
            LEFT JOIN "GameStateTracking" gst ON
                gst.league = pgs.league
                AND gst."gameDate" = pgs."gameDate"
                AND (gst."homeTeam" = pgs.team OR gst."awayTeam" = pgs.team)
            WHERE sg."homeScore" IS NOT NULL
        ),
        player_splits AS (
            SELECT
                league,
                "playerExternalId",
                team,
                EXTRACT(YEAR FROM "gameDate")::text as season,
                COUNT(*) as total_games,
                ROUND(AVG(points)::numeric, 1) as avg_pts,
                ROUND(AVG(points)) as threshold,
                -- Above threshold
                COUNT(CASE WHEN points > avg_points THEN 1 END) as games_above,
                AVG(CASE WHEN points > avg_points THEN won END) as win_rate_above,
                -- At or below threshold
                COUNT(CASE WHEN points <= avg_points THEN 1 END) as games_below,
                AVG(CASE WHEN points <= avg_points THEN won END) as win_rate_below,
                -- Win/loss scoring
                AVG(CASE WHEN won = 1 THEN points END) as avg_pts_wins,
                AVG(CASE WHEN won = 0 THEN points END) as avg_pts_losses,
                -- Close vs blowout
                AVG(CASE WHEN "closeGame" THEN points END) as avg_pts_close,
                AVG(CASE WHEN blowout THEN points END) as avg_pts_blowout,
                -- Foul trouble (4+ fouls = trouble in NBA)
                COUNT(CASE WHEN fouls >= 4 THEN 1 END) as foul_trouble_games,
                AVG(CASE WHEN fouls >= 4 THEN won END) as win_rate_foul_trouble,
                AVG(CASE WHEN fouls < 4 OR fouls IS NULL THEN won END) as win_rate_no_foul_trouble
            FROM player_with_outcomes
            GROUP BY league, "playerExternalId", team, EXTRACT(YEAR FROM "gameDate")
            HAVING COUNT(*) >= 5
        )
        INSERT INTO "PlayerPerformanceSplits"
        (league, season, "playerExternalId", team, "totalGames", "avgPoints", "scoringThreshold",
         "gamesAboveThreshold", "gamesAtOrBelowThreshold",
         "teamWinRateAboveThreshold", "teamWinRateBelowThreshold", "winRateDifferential",
         "avgPointsInWins", "avgPointsInLosses", "avgPointsInCloseGames", "avgPointsInBlowouts",
         "gamesWithFoulTrouble", "teamWinRateInFoulTrouble", "teamWinRateNoFoulTrouble",
         "performanceImpact")
        SELECT
            league, season, "playerExternalId", team,
            total_games, avg_pts, threshold,
            games_above, games_below,
            ROUND(win_rate_above::numeric, 3),
            ROUND(win_rate_below::numeric, 3),
            ROUND((win_rate_above - win_rate_below)::numeric, 3),
            ROUND(avg_pts_wins::numeric, 1),
            ROUND(avg_pts_losses::numeric, 1),
            ROUND(avg_pts_close::numeric, 1),
            ROUND(avg_pts_blowout::numeric, 1),
            foul_trouble_games,
            ROUND(win_rate_foul_trouble::numeric, 3),
            ROUND(win_rate_no_foul_trouble::numeric, 3),
            CASE
                WHEN (win_rate_above - win_rate_below) > 0.20 THEN 'HIGH_SCORER_WINS'
                WHEN (win_rate_above - win_rate_below) < -0.10 THEN 'EFFICIENT_WINNER'
                ELSE 'BALANCED'
            END
        FROM player_splits
        ON CONFLICT (league, "playerExternalId", team, season) DO UPDATE SET
            "totalGames" = EXCLUDED."totalGames",
            "avgPoints" = EXCLUDED."avgPoints",
            "scoringThreshold" = EXCLUDED."scoringThreshold",
            "gamesAboveThreshold" = EXCLUDED."gamesAboveThreshold",
            "gamesAtOrBelowThreshold" = EXCLUDED."gamesAtOrBelowThreshold",
            "teamWinRateAboveThreshold" = EXCLUDED."teamWinRateAboveThreshold",
            "teamWinRateBelowThreshold" = EXCLUDED."teamWinRateBelowThreshold",
            "winRateDifferential" = EXCLUDED."winRateDifferential",
            "avgPointsInWins" = EXCLUDED."avgPointsInWins",
            "avgPointsInLosses" = EXCLUDED."avgPointsInLosses",
            "avgPointsInCloseGames" = EXCLUDED."avgPointsInCloseGames",
            "avgPointsInBlowouts" = EXCLUDED."avgPointsInBlowouts",
            "gamesWithFoulTrouble" = EXCLUDED."gamesWithFoulTrouble",
            "teamWinRateInFoulTrouble" = EXCLUDED."teamWinRateInFoulTrouble",
            "teamWinRateNoFoulTrouble" = EXCLUDED."teamWinRateNoFoulTrouble",
            "performanceImpact" = EXCLUDED."performanceImpact",
            "updatedAt" = NOW()
    ''')
    nba_count = cur.rowcount
    conn.commit()
    print(f"  NBA: {nba_count} player performance split records")

    # Add player names
    print("\nAdding player names...")
    cur.execute('''
        UPDATE "PlayerPerformanceSplits" pps
        SET "playerName" = p.name
        FROM "Player" p
        WHERE pps."playerExternalId" = p."externalPlayerId"
          AND pps."playerName" IS NULL
    ''')
    conn.commit()

    # Summary
    cur.execute('''
        SELECT "performanceImpact", COUNT(*),
               ROUND(AVG("winRateDifferential")::numeric, 3),
               ROUND(AVG("totalGames")::numeric, 0)
        FROM "PlayerPerformanceSplits"
        WHERE "performanceImpact" IS NOT NULL
        GROUP BY "performanceImpact"
        ORDER BY COUNT(*) DESC
    ''')
    print("\nPerformance Impact Distribution:")
    for row in cur.fetchall():
        print(f"  {row[0]}: {row[1]} players (avg win diff: {row[2]:+.3f}, avg games: {row[3]})")

    # Show notable examples
    cur.execute('''
        SELECT "playerName", team, "scoringThreshold",
               "teamWinRateAboveThreshold", "teamWinRateBelowThreshold", "winRateDifferential"
        FROM "PlayerPerformanceSplits"
        WHERE "playerName" IS NOT NULL
          AND "totalGames" >= 10
        ORDER BY ABS("winRateDifferential") DESC
        LIMIT 10
    ''')
    results = cur.fetchall()
    if results:
        print("\nMost Impactful Players (Win rate by scoring):")
        for row in results:
            above = row[3] * 100 if row[3] else 0
            below = row[4] * 100 if row[4] else 0
            diff = row[5] if row[5] else 0
            print(f"  {row[0]} ({row[1]}): >{row[2]}pts = {above:.0f}% W, <={row[2]}pts = {below:.0f}% W (diff: {diff:+.3f})")

    cur.execute('SELECT COUNT(*) FROM "PlayerPerformanceSplits"')
    total = cur.fetchone()[0]

    cur.close()
    conn.close()
    print(f"\n✅ Player performance splits complete: {total} records")

if __name__ == '__main__':
    compute_player_performance_splits()
