#!/usr/bin/env python3
"""
Scrape NBA Play-by-Play Data from ESPN
Source: site.api.espn.com (no blocking issues)
Extracts: Quarter splits, clutch events, ball-handler proxy, shot attempts
"""
import psycopg2
import requests
import time
import random
from datetime import datetime, timedelta
import re

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def get_espn_pbp(espn_game_id):
    """Fetch play-by-play data from ESPN API"""
    url = f'https://site.api.espn.com/apis/site/v2/sports/basketball/nba/summary'
    params = {'event': espn_game_id}

    try:
        response = requests.get(url, params=params, timeout=30)
        if response.status_code == 200:
            data = response.json()
            return data.get('plays', [])
    except Exception as e:
        print(f"    Error fetching ESPN PBP for {espn_game_id}: {e}")
    return None

def parse_espn_plays(plays, game_id, game_date):
    """Parse ESPN plays to extract player quarter stats"""
    if not plays:
        return None

    player_stats = {}  # {player_id: {Q1: {...}, Q2: {...}, ...}}

    for play in plays:
        period = play.get('period', {}).get('number', 0)
        clock = play.get('clock', {}).get('displayValue', '')  # "5:30"
        score_value = play.get('scoreValue', 0)
        play_type = play.get('type', {}).get('text', '')
        participants = play.get('participants', [])
        home_score = play.get('homeScore', '')
        away_score = play.get('awayScore', '')

        # Get period as seconds remaining
        secs_remain = 0
        if clock:
            try:
                parts = clock.split(':')
                secs_remain = int(parts[0]) * 60 + int(parts[1])
            except:
                pass

        # Check if clutch (Q4+, last 5 min, margin <= 5)
        is_clutch = False
        if period >= 4 and secs_remain <= 300:
            try:
                if home_score and away_score:
                    margin = abs(int(home_score) - int(away_score))
                    is_clutch = margin <= 5
            except:
                pass

        # Determine quarter key
        if period <= 4:
            q_key = f'Q{period}'
        else:
            q_key = 'OT'

        # Process each participant
        for participant in participants:
            athlete = participant.get('athlete', {})
            player_id = athlete.get('id')
            player_name = athlete.get('displayName')
            team_data = play.get('team', {})
            team_abbr = team_data.get('abbreviation', '')

            if not player_id:
                continue

            # Initialize player if new
            if player_id not in player_stats:
                player_stats[player_id] = {
                    'name': player_name,
                    'team': team_abbr,
                    'Q1': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                    'Q2': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                    'Q3': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                    'Q4': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                    'OT': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                    'clutch': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                }

            stats = player_stats[player_id]
            play_type_lower = play_type.lower()

            # Categorize play types
            if 'made' in play_type_lower and 'free throw' not in play_type_lower:
                stats[q_key]['fga'] += 1
                stats[q_key]['fgm'] += 1
                pts = 3 if '3-point' in play_type_lower or 'three' in play_type_lower else 2
                stats[q_key]['pts'] += pts
                if is_clutch:
                    stats['clutch']['fga'] += 1
                    stats['clutch']['fgm'] += 1
                    stats['clutch']['pts'] += pts

            elif 'missed' in play_type_lower and 'free throw' not in play_type_lower:
                stats[q_key]['fga'] += 1
                if is_clutch:
                    stats['clutch']['fga'] += 1

            elif 'made free throw' in play_type_lower:
                stats[q_key]['fta'] += 1
                stats[q_key]['ftm'] += 1
                stats[q_key]['pts'] += 1
                if is_clutch:
                    stats['clutch']['fta'] += 1
                    stats['clutch']['ftm'] += 1
                    stats['clutch']['pts'] += 1

            elif 'missed free throw' in play_type_lower:
                stats[q_key]['fta'] += 1
                if is_clutch:
                    stats['clutch']['fta'] += 1

            elif 'turnover' in play_type_lower:
                stats[q_key]['to'] += 1
                if is_clutch:
                    stats['clutch']['to'] += 1

            elif 'assist' in play_type_lower:
                stats[q_key]['ast'] += 1
                if is_clutch:
                    stats['clutch']['ast'] += 1

    return player_stats

def scrape_espn_pbp():
    conn = psycopg2.connect(load_db_url())
    cur = conn.cursor()

    # Create PlayerQuarterStats table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "PlayerQuarterStats" (
            id SERIAL PRIMARY KEY,
            "gameId" VARCHAR(50) NOT NULL,
            "gameDate" DATE NOT NULL,
            "playerExternalId" VARCHAR(50),
            "playerName" VARCHAR(200),
            team VARCHAR(10),
            -- Q1 stats
            "q1Fga" INTEGER DEFAULT 0,
            "q1Fgm" INTEGER DEFAULT 0,
            "q1Fta" INTEGER DEFAULT 0,
            "q1Ftm" INTEGER DEFAULT 0,
            "q1Ast" INTEGER DEFAULT 0,
            "q1To" INTEGER DEFAULT 0,
            "q1Pts" INTEGER DEFAULT 0,
            -- Q2 stats
            "q2Fga" INTEGER DEFAULT 0,
            "q2Fgm" INTEGER DEFAULT 0,
            "q2Fta" INTEGER DEFAULT 0,
            "q2Ftm" INTEGER DEFAULT 0,
            "q2Ast" INTEGER DEFAULT 0,
            "q2To" INTEGER DEFAULT 0,
            "q2Pts" INTEGER DEFAULT 0,
            -- Q3 stats
            "q3Fga" INTEGER DEFAULT 0,
            "q3Fgm" INTEGER DEFAULT 0,
            "q3Fta" INTEGER DEFAULT 0,
            "q3Ftm" INTEGER DEFAULT 0,
            "q3Ast" INTEGER DEFAULT 0,
            "q3To" INTEGER DEFAULT 0,
            "q3Pts" INTEGER DEFAULT 0,
            -- Q4 stats
            "q4Fga" INTEGER DEFAULT 0,
            "q4Fgm" INTEGER DEFAULT 0,
            "q4Fta" INTEGER DEFAULT 0,
            "q4Ftm" INTEGER DEFAULT 0,
            "q4Ast" INTEGER DEFAULT 0,
            "q4To" INTEGER DEFAULT 0,
            "q4Pts" INTEGER DEFAULT 0,
            -- OT stats
            "otFga" INTEGER DEFAULT 0,
            "otFgm" INTEGER DEFAULT 0,
            "otFta" INTEGER DEFAULT 0,
            "otFtm" INTEGER DEFAULT 0,
            "otAst" INTEGER DEFAULT 0,
            "otTo" INTEGER DEFAULT 0,
            "otPts" INTEGER DEFAULT 0,
            -- Clutch stats (last 5 min Q4+, margin <= 5)
            "clutchFga" INTEGER DEFAULT 0,
            "clutchFgm" INTEGER DEFAULT 0,
            "clutchFta" INTEGER DEFAULT 0,
            "clutchFtm" INTEGER DEFAULT 0,
            "clutchAst" INTEGER DEFAULT 0,
            "clutchTo" INTEGER DEFAULT 0,
            "clutchPts" INTEGER DEFAULT 0,
            "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
            UNIQUE("gameId", "playerExternalId")
        )
    ''')
    conn.commit()
    print("PlayerQuarterStats table created/verified")

    # Get recent dates to scrape from ESPN directly
    games = []
    print("\nFetching games from ESPN scoreboard...")

    for days_ago in range(1, 15):
        game_date = datetime.now() - timedelta(days=days_ago)
        date_str = game_date.strftime('%Y%m%d')

        # Check if we already have this date
        cur.execute('''
            SELECT COUNT(*) FROM "PlayerQuarterStats"
            WHERE "gameDate" = %s
        ''', (game_date.date(),))
        existing = cur.fetchone()[0]
        if existing > 0:
            continue

        # Fetch from ESPN
        url = f'https://site.api.espn.com/apis/site/v2/sports/basketball/nba/scoreboard?dates={date_str}'
        try:
            response = requests.get(url, timeout=15)
            if response.status_code == 200:
                data = response.json()
                events = data.get('events', [])
                for event in events:
                    # Only get completed games
                    status = event.get('status', {}).get('type', {}).get('completed', False)
                    if status:
                        games.append((event['id'], game_date.date()))
                print(f"  {date_str}: {len(events)} games")
        except Exception as e:
            print(f"  {date_str}: error - {e}")

        time.sleep(random.uniform(0.3, 0.6))

    print(f"\nFound {len(games)} completed games to scrape PBP data")

    total_players = 0
    for espn_id, game_date in games:
        game_id = f"espn:{espn_id}"

        print(f"  Scraping {espn_id} ({game_date})...", end=' ')

        plays = get_espn_pbp(espn_id)
        if not plays:
            print("no data")
            time.sleep(random.uniform(0.5, 1))
            continue

        player_stats = parse_espn_plays(plays, game_id, game_date)
        if not player_stats:
            print("parse failed")
            time.sleep(random.uniform(0.5, 1))
            continue

        # Insert player stats
        game_players = 0
        for player_id, stats in player_stats.items():
            if not player_id:
                continue

            q1, q2, q3, q4, ot, clutch = stats['Q1'], stats['Q2'], stats['Q3'], stats['Q4'], stats['OT'], stats['clutch']

            # Skip players with no recorded stats
            total_actions = sum([
                q1['fga'] + q1['fta'] + q1['ast'] + q1['to'],
                q2['fga'] + q2['fta'] + q2['ast'] + q2['to'],
                q3['fga'] + q3['fta'] + q3['ast'] + q3['to'],
                q4['fga'] + q4['fta'] + q4['ast'] + q4['to'],
            ])
            if total_actions == 0:
                continue

            try:
                cur.execute('''
                    INSERT INTO "PlayerQuarterStats"
                    ("gameId", "gameDate", "playerExternalId", "playerName", team,
                     "q1Fga", "q1Fgm", "q1Fta", "q1Ftm", "q1Ast", "q1To", "q1Pts",
                     "q2Fga", "q2Fgm", "q2Fta", "q2Ftm", "q2Ast", "q2To", "q2Pts",
                     "q3Fga", "q3Fgm", "q3Fta", "q3Ftm", "q3Ast", "q3To", "q3Pts",
                     "q4Fga", "q4Fgm", "q4Fta", "q4Ftm", "q4Ast", "q4To", "q4Pts",
                     "otFga", "otFgm", "otFta", "otFtm", "otAst", "otTo", "otPts",
                     "clutchFga", "clutchFgm", "clutchFta", "clutchFtm", "clutchAst", "clutchTo", "clutchPts")
                    VALUES (%s, %s, %s, %s, %s,
                            %s, %s, %s, %s, %s, %s, %s,
                            %s, %s, %s, %s, %s, %s, %s,
                            %s, %s, %s, %s, %s, %s, %s,
                            %s, %s, %s, %s, %s, %s, %s,
                            %s, %s, %s, %s, %s, %s, %s,
                            %s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT ("gameId", "playerExternalId") DO UPDATE SET
                        "q1Pts" = EXCLUDED."q1Pts", "q2Pts" = EXCLUDED."q2Pts",
                        "q3Pts" = EXCLUDED."q3Pts", "q4Pts" = EXCLUDED."q4Pts",
                        "clutchPts" = EXCLUDED."clutchPts"
                ''', (
                    game_id, game_date, str(player_id), stats['name'], stats['team'],
                    q1['fga'], q1['fgm'], q1['fta'], q1['ftm'], q1['ast'], q1['to'], q1['pts'],
                    q2['fga'], q2['fgm'], q2['fta'], q2['ftm'], q2['ast'], q2['to'], q2['pts'],
                    q3['fga'], q3['fgm'], q3['fta'], q3['ftm'], q3['ast'], q3['to'], q3['pts'],
                    q4['fga'], q4['fgm'], q4['fta'], q4['ftm'], q4['ast'], q4['to'], q4['pts'],
                    ot['fga'], ot['fgm'], ot['fta'], ot['ftm'], ot['ast'], ot['to'], ot['pts'],
                    clutch['fga'], clutch['fgm'], clutch['fta'], clutch['ftm'], clutch['ast'], clutch['to'], clutch['pts']
                ))
                game_players += 1
                total_players += 1
            except Exception as e:
                print(f"\n    Error inserting player {player_id}: {e}")
                conn.rollback()
                continue

        conn.commit()
        print(f"{game_players} players")

        # Rate limiting
        time.sleep(random.uniform(0.3, 0.8))

    cur.execute('SELECT COUNT(*) FROM "PlayerQuarterStats"')
    total = cur.fetchone()[0]

    # Show sample data
    if total > 0:
        cur.execute('''
            SELECT "playerName", team,
                   SUM("q1Pts" + "q2Pts") as first_half,
                   SUM("q3Pts" + "q4Pts") as second_half,
                   SUM("q4Pts") as q4_pts,
                   SUM("clutchPts") as clutch_pts
            FROM "PlayerQuarterStats"
            GROUP BY "playerName", team
            HAVING COUNT(*) >= 5
            ORDER BY SUM("q4Pts") DESC
            LIMIT 10
        ''')
        results = cur.fetchall()
        if results:
            print("\nTop Q4 Scorers (min 5 games):")
            for row in results:
                print(f"  {row[0]} ({row[1]}): 1H={row[2]}, 2H={row[3]}, Q4={row[4]}, Clutch={row[5]}")

    cur.close()
    conn.close()
    print(f"\nPlayerQuarterStats complete: {total} records (added {total_players} this run)")

if __name__ == '__main__':
    scrape_espn_pbp()
