#!/usr/bin/env python3
"""
Scrape NBA Play-by-Play Data
Source: stats.nba.com/stats/playbyplayv2
Extracts: Quarter splits, clutch events, ball-handler proxy, shot attempts
"""
import psycopg2
import requests
import time
import random
from datetime import datetime, timedelta
import json

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def load_proxies():
    """Load proxies from env file"""
    proxies_list = []
    try:
        with open('/var/www/html/eventheodds/.env', 'r') as f:
            for line in f:
                if line.startswith('PROXY_') and '=' in line:
                    proxy = line.split('=', 1)[1].strip()
                    if proxy:
                        proxies_list.append(proxy)
    except:
        pass

    # Default public proxies (SOCKS5 and HTTP) - replace with your own
    if not proxies_list:
        proxies_list = [
            None,  # Direct connection first
            # Add your proxies here in format: http://ip:port or socks5://ip:port
        ]
    return proxies_list

PROXIES = load_proxies()
CURRENT_PROXY_IDX = 0

def get_next_proxy():
    """Rotate to next proxy"""
    global CURRENT_PROXY_IDX
    CURRENT_PROXY_IDX = (CURRENT_PROXY_IDX + 1) % len(PROXIES)
    return PROXIES[CURRENT_PROXY_IDX]

def get_proxy_dict(proxy):
    """Convert proxy string to requests format"""
    if not proxy:
        return None
    return {
        'http': proxy,
        'https': proxy,
    }

# Headers to avoid NBA.com blocking - rotate user agents
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
]

def get_headers():
    """Get headers with rotated user agent"""
    return {
        'Host': 'stats.nba.com',
        'User-Agent': random.choice(USER_AGENTS),
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'x-nba-stats-origin': 'stats',
        'x-nba-stats-token': 'true',
        'Origin': 'https://www.nba.com',
        'Referer': 'https://www.nba.com/',
        'Connection': 'keep-alive',
    }

# Keep old HEADERS for backward compat
HEADERS = get_headers()

def get_pbp_data(game_id):
    """Fetch play-by-play data for a game"""
    url = 'https://stats.nba.com/stats/playbyplayv2'
    params = {
        'GameID': game_id,
        'StartPeriod': 1,
        'EndPeriod': 10,  # Include OT
    }

    response = make_request(url, params)
    if response:
        try:
            data = response.json()
            if 'resultSets' in data and len(data['resultSets']) > 0:
                headers = data['resultSets'][0]['headers']
                rows = data['resultSets'][0]['rowSet']
                return [dict(zip(headers, row)) for row in rows]
        except Exception as e:
            print(f"    Error parsing PBP for {game_id}: {e}")
    return None

def parse_pbp_events(pbp_data, game_id):
    """Parse PBP events to extract player quarter stats and clutch events"""
    if not pbp_data:
        return None

    player_stats = {}  # {player_id: {Q1: {...}, Q2: {...}, ...}}
    clutch_events = []  # Events in last 5 min with margin <= 5

    for event in pbp_data:
        period = event.get('PERIOD', 0)
        clock = event.get('PCTIMESTRING', '')  # "MM:SS"
        score_home = event.get('SCOREHOME', '')
        score_away = event.get('SCOREVISITOR', '')
        event_type = event.get('EVENTMSGTYPE', 0)
        player1_id = event.get('PLAYER1_ID')
        player1_name = event.get('PLAYER1_NAME')
        player1_team = event.get('PLAYER1_TEAM_ABBREVIATION')
        description = event.get('HOMEDESCRIPTION', '') or event.get('VISITORDESCRIPTION', '') or event.get('NEUTRALDESCRIPTION', '')

        if not player1_id:
            continue

        # Initialize player if new
        if player1_id not in player_stats:
            player_stats[player1_id] = {
                'name': player1_name,
                'team': player1_team,
                'Q1': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                'Q2': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                'Q3': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                'Q4': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                'OT': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                'clutch': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
            }

        # Determine quarter key
        if period <= 4:
            q_key = f'Q{period}'
        else:
            q_key = 'OT'

        # Check if clutch (Q4/OT, last 5 min, margin <= 5)
        is_clutch = False
        if period >= 4 and clock:
            try:
                mins, secs = map(int, clock.split(':'))
                time_remaining = mins * 60 + secs
                if time_remaining <= 300:  # Last 5 minutes
                    if score_home and score_away:
                        try:
                            margin = abs(int(score_home) - int(score_away))
                            is_clutch = margin <= 5
                        except:
                            pass
            except:
                pass

        # Parse event types
        # 1 = Made FG, 2 = Missed FG, 3 = Made FT, 4 = Missed FT, 5 = Turnover, 10 = Assist
        stats = player_stats[player1_id]

        if event_type == 1:  # Made shot
            stats[q_key]['fga'] += 1
            stats[q_key]['fgm'] += 1
            # Determine points (check for 3PT in description)
            pts = 3 if '3PT' in description.upper() else 2
            stats[q_key]['pts'] += pts
            if is_clutch:
                stats['clutch']['fga'] += 1
                stats['clutch']['fgm'] += 1
                stats['clutch']['pts'] += pts

        elif event_type == 2:  # Missed shot
            stats[q_key]['fga'] += 1
            if is_clutch:
                stats['clutch']['fga'] += 1

        elif event_type == 3:  # Made FT
            stats[q_key]['fta'] += 1
            stats[q_key]['ftm'] += 1
            stats[q_key]['pts'] += 1
            if is_clutch:
                stats['clutch']['fta'] += 1
                stats['clutch']['ftm'] += 1
                stats['clutch']['pts'] += 1

        elif event_type == 4:  # Missed FT
            stats[q_key]['fta'] += 1
            if is_clutch:
                stats['clutch']['fta'] += 1

        elif event_type == 5:  # Turnover
            stats[q_key]['to'] += 1
            if is_clutch:
                stats['clutch']['to'] += 1

        # Check for assist (PLAYER2 is the assister in made FG events)
        if event_type == 1:
            player2_id = event.get('PLAYER2_ID')
            player2_name = event.get('PLAYER2_NAME')
            player2_team = event.get('PLAYER2_TEAM_ABBREVIATION')
            if player2_id:
                if player2_id not in player_stats:
                    player_stats[player2_id] = {
                        'name': player2_name,
                        'team': player2_team,
                        'Q1': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                        'Q2': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                        'Q3': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                        'Q4': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                        'OT': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                        'clutch': {'fga': 0, 'fgm': 0, 'fta': 0, 'ftm': 0, 'ast': 0, 'to': 0, 'pts': 0},
                    }
                player_stats[player2_id][q_key]['ast'] += 1
                if is_clutch:
                    player_stats[player2_id]['clutch']['ast'] += 1

    return player_stats

def make_request(url, params, max_retries=3):
    """Make request with proxy rotation and retries"""
    for attempt in range(max_retries):
        proxy = PROXIES[CURRENT_PROXY_IDX] if PROXIES else None
        proxy_dict = get_proxy_dict(proxy)

        try:
            response = requests.get(
                url,
                headers=get_headers(),
                params=params,
                proxies=proxy_dict,
                timeout=30
            )
            if response.status_code == 200:
                return response
            elif response.status_code in [403, 429]:
                print(f"    Blocked (status {response.status_code}), rotating proxy...")
                get_next_proxy()
                time.sleep(random.uniform(2, 5))
        except requests.exceptions.RequestException as e:
            print(f"    Request error: {e}, trying next proxy...")
            get_next_proxy()
            time.sleep(random.uniform(1, 3))

    return None

def get_games_for_date(game_date):
    """Fetch all NBA games for a specific date from NBA.com"""
    url = 'https://stats.nba.com/stats/scoreboardv2'
    params = {
        'GameDate': game_date.strftime('%Y-%m-%d'),
        'LeagueID': '00',
        'DayOffset': 0,
    }

    response = make_request(url, params)
    if response:
        try:
            data = response.json()
            if 'resultSets' in data:
                for result_set in data['resultSets']:
                    if result_set['name'] == 'GameHeader':
                        headers = result_set['headers']
                        rows = result_set['rowSet']
                        games = [dict(zip(headers, row)) for row in rows]
                        return [g['GAME_ID'] for g in games if g.get('GAME_STATUS_ID') == 3]
        except Exception as e:
            print(f"    Error parsing games for {game_date}: {e}")
    return []

def scrape_nba_pbp():
    conn = psycopg2.connect(load_db_url())
    cur = conn.cursor()

    # Create PlayerQuarterStats table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "PlayerQuarterStats" (
            id SERIAL PRIMARY KEY,
            "gameId" VARCHAR(20) NOT NULL,
            "gameDate" DATE NOT NULL,
            "playerExternalId" VARCHAR(50),
            "playerName" VARCHAR(200),
            team VARCHAR(10),
            -- Q1 stats
            "q1Fga" INTEGER DEFAULT 0,
            "q1Fgm" INTEGER DEFAULT 0,
            "q1Fta" INTEGER DEFAULT 0,
            "q1Ftm" INTEGER DEFAULT 0,
            "q1Ast" INTEGER DEFAULT 0,
            "q1To" INTEGER DEFAULT 0,
            "q1Pts" INTEGER DEFAULT 0,
            -- Q2 stats
            "q2Fga" INTEGER DEFAULT 0,
            "q2Fgm" INTEGER DEFAULT 0,
            "q2Fta" INTEGER DEFAULT 0,
            "q2Ftm" INTEGER DEFAULT 0,
            "q2Ast" INTEGER DEFAULT 0,
            "q2To" INTEGER DEFAULT 0,
            "q2Pts" INTEGER DEFAULT 0,
            -- Q3 stats
            "q3Fga" INTEGER DEFAULT 0,
            "q3Fgm" INTEGER DEFAULT 0,
            "q3Fta" INTEGER DEFAULT 0,
            "q3Ftm" INTEGER DEFAULT 0,
            "q3Ast" INTEGER DEFAULT 0,
            "q3To" INTEGER DEFAULT 0,
            "q3Pts" INTEGER DEFAULT 0,
            -- Q4 stats
            "q4Fga" INTEGER DEFAULT 0,
            "q4Fgm" INTEGER DEFAULT 0,
            "q4Fta" INTEGER DEFAULT 0,
            "q4Ftm" INTEGER DEFAULT 0,
            "q4Ast" INTEGER DEFAULT 0,
            "q4To" INTEGER DEFAULT 0,
            "q4Pts" INTEGER DEFAULT 0,
            -- OT stats
            "otFga" INTEGER DEFAULT 0,
            "otFgm" INTEGER DEFAULT 0,
            "otFta" INTEGER DEFAULT 0,
            "otFtm" INTEGER DEFAULT 0,
            "otAst" INTEGER DEFAULT 0,
            "otTo" INTEGER DEFAULT 0,
            "otPts" INTEGER DEFAULT 0,
            -- Clutch stats (last 5 min, margin <= 5)
            "clutchFga" INTEGER DEFAULT 0,
            "clutchFgm" INTEGER DEFAULT 0,
            "clutchFta" INTEGER DEFAULT 0,
            "clutchFtm" INTEGER DEFAULT 0,
            "clutchAst" INTEGER DEFAULT 0,
            "clutchTo" INTEGER DEFAULT 0,
            "clutchPts" INTEGER DEFAULT 0,
            -- Derived
            "firstHalfPts" INTEGER GENERATED ALWAYS AS ("q1Pts" + "q2Pts") STORED,
            "secondHalfPts" INTEGER GENERATED ALWAYS AS ("q3Pts" + "q4Pts") STORED,
            "q4Usage" NUMERIC GENERATED ALWAYS AS (
                CASE WHEN ("q4Fga" + "q4Fta" + "q4To") > 0
                THEN ("q4Fga" + "q4Fta" * 0.44 + "q4To")::numeric
                ELSE 0 END
            ) STORED,
            "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
            UNIQUE("gameId", "playerExternalId")
        )
    ''')
    conn.commit()
    print("PlayerQuarterStats table created/verified")

    # Get dates with NBA games that we haven't fully scraped
    cur.execute('''
        SELECT DISTINCT sg."gameDate"::date as gd
        FROM "SportsGame" sg
        WHERE sg.league = 'nba'
          AND sg."homeScore" IS NOT NULL
          AND sg."gameDate" >= CURRENT_DATE - INTERVAL '14 days'
        ORDER BY gd DESC
        LIMIT 14
    ''')
    game_dates = [row[0] for row in cur.fetchall()]
    print(f"\nChecking {len(game_dates)} recent dates for NBA games...")

    # Get game IDs from NBA.com for each date
    games = []
    for game_date in game_dates:
        # Check if we already have this date scraped
        cur.execute('''
            SELECT COUNT(*) FROM "PlayerQuarterStats" WHERE "gameDate" = %s
        ''', (game_date,))
        existing = cur.fetchone()[0]
        if existing > 0:
            continue

        game_ids = get_games_for_date(game_date)
        for gid in game_ids:
            games.append((gid, game_date))
        time.sleep(random.uniform(0.5, 1))

    print(f"Found {len(games)} games to scrape PBP data")

    total_players = 0
    for game_id, game_date in games:
        print(f"  Scraping {game_id} ({game_date})...", end=' ')

        pbp_data = get_pbp_data(game_id)
        if not pbp_data:
            print("no data")
            time.sleep(random.uniform(2, 4))
            continue

        player_stats = parse_pbp_events(pbp_data, game_id)
        if not player_stats:
            print("parse failed")
            time.sleep(random.uniform(2, 4))
            continue

        # Insert player stats
        for player_id, stats in player_stats.items():
            if not player_id:
                continue

            q1, q2, q3, q4, ot, clutch = stats['Q1'], stats['Q2'], stats['Q3'], stats['Q4'], stats['OT'], stats['clutch']

            cur.execute('''
                INSERT INTO "PlayerQuarterStats"
                ("gameId", "gameDate", "playerExternalId", "playerName", team,
                 "q1Fga", "q1Fgm", "q1Fta", "q1Ftm", "q1Ast", "q1To", "q1Pts",
                 "q2Fga", "q2Fgm", "q2Fta", "q2Ftm", "q2Ast", "q2To", "q2Pts",
                 "q3Fga", "q3Fgm", "q3Fta", "q3Ftm", "q3Ast", "q3To", "q3Pts",
                 "q4Fga", "q4Fgm", "q4Fta", "q4Ftm", "q4Ast", "q4To", "q4Pts",
                 "otFga", "otFgm", "otFta", "otFtm", "otAst", "otTo", "otPts",
                 "clutchFga", "clutchFgm", "clutchFta", "clutchFtm", "clutchAst", "clutchTo", "clutchPts")
                VALUES (%s, %s, %s, %s, %s,
                        %s, %s, %s, %s, %s, %s, %s,
                        %s, %s, %s, %s, %s, %s, %s,
                        %s, %s, %s, %s, %s, %s, %s,
                        %s, %s, %s, %s, %s, %s, %s,
                        %s, %s, %s, %s, %s, %s, %s,
                        %s, %s, %s, %s, %s, %s, %s)
                ON CONFLICT ("gameId", "playerExternalId") DO UPDATE SET
                    "q1Fga" = EXCLUDED."q1Fga", "q1Fgm" = EXCLUDED."q1Fgm",
                    "q4Pts" = EXCLUDED."q4Pts", "clutchPts" = EXCLUDED."clutchPts"
            ''', (
                game_id, game_date, str(player_id), stats['name'], stats['team'],
                q1['fga'], q1['fgm'], q1['fta'], q1['ftm'], q1['ast'], q1['to'], q1['pts'],
                q2['fga'], q2['fgm'], q2['fta'], q2['ftm'], q2['ast'], q2['to'], q2['pts'],
                q3['fga'], q3['fgm'], q3['fta'], q3['ftm'], q3['ast'], q3['to'], q3['pts'],
                q4['fga'], q4['fgm'], q4['fta'], q4['ftm'], q4['ast'], q4['to'], q4['pts'],
                ot['fga'], ot['fgm'], ot['fta'], ot['ftm'], ot['ast'], ot['to'], ot['pts'],
                clutch['fga'], clutch['fgm'], clutch['fta'], clutch['ftm'], clutch['ast'], clutch['to'], clutch['pts']
            ))
            total_players += 1

        conn.commit()
        print(f"{len(player_stats)} players")

        # Rate limiting
        time.sleep(random.uniform(1.5, 3))

    cur.execute('SELECT COUNT(*) FROM "PlayerQuarterStats"')
    total = cur.fetchone()[0]

    cur.close()
    conn.close()
    print(f"\nPlayerQuarterStats complete: {total} records (added {total_players} this run)")

if __name__ == '__main__':
    scrape_nba_pbp()
