#!/usr/bin/env python3
"""
Scrape NBA Shot Chart Data
Source: stats.nba.com/stats/shotchartdetail
Extracts: Shot locations (paint, midrange, 3PT), shot type, make/miss
"""
import psycopg2
import requests
import time
import random
from datetime import datetime, timedelta

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

HEADERS = {
    'Host': 'stats.nba.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.9',
    'x-nba-stats-origin': 'stats',
    'x-nba-stats-token': 'true',
    'Origin': 'https://www.nba.com',
    'Referer': 'https://www.nba.com/',
}

def classify_shot_zone(x, y, distance):
    """Classify shot into zone based on coordinates"""
    # Court coordinates: x is left-right (-250 to 250), y is baseline to half (0 to ~940)
    # Paint is roughly within x: -80 to 80, y: 0 to ~190
    # 3PT line is at ~237 from basket (variable corners)

    if distance is None:
        return 'UNKNOWN'

    if distance <= 4:
        return 'RIM'  # At rim / layup
    elif distance <= 8:
        return 'PAINT'  # In the paint
    elif distance <= 16:
        return 'MIDRANGE'  # Midrange
    elif distance >= 22:
        return 'THREE'  # 3PT (corner 3s are ~22ft)
    else:
        return 'LONG_TWO'  # Long 2

def get_shot_data(game_id):
    """Fetch shot chart data for a game"""
    url = 'https://stats.nba.com/stats/shotchartdetail'
    params = {
        'GameID': game_id,
        'Season': '2024-25',
        'SeasonType': 'Regular Season',
        'PlayerID': 0,  # 0 = all players
        'TeamID': 0,
        'Outcome': '',
        'Position': '',
        'RookieYear': '',
        'GameSegment': '',
        'Period': 0,
        'LastNGames': 0,
        'DateFrom': '',
        'DateTo': '',
        'OpponentTeamID': 0,
        'VsConference': '',
        'VsDivision': '',
        'ContextMeasure': 'FGA',
        'ContextFilter': '',
        'PlayerPosition': '',
        'LeagueID': '00',
    }

    try:
        response = requests.get(url, headers=HEADERS, params=params, timeout=30)
        if response.status_code == 200:
            data = response.json()
            if 'resultSets' in data and len(data['resultSets']) > 0:
                headers = data['resultSets'][0]['headers']
                rows = data['resultSets'][0]['rowSet']
                return [dict(zip(headers, row)) for row in rows]
    except Exception as e:
        print(f"    Error fetching shots for {game_id}: {e}")
    return None

def scrape_nba_shots():
    conn = psycopg2.connect(load_db_url())
    cur = conn.cursor()

    # Create PlayerShotLocation table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "PlayerShotLocation" (
            id SERIAL PRIMARY KEY,
            "gameId" VARCHAR(20) NOT NULL,
            "gameDate" DATE NOT NULL,
            "playerExternalId" VARCHAR(50),
            "playerName" VARCHAR(200),
            team VARCHAR(10),
            period INTEGER,
            "minutesRemaining" INTEGER,
            "shotType" VARCHAR(50),
            "shotZone" VARCHAR(20),
            "shotDistance" INTEGER,
            "locX" INTEGER,
            "locY" INTEGER,
            "shotMade" BOOLEAN,
            -- Derived zone
            "zoneClass" VARCHAR(20),
            "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()
        )
    ''')

    # Create aggregated table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "PlayerShotZoneSummary" (
            id SERIAL PRIMARY KEY,
            "playerExternalId" VARCHAR(50),
            "playerName" VARCHAR(200),
            team VARCHAR(10),
            season VARCHAR(20),
            -- Zone attempts
            "rimAttempts" INTEGER DEFAULT 0,
            "rimMakes" INTEGER DEFAULT 0,
            "paintAttempts" INTEGER DEFAULT 0,
            "paintMakes" INTEGER DEFAULT 0,
            "midrangeAttempts" INTEGER DEFAULT 0,
            "midrangeMakes" INTEGER DEFAULT 0,
            "threeAttempts" INTEGER DEFAULT 0,
            "threeMakes" INTEGER DEFAULT 0,
            -- Percentages
            "rimPct" NUMERIC,
            "paintPct" NUMERIC,
            "midrangePct" NUMERIC,
            "threePct" NUMERIC,
            -- Shot distribution
            "rimRate" NUMERIC,
            "paintRate" NUMERIC,
            "midrangeRate" NUMERIC,
            "threeRate" NUMERIC,
            -- Q4/Clutch shots
            "q4ThreeAttempts" INTEGER DEFAULT 0,
            "q4RimAttempts" INTEGER DEFAULT 0,
            "clutchThreeAttempts" INTEGER DEFAULT 0,
            "clutchRimAttempts" INTEGER DEFAULT 0,
            "updatedAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
            UNIQUE("playerExternalId", team, season)
        )
    ''')
    conn.commit()
    print("PlayerShotLocation and PlayerShotZoneSummary tables created/verified")

    # Get recent NBA games
    cur.execute('''
        SELECT DISTINCT sg."externalGameId", sg."gameDate"
        FROM "SportsGame" sg
        LEFT JOIN "PlayerShotLocation" psl ON sg."externalGameId" = psl."gameId"
        WHERE sg.league = 'nba'
          AND sg."homeScore" IS NOT NULL
          AND sg."gameDate" >= CURRENT_DATE - INTERVAL '30 days'
          AND psl.id IS NULL
          AND sg."externalGameId" LIKE '00%'
        ORDER BY sg."gameDate" DESC
        LIMIT 30
    ''')
    games = cur.fetchall()
    print(f"\nFound {len(games)} games to scrape shot data")

    total_shots = 0
    for game_id, game_date in games:
        print(f"  Scraping {game_id} ({game_date})...", end=' ')

        shot_data = get_shot_data(game_id)
        if not shot_data:
            print("no data")
            time.sleep(random.uniform(2, 4))
            continue

        game_shots = 0
        for shot in shot_data:
            player_id = shot.get('PLAYER_ID')
            player_name = shot.get('PLAYER_NAME')
            team = shot.get('TEAM_NAME', '')[:3].upper() if shot.get('TEAM_NAME') else None
            period = shot.get('PERIOD')
            mins_remain = shot.get('MINUTES_REMAINING')
            shot_type = shot.get('ACTION_TYPE')
            shot_zone = shot.get('SHOT_ZONE_BASIC')
            distance = shot.get('SHOT_DISTANCE')
            loc_x = shot.get('LOC_X')
            loc_y = shot.get('LOC_Y')
            made = shot.get('SHOT_MADE_FLAG') == 1

            zone_class = classify_shot_zone(loc_x, loc_y, distance)

            cur.execute('''
                INSERT INTO "PlayerShotLocation"
                ("gameId", "gameDate", "playerExternalId", "playerName", team,
                 period, "minutesRemaining", "shotType", "shotZone", "shotDistance",
                 "locX", "locY", "shotMade", "zoneClass")
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ''', (
                game_id, game_date, str(player_id), player_name, team,
                period, mins_remain, shot_type, shot_zone, distance,
                loc_x, loc_y, made, zone_class
            ))
            game_shots += 1
            total_shots += 1

        conn.commit()
        print(f"{game_shots} shots")
        time.sleep(random.uniform(1.5, 3))

    # Compute summary stats
    print("\nComputing shot zone summaries...")
    cur.execute('''
        INSERT INTO "PlayerShotZoneSummary"
        ("playerExternalId", "playerName", team, season,
         "rimAttempts", "rimMakes", "paintAttempts", "paintMakes",
         "midrangeAttempts", "midrangeMakes", "threeAttempts", "threeMakes",
         "rimPct", "paintPct", "midrangePct", "threePct",
         "rimRate", "paintRate", "midrangeRate", "threeRate",
         "q4ThreeAttempts", "q4RimAttempts")
        SELECT
            "playerExternalId",
            MAX("playerName"),
            MAX(team),
            EXTRACT(YEAR FROM "gameDate")::text,
            COUNT(CASE WHEN "zoneClass" = 'RIM' THEN 1 END),
            COUNT(CASE WHEN "zoneClass" = 'RIM' AND "shotMade" THEN 1 END),
            COUNT(CASE WHEN "zoneClass" = 'PAINT' THEN 1 END),
            COUNT(CASE WHEN "zoneClass" = 'PAINT' AND "shotMade" THEN 1 END),
            COUNT(CASE WHEN "zoneClass" = 'MIDRANGE' THEN 1 END),
            COUNT(CASE WHEN "zoneClass" = 'MIDRANGE' AND "shotMade" THEN 1 END),
            COUNT(CASE WHEN "zoneClass" = 'THREE' THEN 1 END),
            COUNT(CASE WHEN "zoneClass" = 'THREE' AND "shotMade" THEN 1 END),
            -- Percentages
            ROUND(COUNT(CASE WHEN "zoneClass" = 'RIM' AND "shotMade" THEN 1 END)::numeric /
                  NULLIF(COUNT(CASE WHEN "zoneClass" = 'RIM' THEN 1 END), 0), 3),
            ROUND(COUNT(CASE WHEN "zoneClass" = 'PAINT' AND "shotMade" THEN 1 END)::numeric /
                  NULLIF(COUNT(CASE WHEN "zoneClass" = 'PAINT' THEN 1 END), 0), 3),
            ROUND(COUNT(CASE WHEN "zoneClass" = 'MIDRANGE' AND "shotMade" THEN 1 END)::numeric /
                  NULLIF(COUNT(CASE WHEN "zoneClass" = 'MIDRANGE' THEN 1 END), 0), 3),
            ROUND(COUNT(CASE WHEN "zoneClass" = 'THREE' AND "shotMade" THEN 1 END)::numeric /
                  NULLIF(COUNT(CASE WHEN "zoneClass" = 'THREE' THEN 1 END), 0), 3),
            -- Rates
            ROUND(COUNT(CASE WHEN "zoneClass" = 'RIM' THEN 1 END)::numeric / NULLIF(COUNT(*), 0), 3),
            ROUND(COUNT(CASE WHEN "zoneClass" = 'PAINT' THEN 1 END)::numeric / NULLIF(COUNT(*), 0), 3),
            ROUND(COUNT(CASE WHEN "zoneClass" = 'MIDRANGE' THEN 1 END)::numeric / NULLIF(COUNT(*), 0), 3),
            ROUND(COUNT(CASE WHEN "zoneClass" = 'THREE' THEN 1 END)::numeric / NULLIF(COUNT(*), 0), 3),
            -- Q4 shots
            COUNT(CASE WHEN period = 4 AND "zoneClass" = 'THREE' THEN 1 END),
            COUNT(CASE WHEN period = 4 AND "zoneClass" = 'RIM' THEN 1 END)
        FROM "PlayerShotLocation"
        GROUP BY "playerExternalId", EXTRACT(YEAR FROM "gameDate")
        HAVING COUNT(*) >= 10
        ON CONFLICT ("playerExternalId", team, season) DO UPDATE SET
            "rimAttempts" = EXCLUDED."rimAttempts",
            "rimMakes" = EXCLUDED."rimMakes",
            "threeAttempts" = EXCLUDED."threeAttempts",
            "threeMakes" = EXCLUDED."threeMakes",
            "rimPct" = EXCLUDED."rimPct",
            "threePct" = EXCLUDED."threePct",
            "rimRate" = EXCLUDED."rimRate",
            "threeRate" = EXCLUDED."threeRate",
            "q4ThreeAttempts" = EXCLUDED."q4ThreeAttempts",
            "q4RimAttempts" = EXCLUDED."q4RimAttempts",
            "updatedAt" = NOW()
    ''')
    conn.commit()

    cur.execute('SELECT COUNT(*) FROM "PlayerShotLocation"')
    total_locs = cur.fetchone()[0]
    cur.execute('SELECT COUNT(*) FROM "PlayerShotZoneSummary"')
    total_summary = cur.fetchone()[0]

    cur.close()
    conn.close()
    print(f"\nShot data complete: {total_locs} shot locations, {total_summary} player summaries")

if __name__ == '__main__':
    scrape_nba_shots()
