#!/usr/bin/env python3
"""
Build prop outcome data by joining player props to actual game stats.

Creates a PropOutcome table that links PlayerPropLine to PlayerGameMetric
to enable prop hit rate analysis.
"""

import os
import sys
import psycopg2
import re

DATABASE_URL = os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]

# Map prop types to stat keys
PROP_TO_STAT = {
    # NBA - standard props
    'points': ['points'],
    'rebounds': ['rebounds'],
    'assists': ['assists'],
    'steals': ['steals'],
    'blocks': ['blocks'],
    'threes': ['threes', 'fg3m', 'three_pointers_made'],
    'threepointersmade': ['threes', 'fg3m', 'three_pointers_made'],
    'turnovers': ['turnovers'],
    'fieldgoalsmade': ['fgm', 'field_goals_made'],
    'twopointersmade': ['two_pointers_made'],
    'pra': None,  # Calculated: points + rebounds + assists
    'pr': None,   # Calculated: points + rebounds
    'pa': None,   # Calculated: points + assists
    'ra': None,   # Calculated: rebounds + assists
    'pointsreboundsassists': None,  # Same as PRA
    'pointsrebounds': None,  # Same as PR
    'pointsassists': None,  # Same as PA
    'reboundsassists': None,  # Same as RA
    'blockssteals': None,  # Calculated: blocks + steals

    # NFL
    'passing_yards': ['passing_yards'],
    'passingyards': ['passing_yards'],
    'rushing_yards': ['rushing_yards'],
    'rushingyards': ['rushing_yards'],
    'receiving_yards': ['receiving_yards'],
    'receivingyards': ['receiving_yards'],
    'touchdowns': ['touchdowns', 'passing_tds', 'rushing_tds'],
    'receptions': ['receptions'],
    'completions': ['completions'],
    'interceptions': ['interceptions'],
    'passingtouchdowns': ['passing_tds'],
    'rushingtouchdowns': ['rushing_tds'],

    # NHL
    'goals': ['goals'],
    'shots': ['shots', 'shots_on_goal'],
    'saves': ['saves', 'nhl_g_saves'],
    'powerplaypoints': ['power_play_points'],
}

def normalize_name(name):
    """Normalize a player name for matching."""
    if not name:
        return ''
    name = name.lower().strip()
    name = re.sub(r'\s+(jr\.?|sr\.?|ii|iii|iv|v)$', '', name, flags=re.IGNORECASE)
    name = re.sub(r"['\-\.]", '', name)
    name = re.sub(r'\s+', ' ', name)
    return name.strip()

def extract_name_from_sgo_id(sgo_id):
    """Extract player name from SGO format ID."""
    if not sgo_id:
        return None
    match = re.match(r'^(.+)_\d+_(NBA|NFL|NHL|MLB|WNBA|NCAAB|NCAAF)$', sgo_id, re.IGNORECASE)
    if not match:
        return None
    name_part = match.group(1)
    name = name_part.replace('_', ' ').lower()
    return normalize_name(name)

def get_db_connection():
    return psycopg2.connect(DATABASE_URL)

def create_outcome_table(cur):
    """Create PropOutcome table if not exists."""
    cur.execute("""
        CREATE TABLE IF NOT EXISTS "PropOutcome" (
            id BIGSERIAL PRIMARY KEY,
            "propLineId" BIGINT REFERENCES "PlayerPropLine"(id),
            "gameId" BIGINT,
            "gameDate" TIMESTAMP,
            league VARCHAR(20),
            "playerExternalId" VARCHAR(255),
            "playerName" VARCHAR(255),
            "propType" VARCHAR(50),
            "lineValue" FLOAT,
            "oddsAmerican" INT,
            "actualValue" FLOAT,
            "hit" BOOLEAN,
            "push" BOOLEAN DEFAULT FALSE,
            "createdAt" TIMESTAMP DEFAULT NOW(),
            UNIQUE("propLineId")
        );

        CREATE INDEX IF NOT EXISTS idx_prop_outcome_league_prop ON "PropOutcome"(league, "propType");
        CREATE INDEX IF NOT EXISTS idx_prop_outcome_player ON "PropOutcome"("playerExternalId");
        CREATE INDEX IF NOT EXISTS idx_prop_outcome_game ON "PropOutcome"("gameId");
        CREATE INDEX IF NOT EXISTS idx_prop_outcome_date ON "PropOutcome"("gameDate");
    """)

def build_outcomes():
    """Main function to build prop outcomes."""
    conn = get_db_connection()
    cur = conn.cursor()

    # Create table
    create_outcome_table(cur)
    conn.commit()

    # Get props that don't have outcomes yet
    # Extract SGO player name from various sources:
    # 1. playerExternalId if it's SGO format (FIRSTNAME_LASTNAME_1_NBA)
    # 2. raw->'sportsgameodds'->>'statEntityID'
    # 3. Extract from raw->'sportsgameodds'->>'oddKey' (format: stat-PLAYERNAME_1_NBA-period-bet-side)
    print("Finding props without outcomes...")
    cur.execute("""
        SELECT
            p.id,
            p."gameId",
            g."gameDate",
            p.league,
            CASE
                WHEN p."playerExternalId" ~ '^[A-Z]+_.*_\d+_(NBA|NFL|NHL)$' THEN p."playerExternalId"
                WHEN p.raw->'sportsgameodds'->>'statEntityID' IS NOT NULL THEN p.raw->'sportsgameodds'->>'statEntityID'
                WHEN p.raw->'sportsgameodds'->>'playerID' IS NOT NULL THEN p.raw->'sportsgameodds'->>'playerID'
                WHEN p.raw->'sportsgameodds'->>'oddKey' ~ '-[A-Z]+_.*_\d+_(NBA|NFL|NHL)-' THEN
                    regexp_replace(p.raw->'sportsgameodds'->>'oddKey', '^[^-]+-([A-Z_0-9]+_(NBA|NFL|NHL))-.*$', '\\1')
                ELSE p."playerExternalId"
            END as player_id,
            p."propType",
            p."lineValue",
            p."oddsAmerican"
        FROM "PlayerPropLine" p
        JOIN "SportsGame" g ON p."gameId" = g.id
        LEFT JOIN "PropOutcome" o ON p.id = o."propLineId"
        WHERE o.id IS NULL
          AND g."gameDate" < NOW() - INTERVAL '3 hours'
          AND p."lineValue" > 0
          AND p.league IN ('nba', 'nhl', 'nfl')
        ORDER BY g."gameDate" DESC
        LIMIT 50000
    """)

    props = cur.fetchall()
    print(f"Found {len(props)} props to process")

    if not props:
        print("No props to process")
        return

    # Build a lookup of player stats by game
    print("\nLoading player game stats...")
    cur.execute("""
        SELECT
            "gameKey",
            "gameDate"::date as game_date,
            "playerExternalId",
            "playerName",
            "statKey",
            "value",
            league
        FROM "PlayerGameMetric"
        WHERE "gameDate" > NOW() - INTERVAL '2 years'
    """)

    # Map: (game_date, normalized_name, league, stat_key) -> value
    stats_lookup = {}
    for row in cur.fetchall():
        game_key, game_date, player_id, player_name, stat_key, value, league = row
        norm_name = normalize_name(player_name) if player_name else None
        if norm_name:
            key = (game_date, norm_name, league.lower(), stat_key.lower())
            stats_lookup[key] = value
            # Also store by player_id for direct matching
            if player_id:
                key2 = (game_date, player_id.lower(), league.lower(), stat_key.lower())
                stats_lookup[key2] = value

    print(f"Loaded {len(stats_lookup)} stat records")

    # Process props
    outcomes = []
    matched = 0
    not_matched = 0

    for prop in props:
        prop_id, game_id, game_date, league, player_ext_id, prop_type, line_value, odds = prop

        # Try to find the actual stat value
        game_date_only = game_date.date() if hasattr(game_date, 'date') else game_date

        # Get player name from SGO ID
        player_name = extract_name_from_sgo_id(player_ext_id)
        if not player_name:
            not_matched += 1
            continue

        # Normalize prop type - extract just the stat name from SGO format
        # e.g., "points+assists-COBY_WHITE_1_NBA-game-ou-under" -> "points+assists"
        prop_type_clean = prop_type.split('-')[0] if '-' in prop_type and '_' in prop_type else prop_type
        prop_type_lower = prop_type_clean.lower().replace(' ', '').replace('_', '')

        # Find matching stat keys
        stat_keys = PROP_TO_STAT.get(prop_type_lower)

        actual_value = None

        if stat_keys:
            # Direct stat lookup
            for stat_key in stat_keys:
                key = (game_date_only, player_name, league.lower(), stat_key)
                if key in stats_lookup:
                    actual_value = stats_lookup[key]
                    break
        elif prop_type_lower in ('pra', 'pointsreboundsassists'):
            # Calculate PRA
            pts = stats_lookup.get((game_date_only, player_name, league.lower(), 'points'), 0)
            reb = stats_lookup.get((game_date_only, player_name, league.lower(), 'rebounds'), 0)
            ast = stats_lookup.get((game_date_only, player_name, league.lower(), 'assists'), 0)
            if pts or reb or ast:
                actual_value = pts + reb + ast
        elif prop_type_lower in ('pr', 'pointsrebounds'):
            pts = stats_lookup.get((game_date_only, player_name, league.lower(), 'points'), 0)
            reb = stats_lookup.get((game_date_only, player_name, league.lower(), 'rebounds'), 0)
            if pts or reb:
                actual_value = pts + reb
        elif prop_type_lower in ('pa', 'pointsassists'):
            pts = stats_lookup.get((game_date_only, player_name, league.lower(), 'points'), 0)
            ast = stats_lookup.get((game_date_only, player_name, league.lower(), 'assists'), 0)
            if pts or ast:
                actual_value = pts + ast
        elif prop_type_lower in ('ra', 'reboundsassists'):
            reb = stats_lookup.get((game_date_only, player_name, league.lower(), 'rebounds'), 0)
            ast = stats_lookup.get((game_date_only, player_name, league.lower(), 'assists'), 0)
            if reb or ast:
                actual_value = reb + ast
        elif prop_type_lower in ('blockssteals', 'blocks+steals'):
            blk = stats_lookup.get((game_date_only, player_name, league.lower(), 'blocks'), 0)
            stl = stats_lookup.get((game_date_only, player_name, league.lower(), 'steals'), 0)
            if blk or stl:
                actual_value = blk + stl

        if actual_value is not None:
            matched += 1
            # Determine hit/push
            hit = actual_value > line_value
            push = abs(actual_value - line_value) < 0.01  # Handle floating point

            outcomes.append((
                prop_id, game_id, game_date, league, player_ext_id, player_name,
                prop_type_clean, line_value, odds, actual_value, hit, push
            ))

            if len(outcomes) % 5000 == 0:
                print(f"  Processed {len(outcomes)} outcomes...")
        else:
            not_matched += 1

    print(f"\nMatched: {matched}, Not matched: {not_matched}")

    # Insert outcomes
    if outcomes:
        print(f"Inserting {len(outcomes)} outcomes...")
        cur.executemany("""
            INSERT INTO "PropOutcome" (
                "propLineId", "gameId", "gameDate", league, "playerExternalId",
                "playerName", "propType", "lineValue", "oddsAmerican",
                "actualValue", hit, push
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT ("propLineId") DO NOTHING
        """, outcomes)
        conn.commit()
        print(f"Inserted {cur.rowcount} new outcomes")

    # Report hit rates
    print("\n=== Prop Hit Rates by Type ===")
    cur.execute("""
        SELECT
            league,
            "propType",
            COUNT(*) as total,
            SUM(CASE WHEN hit THEN 1 ELSE 0 END) as hits,
            ROUND(100.0 * SUM(CASE WHEN hit THEN 1 ELSE 0 END) / COUNT(*), 1) as hit_rate
        FROM "PropOutcome"
        WHERE NOT push
        GROUP BY league, "propType"
        HAVING COUNT(*) >= 50
        ORDER BY league, hit_rate DESC
    """)

    for row in cur.fetchall():
        league, prop_type, total, hits, hit_rate = row
        print(f"  {league} {prop_type}: {hits}/{total} ({hit_rate}%)")

    cur.close()
    conn.close()

if __name__ == '__main__':
    if not DATABASE_URL:
        print("Error: SPORTS_DATABASE_URL not set")
        sys.exit(1)

    build_outcomes()
