#!/usr/bin/env python3
"""
Link Player Props to Actual Game Outcomes
Matches props to player game stats to calculate hit rates.

Run: python3 scripts/link_props_to_outcomes.py
"""
import psycopg2
import os
import re
from datetime import datetime, timezone
from collections import defaultdict

# Prop type to stat key mapping
# Note: Some props include suffixes like "-PLAYER_NAME-game-ou-over", extract base type first
PROP_TO_STAT = {
    'nba': {
        'points': ['points', 'espn_pts'],
        'assists': ['assists', 'espn_ast'],
        'rebounds': ['rebounds', 'espn_reb'],
        'steals': ['steals', 'espn_stl'],
        'blocks': ['blocks', 'espn_blk'],
        'threepointersmade': ['threes', 'nba_fg3m'],
        'threepointersattempted': ['nba_fg3a'],
        'fieldgoalsmade': ['nba_fgm'],
        'fieldgoalsattempted': ['nba_fga'],
        'freethrowsmade': ['nba_ftm'],
        'freethrowsattempted': ['nba_fta'],
        'turnovers': ['espn_to', 'nba_tov'],
        'minutes': ['nba_min', 'espn_min'],
        'fantasyscore': [],  # Complex calculation, skip for now
        # Two pointers require calculation - skip for now
        'twopointersmade': [],
        'twopointersattempted': [],
        # Combined props
        'points+rebounds': ['points', 'rebounds'],
        'points+assists': ['points', 'assists'],
        'rebounds+assists': ['rebounds', 'assists'],
        'points+rebounds+assists': ['points', 'rebounds', 'assists'],
        'blocks+steals': ['blocks', 'steals'],
    },
    'nfl': {
        'passing_yards': ['passing_yards', 'pass_yds'],
        'rushing_yards': ['rushing_yards', 'rush_yds'],
        'receiving_yards': ['receiving_yards', 'rec_yds'],
        'passing_touchdowns': ['passing_touchdowns', 'pass_td'],
        'rushing_touchdowns': ['rushing_touchdowns', 'rush_td'],
        'receiving_touchdowns': ['receiving_touchdowns', 'rec_td'],
        'touchdowns': ['touchdowns', 'total_td'],
        'receptions': ['receptions', 'rec'],
        'completions': ['completions', 'pass_cmp'],
        'interceptions': ['interceptions', 'pass_int'],
    },
    'nhl': {
        'points': ['points', 'nhl_pts'],
        'goals': ['goals', 'nhl_goals'],
        'assists': ['assists', 'nhl_assists'],
        'shots_on_goal': ['shots', 'shots_on_goal', 'sog'],
        'saves': ['saves', 'goalie_saves'],
        'power_play_points': ['power_play_points', 'pp_pts'],
    },
}


def extract_base_prop_type(prop_type):
    """Extract base prop type from complex prop type strings.

    Examples:
    - 'points' -> 'points'
    - 'points-LEBRON_JAMES_1_NBA-game-ou-over' -> 'points'
    - 'fieldGoalsMade-PLAYER_NAME-1q-ou-under' -> 'fieldGoalsMade'
    - 'blocks+steals' -> 'blocks+steals'
    """
    if not prop_type:
        return None
    # If contains dash followed by uppercase (player name), extract first part
    if '-' in prop_type:
        parts = prop_type.split('-')
        # Check if second part looks like player ID (uppercase)
        if len(parts) > 1 and parts[1].isupper():
            return parts[0].lower()
    return prop_type.lower()


def normalize_player_name(name):
    """Normalize player name for matching.

    Handles variations like:
    - De'Aaron Fox vs DeAaron Fox vs deaaron fox
    - Nickeil Alexander-Walker vs Nickeil Alexanderwalker
    - Kevin Porter Jr. vs Kevin Porter
    """
    if not name:
        return None
    # Remove suffixes like Jr., Sr., III, II, IV, V at end
    name = re.sub(r'\s+(Jr\.?|Sr\.?|III|II|IV|V)$', '', name, flags=re.IGNORECASE)
    # Also handle "Jr." in middle of name (like "Jaime Jr. Jaquez")
    name = re.sub(r'\s+Jr\.?\s+', ' ', name, flags=re.IGNORECASE)
    # Remove apostrophes, hyphens, periods
    name = name.replace("'", "").replace("-", "").replace(".", "")
    # Remove extra spaces and lowercase
    name = ' '.join(name.split()).lower()
    return name


def load_db_url():
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().strip('"').split('?')[0]
    except FileNotFoundError:
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def run_linkage():
    print("=" * 60)
    print("LINK PROPS TO OUTCOMES")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    db_url = load_db_url()
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()

    # Ensure outcome columns exist
    print("\n[1] Ensuring outcome columns exist...")
    for col, col_type in [
        ('actualValue', 'DOUBLE PRECISION'),
        ('result', 'VARCHAR(10)'),
        ('linkedAt', 'TIMESTAMP'),
    ]:
        try:
            cur.execute(f'''
                ALTER TABLE "PlayerPropLine" ADD COLUMN IF NOT EXISTS "{col}" {col_type}
            ''')
        except Exception as e:
            pass
    conn.commit()
    print("  Columns ready")

    # Get props that need linking (have gameId, no result yet, game completed)
    print("\n[2] Finding props to link...")
    cur.execute('''
        SELECT
            ppl.id, ppl.league, ppl."gameId", ppl."playerExternalId",
            ppl."propType", ppl."lineValue",
            p.name as player_name,
            sg."homeTeam", sg."awayTeam", sg."gameDate"::date as game_date
        FROM "PlayerPropLine" ppl
        JOIN "Player" p ON ppl."playerExternalId" = p."externalPlayerId" AND ppl.league = p.league
        JOIN "SportsGame" sg ON ppl."gameId" = sg.id
        WHERE ppl.result IS NULL
          AND sg."homeScore" IS NOT NULL
          AND ppl.league IN ('nba', 'nfl', 'nhl')
        ORDER BY ppl.league, ppl."gameId"
    ''')
    props_to_link = cur.fetchall()
    print(f"  Found {len(props_to_link)} props to link")

    if not props_to_link:
        print("  No props need linking")
        cur.close()
        conn.close()
        return {'linked': 0, 'over': 0, 'under': 0, 'push': 0}

    # Build player game stats lookup
    # Key: (league, normalized_player_name, game_date, team) -> {stat_key: value}
    print("\n[3] Building player stats lookup...")

    # Get relevant dates and leagues
    dates = set()
    leagues = set()
    for row in props_to_link:
        leagues.add(row[1])
        dates.add(str(row[9]))

    date_list = "', '".join(sorted(dates))
    league_list = "', '".join(sorted(leagues))

    cur.execute(f'''
        SELECT
            league,
            "playerName",
            "gameDate"::date as game_date,
            team,
            "statKey",
            value
        FROM "PlayerGameMetric"
        WHERE league IN ('{league_list}')
          AND "gameDate"::date IN ('{date_list}')
    ''')

    stats_lookup = defaultdict(dict)
    for row in cur.fetchall():
        league, player_name, game_date, team, stat_key, value = row
        norm_name = normalize_player_name(player_name)
        key = (league, norm_name, str(game_date))
        stats_lookup[key][stat_key] = value

    print(f"  Loaded stats for {len(stats_lookup)} player-games")

    # Process each prop
    print("\n[4] Linking props to outcomes...")
    updates = []
    stats = {'linked': 0, 'over': 0, 'under': 0, 'push': 0, 'no_match': 0}

    for prop_row in props_to_link:
        prop_id, league, game_id, player_ext_id, prop_type, line_value, player_name, home, away, game_date = prop_row

        norm_name = normalize_player_name(player_name)
        key = (league, norm_name, str(game_date))

        player_stats = stats_lookup.get(key, {})
        if not player_stats:
            stats['no_match'] += 1
            continue

        # Extract base prop type (remove player ID suffixes)
        base_prop_type = extract_base_prop_type(prop_type)

        # Get stat mappings for this prop type
        stat_keys = PROP_TO_STAT.get(league, {}).get(base_prop_type, [])
        if not stat_keys:
            # Try direct match with the base type
            stat_keys = [base_prop_type] if base_prop_type else []

        # Skip props with empty stat mappings (like fantasyScore)
        if not stat_keys:
            stats['no_match'] += 1
            continue

        # Calculate actual value
        actual_value = None
        if '+' in base_prop_type or len(stat_keys) > 1:
            # Combined prop - sum the stats
            values = []
            for sk in stat_keys:
                if sk in player_stats:
                    values.append(player_stats[sk])
                else:
                    # Try to find a matching stat
                    for actual_sk in player_stats:
                        if sk in actual_sk.lower() or actual_sk.lower() in sk:
                            values.append(player_stats[actual_sk])
                            break
            if values:
                actual_value = sum(values)
        else:
            # Single stat prop
            for sk in stat_keys:
                if sk in player_stats:
                    actual_value = player_stats[sk]
                    break
                # Try partial match
                for actual_sk in player_stats:
                    if sk in actual_sk.lower() or actual_sk.lower() in sk:
                        actual_value = player_stats[actual_sk]
                        break
                if actual_value is not None:
                    break

        if actual_value is None:
            stats['no_match'] += 1
            continue

        # Determine result
        if actual_value > line_value:
            result = 'over'
            stats['over'] += 1
        elif actual_value < line_value:
            result = 'under'
            stats['under'] += 1
        else:
            result = 'push'
            stats['push'] += 1

        stats['linked'] += 1
        updates.append((actual_value, result, prop_id))

    # Apply updates
    if updates:
        print(f"\n[5] Updating {len(updates)} props...")
        cur.executemany('''
            UPDATE "PlayerPropLine"
            SET "actualValue" = %s, result = %s, "linkedAt" = NOW()
            WHERE id = %s
        ''', updates)
        conn.commit()
        print(f"  Updated {len(updates)} props")

    # Report
    print("\n" + "=" * 60)
    print("RESULTS:")
    print(f"  Props linked: {stats['linked']}")
    print(f"  Over hits: {stats['over']} ({100*stats['over']/max(stats['linked'],1):.1f}%)")
    print(f"  Under hits: {stats['under']} ({100*stats['under']/max(stats['linked'],1):.1f}%)")
    print(f"  Pushes: {stats['push']}")
    print(f"  No stat match: {stats['no_match']}")
    print("=" * 60)

    # Show sample linked props
    cur.execute('''
        SELECT
            p.name, ppl."propType", ppl."lineValue", ppl."actualValue", ppl.result
        FROM "PlayerPropLine" ppl
        JOIN "Player" p ON ppl."playerExternalId" = p."externalPlayerId" AND ppl.league = p.league
        WHERE ppl.result IS NOT NULL
          AND ppl.league = 'nba'
        ORDER BY ppl."linkedAt" DESC
        LIMIT 10
    ''')
    print("\nSample linked props (NBA):")
    for row in cur.fetchall():
        name, prop_type, line, actual, result = row
        print(f"  {name}: {prop_type} {line} → {actual} ({result})")

    cur.close()
    conn.close()

    return stats


if __name__ == '__main__':
    run_linkage()
