#!/usr/bin/env python3
"""
Build player mappings from SportsGameOdds player IDs.

Extracts player names from SGO format (LEBRON_JAMES_1_NBA) and matches
them to player stats in PlayerGameMetric to enable prop outcome analysis.
"""

import os
import sys
import psycopg2
import re
from collections import defaultdict

DATABASE_URL = os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]

def normalize_name(name):
    """Normalize a player name for matching."""
    if not name:
        return ''
    name = name.lower().strip()
    # Remove suffixes like Jr., III, etc.
    name = re.sub(r'\s+(jr\.?|sr\.?|ii|iii|iv|v)$', '', name, flags=re.IGNORECASE)
    # Remove apostrophes and hyphens
    name = re.sub(r"['\-\.]", '', name)
    # Collapse multiple spaces
    name = re.sub(r'\s+', ' ', name)
    return name.strip()

def extract_name_from_sgo_id(sgo_id):
    """Extract player name from SGO format ID.

    Examples:
    - LEBRON_JAMES_1_NBA -> lebron james
    - DEAARON_FOX_1_NBA -> deaaron fox
    - KARLANTHONY_TOWNS_1_NBA -> karlanthony towns
    """
    if not sgo_id:
        return None

    # Remove the _N_LEAGUE suffix
    match = re.match(r'^(.+)_\d+_(NBA|NFL|NHL|MLB|WNBA|NCAAB|NCAAF)$', sgo_id, re.IGNORECASE)
    if not match:
        return None

    name_part = match.group(1)
    league = match.group(2).upper()

    # Convert underscores to spaces
    name = name_part.replace('_', ' ').lower()

    return normalize_name(name), league

def get_db_connection():
    """Get database connection."""
    return psycopg2.connect(DATABASE_URL)

def build_mappings():
    """Main function to build player mappings."""
    conn = get_db_connection()
    cur = conn.cursor()

    # Get all distinct SGO player IDs from props
    print("Fetching distinct SGO player IDs from props...")
    cur.execute("""
        SELECT DISTINCT "playerExternalId", league
        FROM "PlayerPropLine"
        WHERE "playerExternalId" ~ '^[A-Z]+_[A-Z]+.*_\d+_(NBA|NFL|NHL|MLB|WNBA)$'
    """)

    sgo_ids = cur.fetchall()
    print(f"Found {len(sgo_ids)} distinct SGO-format player IDs")

    # Build lookup of player stats by normalized name and league
    print("\nBuilding player stats lookup...")
    cur.execute("""
        SELECT DISTINCT "playerName", "playerExternalId", league
        FROM "PlayerGameMetric"
        WHERE "playerName" IS NOT NULL
    """)

    # Map normalized name -> (original_name, external_id)
    stats_by_name = defaultdict(list)
    for row in cur.fetchall():
        player_name, external_id, league = row
        norm_name = normalize_name(player_name)
        stats_by_name[(norm_name, league.lower())].append({
            'name': player_name,
            'external_id': external_id
        })

    print(f"Loaded {len(stats_by_name)} unique player/league combinations from stats")

    # Get existing mappings
    cur.execute("""
        SELECT "normalizedName", league, "sgoId"
        FROM "PlayerMapping"
        WHERE "sgoId" IS NOT NULL
    """)
    existing = set((row[0], row[1]) for row in cur.fetchall())
    print(f"Found {len(existing)} existing mappings with sgoId")

    # Process each SGO ID
    new_mappings = 0
    updated_mappings = 0
    not_found = 0

    for sgo_id, prop_league in sgo_ids:
        result = extract_name_from_sgo_id(sgo_id)
        if not result:
            continue

        extracted_name, extracted_league = result
        league = prop_league.lower()

        # Check if we have stats for this player
        matches = stats_by_name.get((extracted_name, league), [])

        if not matches:
            # Try without league (cross-league matching)
            for (name, lg), players in stats_by_name.items():
                if name == extracted_name:
                    matches = players
                    break

        if matches:
            # Use the first match (could improve with fuzzy matching)
            match = matches[0]

            # Check if mapping exists
            cur.execute("""
                SELECT id FROM "PlayerMapping"
                WHERE league = %s AND "normalizedName" = %s
            """, (league, extracted_name))
            existing_row = cur.fetchone()

            if existing_row:
                # Update existing mapping with sgoId
                cur.execute("""
                    UPDATE "PlayerMapping"
                    SET "sgoId" = %s, "playerName" = COALESCE("playerName", %s)
                    WHERE id = %s AND ("sgoId" IS NULL OR "sgoId" = '')
                """, (sgo_id, match['name'], existing_row[0]))
                if cur.rowcount > 0:
                    updated_mappings += 1
            else:
                # Create new mapping
                cur.execute("""
                    INSERT INTO "PlayerMapping" (
                        league, "playerName", "normalizedName", "sgoId", "bdlId"
                    ) VALUES (%s, %s, %s, %s, %s)
                    ON CONFLICT (league, "normalizedName") DO UPDATE
                    SET "sgoId" = EXCLUDED."sgoId"
                    WHERE "PlayerMapping"."sgoId" IS NULL
                """, (league, match['name'], extracted_name, sgo_id, match.get('external_id')))
                if cur.rowcount > 0:
                    new_mappings += 1
        else:
            not_found += 1
            if not_found <= 10:
                print(f"  No stats match for: {sgo_id} -> {extracted_name}")

    conn.commit()

    print(f"\n=== Summary ===")
    print(f"New mappings created: {new_mappings}")
    print(f"Existing mappings updated with sgoId: {updated_mappings}")
    print(f"Players not found in stats: {not_found}")

    # Report current mapping coverage
    cur.execute("""
        SELECT
            league,
            COUNT(*) as total_mappings,
            COUNT("sgoId") as with_sgo_id,
            COUNT("bdlId") as with_bdl_id
        FROM "PlayerMapping"
        GROUP BY league
        ORDER BY COUNT(*) DESC
    """)

    print("\n=== Mapping Coverage by League ===")
    for row in cur.fetchall():
        league, total, with_sgo, with_bdl = row
        print(f"  {league}: {total} total, {with_sgo} with SGO ID, {with_bdl} with BDL ID")

    cur.close()
    conn.close()

if __name__ == '__main__':
    if not DATABASE_URL:
        print("Error: SPORTS_DATABASE_URL not set")
        sys.exit(1)

    build_mappings()
