#!/usr/bin/env python3
"""
Build MLB Player ID Mapping Table
Maps MLB Stats API IDs to SGO IDs via player names
Enables joining xBA/xSLG stats with prop lines
"""
import psycopg2
import re
import json
from datetime import datetime, timezone

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def normalize_name(name):
    """Normalize player name for matching"""
    if not name:
        return ''
    # Convert SGO format: JOSE_RAMIREZ_1_MLB -> jose ramirez
    name = re.sub(r'_\d+_MLB$', '', name, flags=re.IGNORECASE)
    name = name.replace('_', ' ').lower().strip()
    # Convert "Last, First" to "First Last"
    if ',' in name:
        parts = [p.strip() for p in name.split(',')]
        if len(parts) == 2:
            name = f"{parts[1]} {parts[0]}"
    # Remove suffixes like Jr., III, etc.
    name = re.sub(r'\s+(jr\.?|sr\.?|ii|iii|iv)$', '', name, flags=re.IGNORECASE)
    # Remove apostrophes and hyphens for matching
    name = re.sub(r"['\-]", '', name)
    return name

def build_mapping():
    """Build the MLB player ID mapping table"""
    db_url = load_db_url()
    if not db_url:
        print('Error: SPORTS_DATABASE_URL not found')
        return
    
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    
    # Extract SGO IDs from raw JSON in PlayerPropLine
    print('Extracting MLB players from PlayerPropLine raw JSON...')
    cur.execute('''
        SELECT DISTINCT raw->'sportsgameodds'->>'playerID' as sgo_id
        FROM "PlayerPropLine"
        WHERE league = 'mlb'
        AND raw->'sportsgameodds'->>'playerID' LIKE '%_MLB'
    ''')
    sgo_players = cur.fetchall()
    print(f'  Found {len(sgo_players)} SGO name-style MLB player IDs')
    
    # Build SGO mapping
    sgo_mapping = {}
    for (sgo_id,) in sgo_players:
        if not sgo_id:
            continue
        # Extract name from SGO ID: JOSE_RAMIREZ_1_MLB -> Jose Ramirez
        name_match = re.match(r'^([A-Z_]+)_\d+_MLB$', sgo_id, re.IGNORECASE)
        if name_match:
            name_parts = name_match.group(1).split('_')
            player_name = ' '.join(p.capitalize() for p in name_parts)
            normalized = normalize_name(player_name)
            
            sgo_mapping[normalized] = {
                'playerName': player_name,
                'normalizedName': normalized,
                'sgoId': sgo_id,
            }
    
    print(f'  Extracted {len(sgo_mapping)} unique player names from SGO')
    
    # Get unique player names from PlayerGameMetric (specifically Statcast data with 6-digit IDs)
    print('Fetching MLB players from PlayerGameMetric (Statcast)...')
    cur.execute('''
        SELECT DISTINCT "playerExternalId", "playerName"
        FROM "PlayerGameMetric"
        WHERE league = 'mlb'
        AND "playerName" IS NOT NULL
        AND "statKey" LIKE 'mlb_%'
        AND LENGTH("playerExternalId") >= 6
    ''')
    mlb_players = cur.fetchall()
    print(f'  Found {len(mlb_players)} MLB players with Statcast stats')
    
    # Build MLB Stats mapping
    mlb_mapping = {}
    for mlb_id, player_name in mlb_players:
        if not player_name:
            continue
        normalized = normalize_name(player_name)
        
        if normalized not in mlb_mapping:
            mlb_mapping[normalized] = {
                'mlbStatsId': mlb_id,
                'playerName': player_name,
                'normalizedName': normalized,
            }
    
    print(f'  Extracted {len(mlb_mapping)} unique player names from MLB Stats data')
    
    # Insert mappings
    insert_sql = '''
        INSERT INTO "PlayerMapping"
        (league, "playerName", "normalizedName", "nbaComId", "sgoId", "espnId")
        VALUES (%s, %s, %s, %s, %s, %s)
        ON CONFLICT (league, "normalizedName")
        DO UPDATE SET
            "nbaComId" = COALESCE(EXCLUDED."nbaComId", "PlayerMapping"."nbaComId"),
            "sgoId" = COALESCE(EXCLUDED."sgoId", "PlayerMapping"."sgoId"),
            "updatedAt" = NOW()
    '''
    
    inserted = 0
    matched = 0
    
    # First, insert SGO players and try to match with MLB Stats IDs
    for normalized, data in sgo_mapping.items():
        mlb_id = None
        if normalized in mlb_mapping:
            mlb_id = mlb_mapping[normalized]['mlbStatsId']
            matched += 1
        
        try:
            cur.execute(insert_sql, (
                'mlb',
                data['playerName'],
                data['normalizedName'],
                mlb_id,  # Using nbaComId column for MLB Stats ID
                data['sgoId'],
                None,
            ))
            inserted += 1
        except Exception as e:
            print(f'  Error inserting {data["playerName"]}: {e}')
            conn.rollback()
    
    conn.commit()
    print(f'  Inserted {inserted} MLB SGO mappings, {matched} matched with Stats IDs')
    
    # Also insert MLB Stats players that weren't in SGO
    additional = 0
    for normalized, data in mlb_mapping.items():
        if normalized not in sgo_mapping:
            try:
                cur.execute(insert_sql, (
                    'mlb',
                    data['playerName'],
                    data['normalizedName'],
                    data['mlbStatsId'],
                    None,
                    None,
                ))
                additional += 1
            except Exception as e:
                conn.rollback()
    
    conn.commit()
    print(f'  Added {additional} additional MLB Stats-only players')
    
    # Summary
    cur.execute('''
        SELECT 
            COUNT(*) as total,
            COUNT("nbaComId") as with_mlb_id,
            COUNT("sgoId") as with_sgo_id,
            COUNT(CASE WHEN "nbaComId" IS NOT NULL AND "sgoId" IS NOT NULL THEN 1 END) as fully_mapped
        FROM "PlayerMapping"
        WHERE league = 'mlb'
    ''')
    stats = cur.fetchone()
    
    print(f'\n✅ MLB Player Mapping Complete:')
    print(f'   Total: {stats[0]}')
    print(f'   With MLB Stats ID: {stats[1]}')
    print(f'   With SGO ID: {stats[2]}')
    print(f'   Fully Mapped (both IDs): {stats[3]}')
    
    # Show some sample mappings
    print('\n📋 Sample Fully Mapped Players:')
    cur.execute('''
        SELECT "playerName", "nbaComId", "sgoId"
        FROM "PlayerMapping"
        WHERE league = 'mlb'
        AND "nbaComId" IS NOT NULL
        AND "sgoId" IS NOT NULL
        ORDER BY "playerName"
        LIMIT 10
    ''')
    for row in cur.fetchall():
        print(f'   {row[0]}: MLB Stats={row[1]}, SGO={row[2]}')
    
    cur.close()
    conn.close()
    
    return {'success': True, 'total': stats[0], 'fully_mapped': stats[3]}

if __name__ == '__main__':
    build_mapping()
