#!/usr/bin/env python3
"""
Build NFL Player ID Mapping Table
Maps NFL.com IDs to SGO IDs via player names
Enables joining EPA/advanced stats with prop lines
"""
import psycopg2
import re
from datetime import datetime, timezone

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def normalize_name(name):
    """Normalize player name for matching"""
    if not name:
        return ''
    # Convert SGO format: JOSH_ALLEN_1_NFL -> josh allen
    name = re.sub(r'_\d+_(NFL|NBA|NHL|MLB)$', '', name, flags=re.IGNORECASE)
    name = name.replace('_', ' ').lower().strip()
    # Remove suffixes like Jr., III, etc.
    name = re.sub(r'\s+(jr\.?|sr\.?|ii|iii|iv)$', '', name, flags=re.IGNORECASE)
    # Remove apostrophes and hyphens for matching
    name = re.sub(r"['\-]", '', name)
    return name

def expand_nfl_name(short_name):
    """Expand abbreviated NFL names like 'A.Rodgers' to 'Aaron Rodgers'"""
    # Known expansions for common first initials
    known_expansions = {
        'a.rodgers': 'aaron rodgers',
        'p.mahomes': 'patrick mahomes',
        'j.allen': 'josh allen',
        'j.burrow': 'joe burrow',
        'l.jackson': 'lamar jackson',
        'd.prescott': 'dak prescott',
        't.hill': 'tyreek hill',
        'j.jefferson': 'justin jefferson',
        'c.lamb': 'ceedee lamb',
        's.diggs': 'stefon diggs',
        'd.adams': 'davante adams',
        't.kelce': 'travis kelce',
        'g.kittle': 'george kittle',
        'm.andrews': 'mark andrews',
        'd.henry': 'derrick henry',
        'j.mixon': 'joe mixon',
        'c.mccaffrey': 'christian mccaffrey',
        's.barkley': 'saquon barkley',
        'b.robinson': 'brian robinson',
        'a.brown': 'amon-ra st brown',  # tricky one
        'a.st. brown': 'amon-ra st brown',
        't.lawrence': 'trevor lawrence',
        'c.stroud': 'c.j. stroud',
        'b.nix': 'bo nix',
        'j.hurts': 'jalen hurts',
        'b.purdy': 'brock purdy',
        'm.stafford': 'matthew stafford',
        'r.wilson': 'russell wilson',
        'k.murray': 'kyler murray',
        'j.goff': 'jared goff',
        'd.watson': 'deshaun watson',
        'a.richardson': 'anthony richardson',
        'j.daniels': 'jayden daniels',
        'c.williams': 'caleb williams',
        'd.maye': 'drake maye',
    }
    
    normalized = short_name.lower().strip()
    if normalized in known_expansions:
        return known_expansions[normalized]
    
    # Generic expansion: J.Smith -> j smith (remove period, hope for match)
    expanded = re.sub(r'\.', ' ', normalized).strip()
    expanded = re.sub(r'\s+', ' ', expanded)
    return expanded

def build_mapping():
    """Build the NFL player ID mapping table"""
    db_url = load_db_url()
    if not db_url:
        print('Error: SPORTS_DATABASE_URL not found')
        return
    
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    
    # Get unique SGO IDs from PlayerPropLine (contains player names)
    print('Fetching NFL players from PlayerPropLine (SGO IDs)...')
    cur.execute('''
        SELECT DISTINCT "playerExternalId"
        FROM "PlayerPropLine"
        WHERE league = 'nfl'
        AND "playerExternalId" LIKE '%_NFL'
    ''')
    sgo_players = cur.fetchall()
    print(f'  Found {len(sgo_players)} SGO-style NFL player IDs')
    
    # Build SGO mapping: extract names from IDs
    sgo_mapping = {}
    for (sgo_id,) in sgo_players:
        # Extract name from SGO ID: JOSH_ALLEN_1_NFL -> Josh Allen
        name_match = re.match(r'^([A-Z_]+)_\d+_NFL$', sgo_id, re.IGNORECASE)
        if name_match:
            name_parts = name_match.group(1).split('_')
            player_name = ' '.join(p.capitalize() for p in name_parts)
            normalized = normalize_name(player_name)
            
            sgo_mapping[normalized] = {
                'playerName': player_name,
                'normalizedName': normalized,
                'sgoId': sgo_id,
            }
    
    print(f'  Extracted {len(sgo_mapping)} unique player names from SGO')
    
    # Get unique NFL.com IDs from PlayerGameMetric (has player names too)
    print('Fetching NFL players from PlayerGameMetric (NFL.com IDs)...')
    cur.execute('''
        SELECT DISTINCT "playerExternalId", "playerName", position, team
        FROM "PlayerGameMetric"
        WHERE league = 'nfl'
        AND "playerExternalId" LIKE '00-%'
        AND "playerName" IS NOT NULL
    ''')
    nfl_players = cur.fetchall()
    print(f'  Found {len(nfl_players)} NFL.com style player IDs')
    
    # Build NFL.com mapping
    nfl_mapping = {}
    for nfl_id, player_name, position, team in nfl_players:
        if not player_name:
            continue
        # Expand abbreviated names
        expanded = expand_nfl_name(player_name)
        normalized = normalize_name(expanded)
        
        if normalized not in nfl_mapping:
            nfl_mapping[normalized] = {
                'nflComId': nfl_id,
                'playerName': player_name,
                'normalizedName': normalized,
                'position': position,
                'team': team,
            }
    
    print(f'  Extracted {len(nfl_mapping)} unique player names from NFL.com data')
    
    # Match and insert
    insert_sql = '''
        INSERT INTO "PlayerMapping"
        (league, "playerName", "normalizedName", "nbaComId", "sgoId", "espnId")
        VALUES (%s, %s, %s, %s, %s, %s)
        ON CONFLICT (league, "normalizedName")
        DO UPDATE SET
            "nbaComId" = COALESCE(EXCLUDED."nbaComId", "PlayerMapping"."nbaComId"),
            "sgoId" = COALESCE(EXCLUDED."sgoId", "PlayerMapping"."sgoId"),
            "updatedAt" = NOW()
    '''
    
    inserted = 0
    matched = 0
    
    # First, insert all SGO players
    for normalized, data in sgo_mapping.items():
        nfl_id = None
        # Try to find matching NFL.com ID
        if normalized in nfl_mapping:
            nfl_id = nfl_mapping[normalized]['nflComId']
            matched += 1
        
        try:
            cur.execute(insert_sql, (
                'nfl',
                data['playerName'],
                data['normalizedName'],
                nfl_id,  # Using nbaComId column for NFL.com ID (reusing schema)
                data['sgoId'],
                None,  # espnId
            ))
            inserted += 1
        except Exception as e:
            print(f'  Error inserting {data["playerName"]}: {e}')
            conn.rollback()
    
    conn.commit()
    print(f'  Inserted {inserted} NFL mappings, {matched} with NFL.com IDs')
    
    # Also insert NFL.com players that weren't in SGO (for completeness)
    additional = 0
    for normalized, data in nfl_mapping.items():
        if normalized not in sgo_mapping:
            try:
                cur.execute(insert_sql, (
                    'nfl',
                    data['playerName'],
                    data['normalizedName'],
                    data['nflComId'],
                    None,  # No SGO ID
                    None,
                ))
                additional += 1
            except Exception as e:
                conn.rollback()
    
    conn.commit()
    print(f'  Added {additional} additional NFL.com-only players')
    
    # Summary
    cur.execute('''
        SELECT 
            COUNT(*) as total,
            COUNT("nbaComId") as with_nfl_id,
            COUNT("sgoId") as with_sgo_id,
            COUNT(CASE WHEN "nbaComId" IS NOT NULL AND "sgoId" IS NOT NULL THEN 1 END) as fully_mapped
        FROM "PlayerMapping"
        WHERE league = 'nfl'
    ''')
    stats = cur.fetchone()
    
    print(f'\n✅ NFL Player Mapping Complete:')
    print(f'   Total: {stats[0]}')
    print(f'   With NFL.com ID: {stats[1]}')
    print(f'   With SGO ID: {stats[2]}')
    print(f'   Fully Mapped (both IDs): {stats[3]}')
    
    # Show some sample mappings
    print('\n📋 Sample Fully Mapped Players:')
    cur.execute('''
        SELECT "playerName", "nbaComId", "sgoId"
        FROM "PlayerMapping"
        WHERE league = 'nfl'
        AND "nbaComId" IS NOT NULL
        AND "sgoId" IS NOT NULL
        LIMIT 10
    ''')
    for row in cur.fetchall():
        print(f'   {row[0]}: NFL.com={row[1]}, SGO={row[2]}')
    
    cur.close()
    conn.close()
    
    return {'success': True, 'total': stats[0], 'fully_mapped': stats[3]}

if __name__ == '__main__':
    build_mapping()
