#!/var/www/html/eventheodds/.venv-scraper/bin/python
"""
Soccer xG Advanced Stats - Understat API
Fetches xG, xA, xGChain, xGBuildup for players and teams

NOTE: Understat.com now uses client-side JavaScript rendering.
This scraper may return 0 results unless Playwright is used.
Consider using scrape_fbref_soccer.py as an alternative.
"""
import requests
import re
import json
import psycopg2
from datetime import datetime, timezone

UNDERSTAT_BASE = 'https://understat.com'

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def extract_json_from_html(html, var_name):
    """Extract JSON data from Understat HTML page"""
    pattern = rf"var {var_name}\s*=\s*JSON\.parse\('(.+?)'\)"
    match = re.search(pattern, html)
    if match:
        json_str = match.group(1)
        # Unescape the JSON string
        json_str = json_str.encode().decode('unicode_escape')
        return json.loads(json_str)
    return None

def fetch_league_players(league='EPL', season='2024'):
    """Fetch player xG data from Understat"""
    url = f'{UNDERSTAT_BASE}/league/{league}/{season}'
    
    print(f'Fetching {league} player xG for {season}...')
    try:
        resp = requests.get(url, timeout=30)
        if resp.status_code != 200:
            print(f'  Status: {resp.status_code}')
            return []
        
        data = extract_json_from_html(resp.text, 'playersData')
        if data:
            print(f'  Found {len(data)} players')
            return data
        return []
    except Exception as e:
        print(f'Error: {e}')
        return []

def fetch_league_teams(league='EPL', season='2024'):
    """Fetch team xG data from Understat"""
    url = f'{UNDERSTAT_BASE}/league/{league}/{season}'
    
    print(f'Fetching {league} team xG for {season}...')
    try:
        resp = requests.get(url, timeout=30)
        if resp.status_code != 200:
            return []
        
        data = extract_json_from_html(resp.text, 'teamsData')
        if data:
            print(f'  Found {len(data)} teams')
            return data
        return []
    except Exception as e:
        print(f'Error: {e}')
        return []

def safe_float(val):
    try:
        if val is None or val == '':
            return None
        return float(val)
    except:
        return None

def ingest_soccer_xg(season='2024'):
    """Main ingest function"""
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')
    
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    stats_added = 0
    
    season_year = int(season)
    game_date = datetime(season_year, 8, 1, tzinfo=timezone.utc)
    
    sql_team = '''
        INSERT INTO "TeamGameMetric"
        (league, season, "gameKey", "gameDate", team, "statKey", value)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (league, season, "gameKey", team, "statKey")
        DO UPDATE SET value = EXCLUDED.value
    '''
    
    sql_player = '''
        INSERT INTO "PlayerGameMetric"
        (league, season, "gameKey", "gameDate", "playerExternalId", "playerName", team, "statKey", value)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (league, season, "gameKey", "playerExternalId", "statKey")
        DO UPDATE SET value = EXCLUDED.value
    '''
    
    # League mapping
    leagues = {
        'EPL': 'epl',
        'La_liga': 'laliga',
        'Bundesliga': 'bundesliga',
        'Serie_A': 'seriea',
        'Ligue_1': 'ligue1',
    }
    
    for understat_league, db_league in leagues.items():
        game_key = f'{season}_{db_league.upper()}_SEASON'
        
        # 1. Player xG data
        players = fetch_league_players(understat_league, season)
        player_count = 0
        
        for player in players:
            player_id = str(player.get('id', ''))
            player_name = player.get('player_name', '')
            team = player.get('team_title', '')
            
            if not player_name:
                continue
            
            metrics = [
                ('soccer_xg', player.get('xG')),  # Expected Goals
                ('soccer_xa', player.get('xA')),  # Expected Assists
                ('soccer_xg_chain', player.get('xGChain')),  # xG Chain
                ('soccer_xg_buildup', player.get('xGBuildup')),  # xG Buildup
                ('soccer_npxg', player.get('npxG')),  # Non-penalty xG
                ('soccer_goals', player.get('goals')),
                ('soccer_assists', player.get('assists')),
                ('soccer_shots', player.get('shots')),
                ('soccer_key_passes', player.get('key_passes')),
                ('soccer_games', player.get('games')),
                ('soccer_minutes', player.get('time')),
                ('soccer_yellow_cards', player.get('yellow_cards')),
                ('soccer_red_cards', player.get('red_cards')),
                ('soccer_npg', player.get('npg')),  # Non-penalty goals
            ]
            
            # Calculate per-90 metrics
            minutes = safe_float(player.get('time'))
            if minutes and minutes > 0:
                xg = safe_float(player.get('xG'))
                xa = safe_float(player.get('xA'))
                goals = safe_float(player.get('goals'))
                if xg is not None:
                    metrics.append(('soccer_xg_per90', xg / minutes * 90))
                if xa is not None:
                    metrics.append(('soccer_xa_per90', xa / minutes * 90))
                if goals is not None:
                    metrics.append(('soccer_goals_per90', goals / minutes * 90))
            
            for stat_key, val in metrics:
                fval = safe_float(val)
                if fval is not None:
                    try:
                        cur.execute(sql_player, (
                            db_league, season_year, game_key, game_date,
                            player_id, player_name, team,
                            stat_key, fval
                        ))
                        stats_added += 1
                        player_count += 1
                    except:
                        conn.rollback()
        
        conn.commit()
        print(f'  {understat_league} player xG added: {player_count}')
        
        # 2. Team xG data
        teams_data = fetch_league_teams(understat_league, season)
        team_count = 0
        
        if isinstance(teams_data, dict):
            for team_id, team_info in teams_data.items():
                team_name = team_info.get('title', '')
                if not team_name:
                    continue
                
                # Aggregate history for season totals
                history = team_info.get('history', [])
                if not history:
                    continue
                
                total_xg = sum(safe_float(g.get('xG')) or 0 for g in history)
                total_xga = sum(safe_float(g.get('xGA')) or 0 for g in history)
                total_goals = sum(safe_float(g.get('scored')) or 0 for g in history)
                total_conceded = sum(safe_float(g.get('missed')) or 0 for g in history)
                total_pts = sum(safe_float(g.get('pts')) or 0 for g in history)
                games = len(history)
                
                metrics = [
                    ('soccer_team_xg', total_xg),
                    ('soccer_team_xga', total_xga),
                    ('soccer_team_goals', total_goals),
                    ('soccer_team_conceded', total_conceded),
                    ('soccer_team_points', total_pts),
                    ('soccer_team_games', games),
                    ('soccer_team_xg_diff', total_xg - total_xga),
                    ('soccer_team_goal_diff', total_goals - total_conceded),
                ]
                
                if games > 0:
                    metrics.append(('soccer_team_xg_per_game', total_xg / games))
                    metrics.append(('soccer_team_xga_per_game', total_xga / games))
                    metrics.append(('soccer_team_ppg', total_pts / games))
                
                for stat_key, val in metrics:
                    if val is not None:
                        try:
                            cur.execute(sql_team, (db_league, season_year, game_key, game_date, team_name, stat_key, float(val)))
                            stats_added += 1
                            team_count += 1
                        except:
                            conn.rollback()
        
        conn.commit()
        print(f'  {understat_league} team xG added: {team_count}')
    
    cur.close()
    conn.close()
    
    print(f'')
    print(f'✅ Soccer xG ingested: {stats_added} total metrics')
    return {'success': True, 'stats_added': stats_added}

if __name__ == '__main__':
    import sys
    season = sys.argv[1] if len(sys.argv) > 1 else '2024'
    ingest_soccer_xg(season)
