#!/usr/bin/env python3
"""
NHL Advanced Stats Scraper - Natural Stat Trick
Fetches HDCF, xG, Corsi, Fenwick at 5v5 for all teams
"""
import requests
import pandas as pd
import psycopg2
import os
from datetime import datetime, timezone
from bs4 import BeautifulSoup
import time
import re

# Natural Stat Trick URLs
NST_BASE = 'https://www.naturalstattrick.com'
NST_TEAM_TABLE = f'{NST_BASE}/teamtable.php'

def load_db_url():
    db_url = os.environ.get('SPORTS_DATABASE_URL', '')
    if db_url:
        return db_url.split('?')[0]
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def fetch_nst_team_stats(season='20252026', situation='5v5'):
    """Fetch team stats from Natural Stat Trick"""
    params = {
        'fromseason': season,
        'thruseason': season,
        'stype': '2',  # Regular season
        'sit': situation,  # 5v5, all, pp, pk, etc.
        'score': 'all',
        'rate': 'n',  # Totals (not per 60)
        'team': 'all',
        'loc': 'B',  # Both home and away
        'gpf': '410',  # Games played filter
        'fd': '',
        'td': ''
    }
    
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    
    print(f'Fetching NST team stats for {season} {situation}...')
    resp = requests.get(NST_TEAM_TABLE, params=params, headers=headers, timeout=30)
    resp.raise_for_status()
    
    # Parse HTML table
    soup = BeautifulSoup(resp.text, 'html.parser')
    table = soup.find('table', {'id': 'teams'})
    
    if not table:
        print('No team table found!')
        return []
    
    # Extract headers
    headers_row = table.find('thead').find_all('th')
    headers = [th.get_text(strip=True) for th in headers_row]
    
    # Extract data rows
    rows = []
    tbody = table.find('tbody')
    for tr in tbody.find_all('tr'):
        cells = tr.find_all('td')
        if len(cells) >= len(headers):
            row = {}
            for i, cell in enumerate(cells[:len(headers)]):
                row[headers[i]] = cell.get_text(strip=True)
            rows.append(row)
    
    print(f'  Found {len(rows)} teams')
    return rows

def normalize_stat_key(col_name):
    """Convert NST column names to our stat keys"""
    # Clean up the column name
    key = col_name.lower()
    key = re.sub(r'[^a-z0-9]+', '_', key)
    key = key.strip('_')
    return f'nst_{key}'

def parse_value(val):
    """Parse string value to float"""
    if not val or val == '-':
        return None
    try:
        # Remove % and commas
        cleaned = val.replace('%', '').replace(',', '')
        return float(cleaned)
    except:
        return None

def ingest_nhl_advanced_stats(season_str='20252026'):
    """Main ingest function"""
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')
    
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    
    stats_added = 0
    season_year = int(season_str[:4])  # e.g., 2025 from 20252026
    
    # Fetch 5v5 stats (most important for analytics)
    team_stats = fetch_nst_team_stats(season=season_str, situation='5v5')
    
    if not team_stats:
        print('No team stats fetched!')
        return {'success': False, 'error': 'No data'}
    
    # Important columns we want
    key_columns = [
        'Team', 'GP', 'TOI', 'CF', 'CA', 'CF%', 'FF', 'FA', 'FF%',
        'SF', 'SA', 'SF%', 'GF', 'GA', 'GF%', 'xGF', 'xGA', 'xGF%',
        'SCF', 'SCA', 'SCF%', 'HDCF', 'HDCA', 'HDCF%', 'HDGF', 'HDGA',
        'HDSF', 'HDSA', 'SH%', 'SV%', 'PDO'
    ]
    
    game_key = f'{season_str}_5V5_SEASON'
    game_date = datetime(season_year, 10, 1, tzinfo=timezone.utc)  # Season start approx
    
    for team_row in team_stats:
        team_name = team_row.get('Team', '').strip()
        if not team_name:
            continue
        
        for col in team_row.keys():
            if col == 'Team':
                continue
            
            val = parse_value(team_row[col])
            if val is None:
                continue
            
            stat_key = normalize_stat_key(col)
            
            try:
                cur.execute('''
                    INSERT INTO "TeamGameMetric"
                    (league, season, "gameKey", "gameDate", team, "statKey", value)
                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT (league, season, "gameKey", team, "statKey")
                    DO UPDATE SET value = EXCLUDED.value
                ''', ('nhl', season_year, game_key, game_date, team_name, stat_key, val))
                stats_added += 1
            except Exception as e:
                print(f'Error inserting {team_name} {stat_key}: {e}')
                conn.rollback()
    
    conn.commit()
    
    # Also fetch All Situations for comparison
    print('')
    time.sleep(1)  # Be nice to NST
    all_sit_stats = fetch_nst_team_stats(season=season_str, situation='all')
    
    game_key_all = f'{season_str}_ALL_SEASON'
    for team_row in all_sit_stats:
        team_name = team_row.get('Team', '').strip()
        if not team_name:
            continue
        
        for col in team_row.keys():
            if col == 'Team':
                continue
            
            val = parse_value(team_row[col])
            if val is None:
                continue
            
            stat_key = normalize_stat_key(col) + '_all'
            
            try:
                cur.execute('''
                    INSERT INTO "TeamGameMetric"
                    (league, season, "gameKey", "gameDate", team, "statKey", value)
                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT (league, season, "gameKey", team, "statKey")
                    DO UPDATE SET value = EXCLUDED.value
                ''', ('nhl', season_year, game_key_all, game_date, team_name, stat_key, val))
                stats_added += 1
            except Exception as e:
                conn.rollback()
    
    conn.commit()
    cur.close()
    conn.close()
    
    print(f'')
    print(f'✅ NHL Advanced Stats ingested: {stats_added} metrics')
    return {'success': True, 'stats_added': stats_added}

if __name__ == '__main__':
    import sys
    season = sys.argv[1] if len(sys.argv) > 1 else '20252026'
    ingest_nhl_advanced_stats(season)
