#!/usr/bin/env python3
"""
NFL Coverage Matchup Metrics (nfl_data_py)
Computes man vs zone success rates and EPA for offense and defense.
"""
import nfl_data_py as nfl
import psycopg2
from datetime import datetime, timezone
import sys


def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''


def ingest_coverage_metrics(season=2025):
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    stats_added = 0

    game_key = f'{season}_NFL_COVERAGE'
    game_date = datetime(season, 9, 1, tzinfo=timezone.utc)

    sql_team = '''
        INSERT INTO "TeamGameMetric"
        (league, season, "gameKey", "gameDate", team, "statKey", value)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (league, season, "gameKey", team, "statKey")
        DO UPDATE SET value = EXCLUDED.value
    '''

    print(f'Fetching NFL PBP for {season}...')
    pbp = nfl.import_pbp_data([season])
    print(f'  Plays: {len(pbp)}')

    # Filter to pass plays with coverage data
    passes = pbp[(pbp['play_type'] == 'pass') & (pbp['posteam'].notna())]
    
    # Check for coverage column
    if 'defense_man_zone_type' not in passes.columns:
        print('  WARNING: defense_man_zone_type column not found, skipping coverage metrics')
        conn.close()
        return {'success': True, 'stats_added': 0}

    passes_with_cov = passes[passes['defense_man_zone_type'].notna()].copy()
    print(f'  Pass plays with coverage data: {len(passes_with_cov)}')

    if len(passes_with_cov) == 0:
        print('  No coverage data available for this season')
        conn.close()
        return {'success': True, 'stats_added': 0}

    # Classify as MAN or ZONE
    passes_with_cov['is_man'] = passes_with_cov['defense_man_zone_type'].str.upper().str.contains('MAN', na=False)
    passes_with_cov['is_zone'] = passes_with_cov['defense_man_zone_type'].str.upper().str.contains('ZONE', na=False)
    
    # Success = positive EPA
    passes_with_cov['success'] = (passes_with_cov['epa'] > 0).astype(int)

    # ========== DEFENSIVE Stats (by defteam) ==========
    man_plays = passes_with_cov[passes_with_cov['is_man']]
    zone_plays = passes_with_cov[passes_with_cov['is_zone']]

    # Man coverage defense
    if len(man_plays) > 0:
        def_man = man_plays.groupby('defteam').agg({
            'epa': 'mean',
            'success': 'mean',
            'play_id': 'count'
        }).rename(columns={'play_id': 'plays'})
        
        for team, row in def_man.iterrows():
            if row['plays'] >= 10:  # Min sample
                cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_def_man_epa_allowed', float(row['epa'])))
                cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_def_man_success_allowed', float(row['success'])))
                stats_added += 2

    # Zone coverage defense
    if len(zone_plays) > 0:
        def_zone = zone_plays.groupby('defteam').agg({
            'epa': 'mean',
            'success': 'mean',
            'play_id': 'count'
        }).rename(columns={'play_id': 'plays'})
        
        for team, row in def_zone.iterrows():
            if row['plays'] >= 10:
                cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_def_zone_epa_allowed', float(row['epa'])))
                cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_def_zone_success_allowed', float(row['success'])))
                stats_added += 2

    # ========== OFFENSIVE Stats (by posteam - performance AGAINST coverage) ==========
    if len(man_plays) > 0:
        off_vs_man = man_plays.groupby('posteam').agg({
            'epa': 'mean',
            'success': 'mean',
            'play_id': 'count'
        }).rename(columns={'play_id': 'plays'})
        
        for team, row in off_vs_man.iterrows():
            if row['plays'] >= 10:
                cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_off_vs_man_epa', float(row['epa'])))
                cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_off_vs_man_success', float(row['success'])))
                stats_added += 2

    if len(zone_plays) > 0:
        off_vs_zone = zone_plays.groupby('posteam').agg({
            'epa': 'mean',
            'success': 'mean',
            'play_id': 'count'
        }).rename(columns={'play_id': 'plays'})
        
        for team, row in off_vs_zone.iterrows():
            if row['plays'] >= 10:
                cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_off_vs_zone_epa', float(row['epa'])))
                cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_off_vs_zone_success', float(row['success'])))
                stats_added += 2

    conn.commit()
    cur.close()
    conn.close()

    print(f'✅ NFL coverage metrics ingested: {stats_added}')
    return {'success': True, 'stats_added': stats_added}


if __name__ == '__main__':
    season = int(sys.argv[1]) if len(sys.argv) > 1 else 2025
    ingest_coverage_metrics(season)
