#!/var/www/html/eventheodds/.venv-nfl/bin/python
"""
NFL Red Zone Team Metrics (nfl_data_py)
Computes drives per game, red-zone trips, TD rate, and play-calling tendencies.
"""
import nfl_data_py as nfl
import psycopg2
import pandas as pd
from datetime import datetime, timezone


def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''


def ingest_nfl_redzone(season=2025):
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    stats_added = 0

    game_key = f'{season}_NFL_REDZONE'
    game_date = datetime(season, 9, 1, tzinfo=timezone.utc)

    sql_team = '''
        INSERT INTO "TeamGameMetric"
        (league, season, "gameKey", "gameDate", team, "statKey", value)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (league, season, "gameKey", team, "statKey")
        DO UPDATE SET value = EXCLUDED.value
    '''

    print(f'Fetching NFL PBP for {season}...')
    pbp = nfl.import_pbp_data([season])
    print(f'  Plays: {len(pbp)}')

    required_cols = {'game_id', 'posteam', 'drive', 'yardline_100', 'play_type'}
    missing = required_cols - set(pbp.columns)
    if missing:
        raise RuntimeError(f'Missing columns in pbp: {missing}')

    plays = pbp[pbp['posteam'].notna()]
    plays = plays[plays['play_type'].isin(['pass', 'run'])]

    # Drives per game
    drives = (
        plays[['game_id', 'posteam', 'drive']]
        .dropna()
        .drop_duplicates()
    )
    drives_per_team = drives.groupby('posteam')['drive'].count().rename('drives').reset_index()
    games_per_team = drives.groupby('posteam')['game_id'].nunique().rename('games').reset_index()
    drive_stats = drives_per_team.merge(games_per_team, on='posteam', how='left')
    drive_stats['drives_per_game'] = drive_stats['drives'] / drive_stats['games']

    # Red-zone plays
    rz_plays = plays[plays['yardline_100'].notna() & (plays['yardline_100'] <= 20)]
    rz_pass = rz_plays[rz_plays['play_type'] == 'pass'].groupby('posteam').size().rename('rz_pass_plays')
    rz_run = rz_plays[rz_plays['play_type'] == 'run'].groupby('posteam').size().rename('rz_run_plays')

    # Red-zone trips and TDs by drive
    drive_cols = ['game_id', 'posteam', 'drive']
    drive_df = plays[drive_cols + ['yardline_100']].dropna()
    drive_group = drive_df.groupby(drive_cols)
    rz_drive = drive_group['yardline_100'].min().rename('min_yardline').reset_index()
    rz_drive['rz_trip'] = rz_drive['min_yardline'] <= 20

    td_col = None
    for candidate in ['drive_end_result', 'drive_ended_with_score', 'drive_end_event']:
        if candidate in pbp.columns:
            td_col = candidate
            break
    if td_col:
        drive_results = pbp[drive_cols + [td_col]].dropna(subset=drive_cols).drop_duplicates(subset=drive_cols)
        rz_drive = rz_drive.merge(drive_results, on=drive_cols, how='left')
        if td_col == 'drive_ended_with_score':
            rz_drive['rz_td'] = rz_drive['drive_ended_with_score'].fillna(0).astype(int)
        else:
            rz_drive['rz_td'] = rz_drive[td_col].astype(str).str.contains('Touchdown', case=False, na=False).astype(int)
    else:
        rz_drive['rz_td'] = 0

    rz_trip_counts = rz_drive[rz_drive['rz_trip']].groupby('posteam').size().rename('rz_trips')
    rz_td_counts = rz_drive[(rz_drive['rz_trip']) & (rz_drive['rz_td'] == 1)].groupby('posteam').size().rename('rz_tds')

    # Merge stats
    team_df = drive_stats.set_index('posteam')
    team_df = team_df.join(rz_pass, how='left').join(rz_run, how='left').join(rz_trip_counts, how='left').join(rz_td_counts, how='left')
    team_df = team_df.fillna(0)
    team_df['rz_trips_per_game'] = team_df['rz_trips'] / team_df['games']
    team_df['rz_td_rate'] = team_df.apply(lambda r: (r['rz_tds'] / r['rz_trips']) if r['rz_trips'] > 0 else 0, axis=1)
    team_df['rz_pass_rate'] = team_df.apply(
        lambda r: (r['rz_pass_plays'] / (r['rz_pass_plays'] + r['rz_run_plays']))
        if (r['rz_pass_plays'] + r['rz_run_plays']) > 0 else 0,
        axis=1
    )
    team_df['rz_run_rate'] = 1 - team_df['rz_pass_rate']

    for team, row in team_df.reset_index().iterrows():
        t = row['posteam']
        if not t:
            continue

        metrics = [
            ('nfl_drives_per_game', row['drives_per_game']),
            ('nfl_redzone_trips_per_game', row['rz_trips_per_game']),
            ('nfl_redzone_td_rate', row['rz_td_rate']),
            ('nfl_redzone_pass_rate', row['rz_pass_rate']),
            ('nfl_redzone_run_rate', row['rz_run_rate']),
            ('nfl_redzone_trips', row['rz_trips']),
            ('nfl_redzone_tds', row['rz_tds']),
            ('nfl_redzone_pass_plays', row['rz_pass_plays']),
            ('nfl_redzone_run_plays', row['rz_run_plays']),
        ]

        for stat_key, val in metrics:
            try:
                cur.execute(sql_team, ('nfl', season, game_key, game_date, t, stat_key, float(val)))
                stats_added += 1
            except Exception:
                conn.rollback()

    conn.commit()
    cur.close()
    conn.close()

    print(f'✅ NFL red-zone metrics ingested: {stats_added}')
    return {'success': True, 'stats_added': stats_added}


if __name__ == '__main__':
    import sys
    season = int(sys.argv[1]) if len(sys.argv) > 1 else 2025
    ingest_nfl_redzone(season)
