#!/usr/bin/env python3
"""
NFL Drive-Level Efficiency Metrics (nfl_data_py)
Computes 3rd-down conversion rates by quarter, scoring drive rates, drive efficiency.
"""
import nfl_data_py as nfl
import psycopg2
from datetime import datetime, timezone
import sys


def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''


def ingest_drive_efficiency(season=2025):
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    stats_added = 0

    game_key = f'{season}_NFL_DRIVE_EFFICIENCY'
    game_date = datetime(season, 9, 1, tzinfo=timezone.utc)

    sql_team = '''
        INSERT INTO "TeamGameMetric"
        (league, season, "gameKey", "gameDate", team, "statKey", value)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (league, season, "gameKey", team, "statKey")
        DO UPDATE SET value = EXCLUDED.value
    '''

    print(f'Fetching NFL PBP for {season}...')
    pbp = nfl.import_pbp_data([season])
    print(f'  Plays: {len(pbp)}')

    # Filter to real plays
    plays = pbp[pbp['posteam'].notna()]
    plays = plays[plays['play_type'].isin(['pass', 'run'])]

    # Games per team for normalization
    games_per_team = plays.groupby('posteam')['game_id'].nunique().to_dict()

    # ========== 3rd Down Conversion by Quarter ==========
    third_down = plays[plays['down'] == 3].copy()
    third_down['converted'] = third_down['third_down_converted'].fillna(0).astype(int)
    third_down['failed'] = third_down['third_down_failed'].fillna(0).astype(int)
    third_down['attempts'] = third_down['converted'] + third_down['failed']

    for qtr in [1, 2, 3, 4]:
        qtr_plays = third_down[third_down['qtr'] == qtr]
        by_team = qtr_plays.groupby('posteam').agg({
            'converted': 'sum',
            'attempts': 'sum'
        })
        for team, row in by_team.iterrows():
            if row['attempts'] > 0:
                conv_rate = row['converted'] / row['attempts']
                cur.execute(sql_team, ('nfl', season, game_key, game_date, team, f'nfl_3rd_down_conv_q{qtr}', float(conv_rate)))
                stats_added += 1

    # Overall 3rd down conversion
    overall = third_down.groupby('posteam').agg({'converted': 'sum', 'attempts': 'sum'})
    for team, row in overall.iterrows():
        if row['attempts'] > 0:
            conv_rate = row['converted'] / row['attempts']
            cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_3rd_down_conv_overall', float(conv_rate)))
            stats_added += 1

    # ========== 4th Down Conversion ==========
    fourth_down = plays[plays['down'] == 4].copy()
    fourth_down['converted'] = fourth_down['fourth_down_converted'].fillna(0).astype(int)
    fourth_down['failed'] = fourth_down['fourth_down_failed'].fillna(0).astype(int)
    fourth_down['attempts'] = fourth_down['converted'] + fourth_down['failed']

    fourth_overall = fourth_down.groupby('posteam').agg({'converted': 'sum', 'attempts': 'sum'})
    for team, row in fourth_overall.iterrows():
        if row['attempts'] > 0:
            conv_rate = row['converted'] / row['attempts']
            cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_4th_down_conv_overall', float(conv_rate)))
            stats_added += 1

    # ========== Scoring Drive Rate ==========
    # Count drives that ended in TD or FG
    drives = plays[['game_id', 'posteam', 'drive']].dropna().drop_duplicates()
    total_drives = drives.groupby('posteam').size().to_dict()

    # Check for scoring columns
    if 'drive_end_result' in pbp.columns:
        drive_results = pbp[['game_id', 'posteam', 'drive', 'drive_end_result']].dropna(subset=['drive', 'posteam']).drop_duplicates(subset=['game_id', 'posteam', 'drive'])
        drive_results['is_score'] = drive_results['drive_end_result'].str.contains('Touchdown|Field goal', case=False, na=False)
        scoring_drives = drive_results[drive_results['is_score']].groupby('posteam').size().to_dict()
    else:
        # Fallback: use touchdown column
        td_plays = plays[plays['touchdown'] == 1]
        scoring_drives = td_plays.groupby('posteam')['drive'].nunique().to_dict()

    for team in total_drives:
        if total_drives[team] > 0:
            score_count = scoring_drives.get(team, 0)
            scoring_rate = score_count / total_drives[team]
            cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_scoring_drive_rate', float(scoring_rate)))
            stats_added += 1

    # ========== Yards per Drive ==========
    if 'drive_yards' in pbp.columns or 'ydsnet' in plays.columns:
        col = 'drive_yards' if 'drive_yards' in pbp.columns else 'ydsnet'
        drive_yards = plays.groupby(['posteam', 'game_id', 'drive'])[col].max().reset_index()
        avg_yards_per_drive = drive_yards.groupby('posteam')[col].mean()
        for team, val in avg_yards_per_drive.items():
            cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_yards_per_drive', float(val)))
            stats_added += 1

    # ========== Plays per Drive ==========
    plays_per_drive = plays.groupby(['posteam', 'game_id', 'drive']).size().reset_index(name='plays')
    avg_plays_per_drive = plays_per_drive.groupby('posteam')['plays'].mean()
    for team, val in avg_plays_per_drive.items():
        cur.execute(sql_team, ('nfl', season, game_key, game_date, team, 'nfl_plays_per_drive', float(val)))
        stats_added += 1

    conn.commit()
    cur.close()
    conn.close()

    print(f'✅ NFL drive efficiency metrics ingested: {stats_added}')
    return {'success': True, 'stats_added': stats_added}


if __name__ == '__main__':
    season = int(sys.argv[1]) if len(sys.argv) > 1 else 2025
    ingest_drive_efficiency(season)
