#!/usr/bin/env python3
"""
College Football Advanced Stats - CollegeFootballData.com API
Fetches SP+ ratings (preseason & current), PPA, team efficiency
Tracks regression by comparing preseason to current rankings
"""
import requests
import psycopg2
import os
from datetime import datetime, timezone

CFBD_BASE = 'https://api.collegefootballdata.com'

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def load_cfbd_key():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('CFBD_API_KEY='):
                return line.split('=', 1)[1].strip()
    return None

def fetch_sp_ratings(year=2025):
    """Fetch current SP+ ratings from CFBD"""
    key = load_cfbd_key()
    if not key:
        print('No CFBD API key found')
        return []
    
    headers = {'Authorization': f'Bearer {key}'}
    url = f'{CFBD_BASE}/ratings/sp'
    params = {'year': year}
    
    print(f'Fetching SP+ ratings for {year}...')
    try:
        resp = requests.get(url, params=params, headers=headers, timeout=30)
        if resp.status_code == 401:
            print('CFBD API key invalid or expired')
            return []
        resp.raise_for_status()
        data = resp.json()
        print(f'  Found {len(data)} teams')
        return data
    except Exception as e:
        print(f'Error fetching SP+: {e}')
        return []

def fetch_preseason_rankings(year=2025):
    """Fetch preseason AP/Coaches poll rankings"""
    key = load_cfbd_key()
    headers = {'Authorization': f'Bearer {key}'} if key else {}
    
    url = f'{CFBD_BASE}/rankings'
    params = {'year': year, 'week': 1, 'seasonType': 'regular'}
    
    print(f'Fetching preseason rankings for {year}...')
    try:
        resp = requests.get(url, params=params, headers=headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        if data:
            print(f'  Found {len(data)} weeks of rankings')
            return data[0] if data else {}
        return {}
    except Exception as e:
        print(f'Error: {e}')
        return {}

def fetch_current_rankings(year=2025):
    """Fetch most recent AP/Coaches poll rankings"""
    key = load_cfbd_key()
    headers = {'Authorization': f'Bearer {key}'} if key else {}
    
    url = f'{CFBD_BASE}/rankings'
    params = {'year': year, 'seasonType': 'regular'}
    
    print(f'Fetching current rankings for {year}...')
    try:
        resp = requests.get(url, params=params, headers=headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        if data:
            latest = data[-1]
            print(f'  Week {latest.get("week")}: {len(latest.get("polls", []))} polls')
            return latest
        return {}
    except Exception as e:
        print(f'Error: {e}')
        return {}

def fetch_ppa(year=2025):
    """Fetch Predicted Points Added (similar to EPA)"""
    key = load_cfbd_key()
    if not key:
        return []
    
    headers = {'Authorization': f'Bearer {key}'}
    url = f'{CFBD_BASE}/ppa/teams'
    params = {'year': year}
    
    print(f'Fetching PPA data for {year}...')
    try:
        resp = requests.get(url, params=params, headers=headers, timeout=30)
        if resp.status_code == 401:
            return []
        resp.raise_for_status()
        data = resp.json()
        print(f'  Found {len(data)} teams')
        return data
    except Exception as e:
        print(f'Error: {e}')
        return []

def fetch_team_talent(year=2025):
    """Fetch team talent composite rankings (recruiting)"""
    key = load_cfbd_key()
    if not key:
        return []
    
    headers = {'Authorization': f'Bearer {key}'}
    url = f'{CFBD_BASE}/talent'
    params = {'year': year}
    
    print(f'Fetching team talent for {year}...')
    try:
        resp = requests.get(url, params=params, headers=headers, timeout=30)
        if resp.status_code == 401:
            return []
        resp.raise_for_status()
        data = resp.json()
        print(f'  Found {len(data)} teams')
        return data
    except Exception as e:
        print(f'Error: {e}')
        return []

def fetch_returning_production(year=2025):
    """Fetch returning production % for each team"""
    key = load_cfbd_key()
    if not key:
        return []
    
    headers = {'Authorization': f'Bearer {key}'}
    url = f'{CFBD_BASE}/player/returning'
    params = {'year': year}
    
    print(f'Fetching returning production for {year}...')
    try:
        resp = requests.get(url, params=params, headers=headers, timeout=30)
        if resp.status_code == 401:
            return []
        resp.raise_for_status()
        data = resp.json()
        print(f'  Found {len(data)} teams')
        return data
    except Exception as e:
        print(f'Error: {e}')
        return []

def ingest_cfb_advanced(year=2025):
    """Main ingest function"""
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')
    
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    stats_added = 0
    
    game_key = f'{year}_CFB_SEASON'
    game_date = datetime(year, 8, 15, tzinfo=timezone.utc)
    
    sql = '''
        INSERT INTO "TeamGameMetric"
        (league, season, "gameKey", "gameDate", team, "statKey", value)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT (league, season, "gameKey", team, "statKey")
        DO UPDATE SET value = EXCLUDED.value
    '''
    
    # 1. SP+ Ratings (Current)
    sp_data = fetch_sp_ratings(year)
    sp_count = 0
    for team in sp_data:
        team_name = team.get('team', '')
        if not team_name:
            continue
        
        offense = team.get('offense', {})
        defense = team.get('defense', {})
        special = team.get('specialTeams', {})
        
        metrics = [
            ('cfb_sp_overall', team.get('rating')),
            ('cfb_sp_ranking', team.get('ranking')),
        ]
        
        if isinstance(offense, dict):
            metrics.append(('cfb_sp_offense', offense.get('rating')))
            metrics.append(('cfb_sp_offense_rank', offense.get('ranking')))
        else:
            metrics.append(('cfb_sp_offense', offense))
            
        if isinstance(defense, dict):
            metrics.append(('cfb_sp_defense', defense.get('rating')))
            metrics.append(('cfb_sp_defense_rank', defense.get('ranking')))
        else:
            metrics.append(('cfb_sp_defense', defense))
            
        if isinstance(special, dict):
            metrics.append(('cfb_sp_special', special.get('rating')))
        
        for stat_key, val in metrics:
            if val is not None:
                try:
                    cur.execute(sql, ('ncaaf', year, game_key, game_date, team_name, stat_key, float(val)))
                    stats_added += 1
                    sp_count += 1
                except Exception as e:
                    conn.rollback()
    
    conn.commit()
    print(f'  SP+ metrics added: {sp_count}')
    
    # 2. Preseason Rankings (for regression tracking)
    preseason = fetch_preseason_rankings(year)
    preseason_count = 0
    for poll in preseason.get('polls', []):
        poll_name = poll.get('poll', 'Unknown')
        poll_key = poll_name.lower().replace(' ', '_').replace("'", '')
        
        for rank_entry in poll.get('ranks', []):
            team_name = rank_entry.get('school', '')
            rank = rank_entry.get('rank')
            
            if not team_name or rank is None:
                continue
            
            stat_key = f'cfb_preseason_rank_{poll_key}'
            try:
                cur.execute(sql, ('ncaaf', year, game_key, game_date, team_name, stat_key, float(rank)))
                stats_added += 1
                preseason_count += 1
            except:
                conn.rollback()
    
    conn.commit()
    print(f'  Preseason rankings added: {preseason_count}')
    
    # 3. Current Rankings
    current = fetch_current_rankings(year)
    current_count = 0
    current_week = current.get('week', 0)
    for poll in current.get('polls', []):
        poll_name = poll.get('poll', 'Unknown')
        poll_key = poll_name.lower().replace(' ', '_').replace("'", '')
        
        for rank_entry in poll.get('ranks', []):
            team_name = rank_entry.get('school', '')
            rank = rank_entry.get('rank')
            points = rank_entry.get('points')
            
            if not team_name or rank is None:
                continue
            
            metrics = [
                (f'cfb_current_rank_{poll_key}', rank),
                (f'cfb_current_points_{poll_key}', points),
            ]
            
            for stat_key, val in metrics:
                if val is not None:
                    try:
                        cur.execute(sql, ('ncaaf', year, game_key, game_date, team_name, stat_key, float(val)))
                        stats_added += 1
                        current_count += 1
                    except:
                        conn.rollback()
    
    conn.commit()
    print(f'  Current rankings (Week {current_week}) added: {current_count}')
    
    # 4. PPA (Predicted Points Added)
    ppa_count = 0
    ppa_data = fetch_ppa(year)
    for team in ppa_data:
        team_name = team.get('team', '')
        if not team_name:
            continue
        
        off = team.get('offense', {}) or {}
        def_ = team.get('defense', {}) or {}
        
        metrics = [
            ('cfb_ppa_overall', off.get('overall')),
            ('cfb_ppa_passing', off.get('passing')),
            ('cfb_ppa_rushing', off.get('rushing')),
            ('cfb_ppa_first_down', off.get('firstDown')),
            ('cfb_ppa_second_down', off.get('secondDown')),
            ('cfb_ppa_third_down', off.get('thirdDown')),
            ('cfb_ppa_def_overall', def_.get('overall')),
            ('cfb_ppa_def_passing', def_.get('passing')),
            ('cfb_ppa_def_rushing', def_.get('rushing')),
        ]
        
        for stat_key, val in metrics:
            if val is not None:
                try:
                    cur.execute(sql, ('ncaaf', year, game_key, game_date, team_name, stat_key, float(val)))
                    stats_added += 1
                    ppa_count += 1
                except:
                    conn.rollback()
    
    conn.commit()
    print(f'  PPA metrics added: {ppa_count}')
    
    # 5. Team Talent Composite
    talent_count = 0
    talent_data = fetch_team_talent(year)
    for team in talent_data:
        team_name = team.get('school', '')
        talent = team.get('talent')
        
        if not team_name or talent is None:
            continue
        
        try:
            cur.execute(sql, ('ncaaf', year, game_key, game_date, team_name, 'cfb_talent_composite', float(talent)))
            stats_added += 1
            talent_count += 1
        except:
            conn.rollback()
    
    conn.commit()
    print(f'  Talent composite added: {talent_count}')
    
    # 6. Returning Production
    returning_count = 0
    returning_data = fetch_returning_production(year)
    for team in returning_data:
        team_name = team.get('team', '')
        if not team_name:
            continue
        
        metrics = [
            ('cfb_returning_ppa', team.get('totalPPA')),
            ('cfb_returning_usage', team.get('totalUsage')),
            ('cfb_returning_pass_ppa', team.get('passingPPA')),
            ('cfb_returning_rush_ppa', team.get('rushingPPA')),
            ('cfb_returning_rec_ppa', team.get('receivingPPA')),
        ]
        
        for stat_key, val in metrics:
            if val is not None:
                try:
                    cur.execute(sql, ('ncaaf', year, game_key, game_date, team_name, stat_key, float(val)))
                    stats_added += 1
                    returning_count += 1
                except:
                    conn.rollback()
    
    conn.commit()
    print(f'  Returning production added: {returning_count}')
    
    # Calculate regression metrics (preseason rank - current rank)
    print('Calculating regression metrics...')
    cur.execute('''
        SELECT t1.team, 
               t1.value as preseason_rank,
               t2.value as current_rank,
               (t1.value - t2.value) as rank_change
        FROM "TeamGameMetric" t1
        JOIN "TeamGameMetric" t2 ON t1.team = t2.team 
            AND t1.league = t2.league 
            AND t1.season = t2.season
        WHERE t1.league = 'ncaaf' 
            AND t1.season = %s
            AND t1."statKey" = 'cfb_preseason_rank_ap_top_25'
            AND t2."statKey" = 'cfb_current_rank_ap_top_25'
    ''', (year,))
    
    regression_count = 0
    for row in cur.fetchall():
        team_name, preseason, current, change = row
        if change is not None:
            try:
                cur.execute(sql, ('ncaaf', year, game_key, game_date, team_name, 'cfb_rank_change_ap', float(change)))
                stats_added += 1
                regression_count += 1
            except:
                conn.rollback()
    
    conn.commit()
    print(f'  Regression metrics added: {regression_count}')
    
    cur.close()
    conn.close()
    
    print(f'')
    print(f'✅ CFB Advanced Stats ingested: {stats_added} total metrics')
    return {'success': True, 'stats_added': stats_added}

if __name__ == '__main__':
    import sys
    year = int(sys.argv[1]) if len(sys.argv) > 1 else 2025
    ingest_cfb_advanced(year)