#!/usr/bin/env python3
"""
Ingest Play-by-Play Data from ESPN
Captures game flow events for pace analysis, foul tracking, turnover impact, etc.

ESPN endpoint: /sports/{sport}/{league}/playbyplay?event={id}

Run: Daily after games complete
"""
import requests
import psycopg2
import os
import time
import argparse
from datetime import datetime, timezone, timedelta
import json

ESPN_BASE = 'https://site.api.espn.com/apis/site/v2/sports'
REQUEST_DELAY = 0.5
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}

LEAGUE_CONFIG = {
    'nba': {'sport': 'basketball', 'espn_league': 'nba'},
    'nfl': {'sport': 'football', 'espn_league': 'nfl'},
    'nhl': {'sport': 'hockey', 'espn_league': 'nhl'},
    'ncaab': {'sport': 'basketball', 'espn_league': 'mens-college-basketball'},
}


def load_db_url():
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().split('?')[0]
    except FileNotFoundError:
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def ensure_pbp_table(cur):
    """Create PlayByPlay table if needed"""
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "PlayByPlay" (
            id BIGSERIAL PRIMARY KEY,
            league VARCHAR(50) NOT NULL,
            "gameId" BIGINT,
            "espnEventId" VARCHAR(50),
            "gameDate" TIMESTAMP,
            period INT,
            "gameClock" VARCHAR(20),
            "sequenceNumber" INT,
            "playType" VARCHAR(100),
            "playText" TEXT,
            "homeScore" INT,
            "awayScore" INT,
            "scoringPlay" BOOLEAN DEFAULT FALSE,
            "teamId" VARCHAR(50),
            "team" VARCHAR(20),
            "playerId" VARCHAR(50),
            "playerName" VARCHAR(200),
            "turnover" BOOLEAN DEFAULT FALSE,
            "foul" BOOLEAN DEFAULT FALSE,
            "timeout" BOOLEAN DEFAULT FALSE,
            raw JSONB,
            "createdAt" TIMESTAMP DEFAULT NOW(),
            UNIQUE(league, "espnEventId", "sequenceNumber")
        )
    ''')
    cur.execute('CREATE INDEX IF NOT EXISTS "PlayByPlay_league_game_idx" ON "PlayByPlay" (league, "gameId")')
    cur.execute('CREATE INDEX IF NOT EXISTS "PlayByPlay_playType_idx" ON "PlayByPlay" ("playType")')
    cur.execute('CREATE INDEX IF NOT EXISTS "PlayByPlay_period_idx" ON "PlayByPlay" (league, "gameId", period)')


def get_espn_events_for_date(sport, espn_league, date_str):
    """Get ESPN event IDs for a date"""
    url = f'{ESPN_BASE}/{sport}/{espn_league}/scoreboard?dates={date_str}'
    if espn_league in ['mens-college-basketball', 'college-football']:
        url += '&groups=50&limit=300'

    try:
        resp = requests.get(url, headers=HEADERS, timeout=15)
        if resp.status_code != 200:
            return []

        data = resp.json()
        events = []

        for event in data.get('events', []):
            event_id = event.get('id')
            status = event.get('status', {}).get('type', {}).get('state')

            # Only get completed games
            if status != 'post':
                continue

            competitions = event.get('competitions', [])
            if not competitions:
                continue

            comp = competitions[0]
            competitors = comp.get('competitors', [])

            home_team = None
            away_team = None
            for competitor in competitors:
                team = competitor.get('team', {})
                abbrev = team.get('abbreviation', '').upper()
                if competitor.get('homeAway') == 'home':
                    home_team = abbrev
                else:
                    away_team = abbrev

            if event_id:
                events.append({
                    'id': event_id,
                    'home': home_team,
                    'away': away_team,
                    'date': event.get('date')
                })

        return events
    except Exception as e:
        return []


def fetch_play_by_play(sport, espn_league, event_id):
    """Fetch play-by-play data from summary endpoint"""
    # Use summary endpoint which includes plays
    url = f'{ESPN_BASE}/{sport}/{espn_league}/summary?event={event_id}'

    try:
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code != 200:
            return None

        return resp.json()
    except:
        return None


def parse_plays(pbp_data, league):
    """Parse play-by-play data into structured records"""
    plays = []

    if not pbp_data:
        return plays

    # Get teams info
    teams_map = {}
    for team in pbp_data.get('boxscore', {}).get('teams', []):
        team_info = team.get('team', {})
        team_id = str(team_info.get('id', ''))
        teams_map[team_id] = team_info.get('abbreviation', '').upper()

    # Get plays - different structure for NFL vs NBA
    all_plays = pbp_data.get('plays', [])

    # NFL: plays are inside drives
    if not all_plays:
        drives = pbp_data.get('drives', {})
        if isinstance(drives, dict):
            previous_drives = drives.get('previous', [])
            current_drive = drives.get('current', {})
            drive_list = previous_drives + ([current_drive] if current_drive else [])

            for drive in drive_list:
                if isinstance(drive, dict):
                    drive_plays = drive.get('plays', [])
                    all_plays.extend(drive_plays)

    seq = 0
    for play in all_plays:
        seq += 1

        play_type = play.get('type', {})
        play_type_text = play_type.get('text', '') if isinstance(play_type, dict) else str(play_type)

        # Detect special plays
        text_lower = play.get('text', '').lower()
        is_turnover = 'turnover' in text_lower or 'steal' in text_lower or 'interception' in text_lower
        is_foul = 'foul' in text_lower
        is_timeout = 'timeout' in text_lower

        # Get score
        home_score = play.get('homeScore', 0)
        away_score = play.get('awayScore', 0)

        # Get period and clock
        period = play.get('period', {})
        period_num = period.get('number', 1) if isinstance(period, dict) else 1
        clock = play.get('clock', {})
        game_clock = clock.get('displayValue', '') if isinstance(clock, dict) else str(clock)

        # Get team
        team_data = play.get('team', {})
        team_id = str(team_data.get('id', '')) if isinstance(team_data, dict) else ''
        team_abbrev = teams_map.get(team_id, '')

        # Get participants (player info)
        participants = play.get('participants', [])
        player_id = None
        player_name = None
        if participants:
            athlete = participants[0].get('athlete', {})
            player_id = str(athlete.get('id', ''))
            player_name = athlete.get('displayName', '')

        plays.append({
            'period': period_num,
            'game_clock': game_clock,
            'sequence': seq,
            'play_type': play_type_text,
            'play_text': play.get('text', ''),
            'home_score': home_score,
            'away_score': away_score,
            'scoring_play': play.get('scoringPlay', False),
            'team_id': team_id,
            'team': team_abbrev,
            'player_id': player_id,
            'player_name': player_name,
            'turnover': is_turnover,
            'foul': is_foul,
            'timeout': is_timeout,
            'raw': play
        })

    return plays


def ingest_pbp_for_league(league, days_back=7, limit=50):
    """Ingest play-by-play for a league"""
    if league not in LEAGUE_CONFIG:
        print(f"Unknown league: {league}")
        return {'games': 0, 'plays': 0}

    config = LEAGUE_CONFIG[league]
    db_url = load_db_url()
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()

    ensure_pbp_table(cur)
    conn.commit()

    end_date = datetime.now(timezone.utc).date()
    start_date = end_date - timedelta(days=days_back)

    print(f"\n{league.upper()}: Fetching PBP from {start_date} to {end_date}")

    total_games = 0
    total_plays = 0
    games_processed = 0

    current_date = start_date
    while current_date <= end_date and games_processed < limit:
        date_str = current_date.strftime('%Y%m%d')
        events = get_espn_events_for_date(config['sport'], config['espn_league'], date_str)

        for event in events:
            if games_processed >= limit:
                break

            # Check if already ingested
            cur.execute('''
                SELECT COUNT(*) FROM "PlayByPlay"
                WHERE league = %s AND "espnEventId" = %s
            ''', (league, event['id']))

            if cur.fetchone()[0] > 0:
                continue

            time.sleep(REQUEST_DELAY)

            pbp_data = fetch_play_by_play(config['sport'], config['espn_league'], event['id'])
            if not pbp_data:
                continue

            plays = parse_plays(pbp_data, league)
            if not plays:
                continue

            # Find matching game in SportsGame (with 1-day tolerance for timezone issues)
            cur.execute('''
                SELECT id, "gameDate" FROM "SportsGame"
                WHERE league = %s
                  AND "homeTeam" = %s AND "awayTeam" = %s
                  AND "gameDate"::date BETWEEN %s AND %s + INTERVAL '1 day'
                LIMIT 1
            ''', (league, event['home'], event['away'], current_date, current_date))

            game = cur.fetchone()
            game_id = game[0] if game else None
            game_date = game[1] if game else current_date

            # Insert plays
            for play in plays:
                try:
                    cur.execute('''
                        INSERT INTO "PlayByPlay" (
                            league, "gameId", "espnEventId", "gameDate",
                            period, "gameClock", "sequenceNumber", "playType", "playText",
                            "homeScore", "awayScore", "scoringPlay",
                            "teamId", team, "playerId", "playerName",
                            turnover, foul, timeout, raw
                        )
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        ON CONFLICT (league, "espnEventId", "sequenceNumber") DO NOTHING
                    ''', (
                        league, game_id, event['id'], game_date,
                        play['period'], play['game_clock'], play['sequence'],
                        play['play_type'], play['play_text'],
                        play['home_score'], play['away_score'], play['scoring_play'],
                        play['team_id'], play['team'], play['player_id'], play['player_name'],
                        play['turnover'], play['foul'], play['timeout'],
                        json.dumps(play['raw'])
                    ))
                    total_plays += 1
                except Exception as e:
                    pass

            total_games += 1
            games_processed += 1

            if games_processed % 10 == 0:
                print(f"  {league}: Processed {games_processed} games, {total_plays} plays")
                conn.commit()

        current_date += timedelta(days=1)

    conn.commit()
    cur.close()
    conn.close()

    print(f"  {league}: Total {total_games} games, {total_plays} plays")
    return {'games': total_games, 'plays': total_plays}


def main():
    parser = argparse.ArgumentParser(description='Ingest play-by-play from ESPN')
    parser.add_argument('--leagues', type=str, default='nba,nfl',
                        help='Comma-separated leagues')
    parser.add_argument('--days', type=int, default=7,
                        help='Days to look back')
    parser.add_argument('--limit', type=int, default=100,
                        help='Max games per league')
    args = parser.parse_args()

    print("=" * 60)
    print("INGEST PLAY-BY-PLAY FROM ESPN")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    leagues = [l.strip().lower() for l in args.leagues.split(',')]

    total_games = 0
    total_plays = 0

    for league in leagues:
        result = ingest_pbp_for_league(league, days_back=args.days, limit=args.limit)
        total_games += result['games']
        total_plays += result['plays']

    print(f"\n{'='*60}")
    print(f"TOTAL: {total_games} games, {total_plays} plays")
    print("=" * 60)


if __name__ == '__main__':
    main()
