#!/var/www/html/eventheodds/.venv-scraper/bin/python
"""
Backfill Period/Quarter/Half Scoring from ESPN
Populates homeQ1-Q4, awayQ1-Q4, homeOT, awayOT fields in SportsGame.

ESPN API endpoints:
- NBA/NCAAB: basketball/{league}/summary?event={id}
- NFL/NCAAF: football/{league}/summary?event={id}
- NHL: hockey/nhl/summary?event={id}

Run: Daily after games complete

NOTE: Some games may fail to match due to:
- Team name mismatches between data sources
- Games not yet available in ESPN API
- Timezone differences causing date mismatches
Expected error rate: 10-20% for recent games.
"""
import requests
import psycopg2
import os
import time
import argparse
from datetime import datetime, timezone, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed

ESPN_BASE = 'https://site.api.espn.com/apis/site/v2/sports'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}

# Rate limiting
REQUEST_DELAY = 0.3  # seconds between requests
MAX_WORKERS = 3

# League to ESPN mapping
LEAGUE_CONFIG = {
    'nba': {'sport': 'basketball', 'espn_league': 'nba', 'periods': 4},
    'ncaab': {'sport': 'basketball', 'espn_league': 'mens-college-basketball', 'periods': 2},
    'nfl': {'sport': 'football', 'espn_league': 'nfl', 'periods': 4},
    'ncaaf': {'sport': 'football', 'espn_league': 'college-football', 'periods': 4},
    'nhl': {'sport': 'hockey', 'espn_league': 'nhl', 'periods': 3},
    'wnba': {'sport': 'basketball', 'espn_league': 'wnba', 'periods': 4},
}

# Team name normalization to ESPN abbreviations
TEAM_TO_ESPN = {
    # NBA
    'GSW': 'GS', 'WARRIORS': 'GS', 'GOLDEN STATE': 'GS', 'GOLDEN STATE WARRIORS': 'GS',
    'NYK': 'NY', 'KNICKS': 'NY', 'NEW YORK': 'NY', 'NEW YORK KNICKS': 'NY',
    'NOP': 'NO', 'PELICANS': 'NO', 'NEW ORLEANS': 'NO', 'NEW ORLEANS PELICANS': 'NO',
    'SAS': 'SA', 'SPURS': 'SA', 'SAN ANTONIO': 'SA', 'SAN ANTONIO SPURS': 'SA',
    'UTA': 'UTAH', 'JAZZ': 'UTAH', 'UTAH JAZZ': 'UTAH',
    'WAS': 'WSH', 'WIZARDS': 'WSH', 'WASHINGTON': 'WSH', 'WASHINGTON WIZARDS': 'WSH',
    'PHO': 'PHX', 'SUNS': 'PHX', 'PHOENIX': 'PHX', 'PHOENIX SUNS': 'PHX',
    # Full names to abbreviations
    'HAWKS': 'ATL', 'ATLANTA HAWKS': 'ATL', 'ATLANTA': 'ATL',
    'CELTICS': 'BOS', 'BOSTON CELTICS': 'BOS', 'BOSTON': 'BOS',
    'NETS': 'BKN', 'BROOKLYN NETS': 'BKN', 'BROOKLYN': 'BKN',
    'HORNETS': 'CHA', 'CHARLOTTE HORNETS': 'CHA', 'CHARLOTTE': 'CHA',
    'BULLS': 'CHI', 'CHICAGO BULLS': 'CHI', 'CHICAGO': 'CHI',
    'CAVALIERS': 'CLE', 'CLEVELAND CAVALIERS': 'CLE', 'CLEVELAND': 'CLE', 'CAVS': 'CLE',
    'MAVERICKS': 'DAL', 'DALLAS MAVERICKS': 'DAL', 'DALLAS': 'DAL', 'MAVS': 'DAL',
    'NUGGETS': 'DEN', 'DENVER NUGGETS': 'DEN', 'DENVER': 'DEN',
    'PISTONS': 'DET', 'DETROIT PISTONS': 'DET', 'DETROIT': 'DET',
    'ROCKETS': 'HOU', 'HOUSTON ROCKETS': 'HOU', 'HOUSTON': 'HOU',
    'PACERS': 'IND', 'INDIANA PACERS': 'IND', 'INDIANA': 'IND',
    'CLIPPERS': 'LAC', 'LOS ANGELES CLIPPERS': 'LAC', 'LA CLIPPERS': 'LAC',
    'LAKERS': 'LAL', 'LOS ANGELES LAKERS': 'LAL', 'LA LAKERS': 'LAL',
    'GRIZZLIES': 'MEM', 'MEMPHIS GRIZZLIES': 'MEM', 'MEMPHIS': 'MEM',
    'HEAT': 'MIA', 'MIAMI HEAT': 'MIA', 'MIAMI': 'MIA',
    'BUCKS': 'MIL', 'MILWAUKEE BUCKS': 'MIL', 'MILWAUKEE': 'MIL',
    'TIMBERWOLVES': 'MIN', 'MINNESOTA TIMBERWOLVES': 'MIN', 'MINNESOTA': 'MIN', 'TWOLVES': 'MIN',
    'THUNDER': 'OKC', 'OKLAHOMA CITY THUNDER': 'OKC', 'OKLAHOMA CITY': 'OKC',
    'MAGIC': 'ORL', 'ORLANDO MAGIC': 'ORL', 'ORLANDO': 'ORL',
    '76ERS': 'PHI', 'PHILADELPHIA 76ERS': 'PHI', 'PHILADELPHIA': 'PHI', 'SIXERS': 'PHI',
    'TRAIL BLAZERS': 'POR', 'PORTLAND TRAIL BLAZERS': 'POR', 'PORTLAND': 'POR', 'BLAZERS': 'POR',
    'KINGS': 'SAC', 'SACRAMENTO KINGS': 'SAC', 'SACRAMENTO': 'SAC',
    'RAPTORS': 'TOR', 'TORONTO RAPTORS': 'TOR', 'TORONTO': 'TOR',
    # NFL
    'KC': 'KC', 'CHIEFS': 'KC', 'KANSAS CITY': 'KC',
    'GB': 'GB', 'PACKERS': 'GB', 'GREEN BAY': 'GB',
    'NE': 'NE', 'PATRIOTS': 'NE', 'NEW ENGLAND': 'NE',
    'TB': 'TB', 'BUCCANEERS': 'TB', 'TAMPA BAY': 'TB', 'BUCS': 'TB',
    'NO': 'NO', 'SAINTS': 'NO',
    'SF': 'SF', '49ERS': 'SF', 'SAN FRANCISCO': 'SF', 'NINERS': 'SF',
    'SEA': 'SEA', 'SEAHAWKS': 'SEA', 'SEATTLE': 'SEA',
    'LAR': 'LAR', 'RAMS': 'LAR', 'LOS ANGELES RAMS': 'LAR', 'LA RAMS': 'LAR',
    'ARI': 'ARI', 'CARDINALS': 'ARI', 'ARIZONA': 'ARI',
    'BAL': 'BAL', 'RAVENS': 'BAL', 'BALTIMORE': 'BAL',
    'CIN': 'CIN', 'BENGALS': 'CIN', 'CINCINNATI': 'CIN',
    'PIT': 'PIT', 'STEELERS': 'PIT', 'PITTSBURGH': 'PIT',
    'BUF': 'BUF', 'BILLS': 'BUF', 'BUFFALO': 'BUF',
    'NYJ': 'NYJ', 'JETS': 'NYJ', 'NEW YORK JETS': 'NYJ',
    'NYG': 'NYG', 'GIANTS': 'NYG', 'NEW YORK GIANTS': 'NYG',
    'JAX': 'JAX', 'JAGUARS': 'JAX', 'JACKSONVILLE': 'JAX', 'JAC': 'JAX',
    'TEN': 'TEN', 'TITANS': 'TEN', 'TENNESSEE': 'TEN',
    'CAR': 'CAR', 'PANTHERS': 'CAR', 'CAROLINA': 'CAR',
    'LV': 'LV', 'RAIDERS': 'LV', 'LAS VEGAS': 'LV', 'OAK': 'LV',
    'LAC': 'LAC', 'CHARGERS': 'LAC', 'LOS ANGELES CHARGERS': 'LAC', 'LA CHARGERS': 'LAC',
    # NHL
    'DUCKS': 'ANA', 'ANAHEIM': 'ANA', 'ANAHEIM DUCKS': 'ANA', 'MIGHTY DUCKS': 'ANA',
    'COYOTES': 'ARI', 'ARIZONA': 'ARI', 'ARIZONA COYOTES': 'ARI', 'UTAH HOCKEY CLUB': 'UTAH', 'UHC': 'UTAH',
    'BRUINS': 'BOS', 'BOSTON': 'BOS', 'BOSTON BRUINS': 'BOS',
    'SABRES': 'BUF', 'BUFFALO': 'BUF', 'BUFFALO SABRES': 'BUF',
    'FLAMES': 'CGY', 'CALGARY': 'CGY', 'CALGARY FLAMES': 'CGY',
    'HURRICANES': 'CAR', 'CAROLINA': 'CAR', 'CAROLINA HURRICANES': 'CAR',
    'BLACKHAWKS': 'CHI', 'CHICAGO BLACKHAWKS': 'CHI',
    'AVALANCHE': 'COL', 'COLORADO': 'COL', 'COLORADO AVALANCHE': 'COL',
    'BLUE JACKETS': 'CBJ', 'COLUMBUS': 'CBJ', 'COLUMBUS BLUE JACKETS': 'CBJ',
    'STARS': 'DAL', 'DALLAS': 'DAL', 'DALLAS STARS': 'DAL',
    'RED WINGS': 'DET', 'DETROIT RED WINGS': 'DET',
    'OILERS': 'EDM', 'EDMONTON': 'EDM', 'EDMONTON OILERS': 'EDM',
    'PANTHERS': 'FLA', 'FLORIDA': 'FLA', 'FLORIDA PANTHERS': 'FLA',
    'KINGS': 'LA', 'LOS ANGELES': 'LA', 'LOS ANGELES KINGS': 'LA', 'LA KINGS': 'LA', 'LAK': 'LA',
    'WILD': 'MIN', 'MINNESOTA': 'MIN', 'MINNESOTA WILD': 'MIN',
    'CANADIENS': 'MTL', 'MONTREAL': 'MTL', 'MONTREAL CANADIENS': 'MTL',
    'PREDATORS': 'NSH', 'NASHVILLE': 'NSH', 'NASHVILLE PREDATORS': 'NSH',
    'DEVILS': 'NJ', 'NEW JERSEY': 'NJ', 'NEW JERSEY DEVILS': 'NJ', 'NJD': 'NJ',
    'ISLANDERS': 'NYI', 'NEW YORK ISLANDERS': 'NYI', 'NY ISLANDERS': 'NYI',
    'RANGERS': 'NYR', 'NEW YORK RANGERS': 'NYR', 'NY RANGERS': 'NYR',
    'SENATORS': 'OTT', 'OTTAWA': 'OTT', 'OTTAWA SENATORS': 'OTT',
    'FLYERS': 'PHI', 'PHILADELPHIA': 'PHI', 'PHILADELPHIA FLYERS': 'PHI',
    'PENGUINS': 'PIT', 'PITTSBURGH': 'PIT', 'PITTSBURGH PENGUINS': 'PIT',
    'SHARKS': 'SJ', 'SAN JOSE': 'SJ', 'SAN JOSE SHARKS': 'SJ', 'SJS': 'SJ',
    'KRAKEN': 'SEA', 'SEATTLE': 'SEA', 'SEATTLE KRAKEN': 'SEA',
    'BLUES': 'STL', 'ST. LOUIS': 'STL', 'ST LOUIS': 'STL', 'ST. LOUIS BLUES': 'STL',
    'LIGHTNING': 'TB', 'TAMPA BAY': 'TB', 'TAMPA BAY LIGHTNING': 'TB', 'TBL': 'TB',
    'MAPLE LEAFS': 'TOR', 'TORONTO': 'TOR', 'TORONTO MAPLE LEAFS': 'TOR',
    'CANUCKS': 'VAN', 'VANCOUVER': 'VAN', 'VANCOUVER CANUCKS': 'VAN',
    'GOLDEN KNIGHTS': 'VGK', 'VEGAS': 'VGK', 'VEGAS GOLDEN KNIGHTS': 'VGK',
    'CAPITALS': 'WSH', 'WASHINGTON': 'WSH', 'WASHINGTON CAPITALS': 'WSH',
    'JETS': 'WPG', 'WINNIPEG': 'WPG', 'WINNIPEG JETS': 'WPG',
}


def normalize_team(team_name):
    """Normalize team name to ESPN abbreviation"""
    if not team_name:
        return ''
    upper = team_name.upper().strip()
    return TEAM_TO_ESPN.get(upper, upper)


def load_db_url():
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().split('?')[0]
    except FileNotFoundError:
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def fetch_espn_boxscore(sport, espn_league, event_id):
    """Fetch period scoring from ESPN summary endpoint"""
    url = f'{ESPN_BASE}/{sport}/{espn_league}/summary?event={event_id}'

    try:
        resp = requests.get(url, headers=HEADERS, timeout=15)
        if resp.status_code != 200:
            return None

        data = resp.json()

        # Extract period scoring from boxscore
        boxscore = data.get('boxscore', {})
        teams = boxscore.get('teams', [])

        if len(teams) < 2:
            return None

        result = {'home': {}, 'away': {}}

        for team_data in teams:
            team_info = team_data.get('team', {})
            home_away = team_data.get('homeAway', '')

            if home_away not in ['home', 'away']:
                continue

            # Get line scores (period by period)
            statistics = team_data.get('statistics', [])

            # Try to find linescores in the header or competition
            header = data.get('header', {})
            competitions = header.get('competitions', [])

            if competitions:
                comp = competitions[0]
                competitors = comp.get('competitors', [])
                for competitor in competitors:
                    if competitor.get('homeAway') == home_away:
                        linescores = competitor.get('linescores', [])
                        for i, ls in enumerate(linescores):
                            period_num = i + 1
                            value = ls.get('value', ls.get('displayValue'))
                            if value is not None:
                                try:
                                    result[home_away][f'q{period_num}'] = int(float(value))
                                except (ValueError, TypeError):
                                    pass

        return result if (result['home'] or result['away']) else None

    except Exception as e:
        return None


def get_espn_event_id(game_date, home_team, away_team, sport, espn_league):
    """Find ESPN event ID by searching scoreboard (checks prev/same/next day for timezone issues)"""
    norm_home = normalize_team(home_team)
    norm_away = normalize_team(away_team)

    # Check previous day, same day, and next day (UTC to Eastern timezone offset)
    # Games at midnight UTC are often 7-8pm ET the previous day
    dates_to_check = [
        game_date,
        game_date - timedelta(days=1),  # Previous day - most common for UTC->ET
        game_date + timedelta(days=1),
    ]

    for check_date in dates_to_check:
        date_str = check_date.strftime('%Y%m%d')
        url = f'{ESPN_BASE}/{sport}/{espn_league}/scoreboard?dates={date_str}'

        if espn_league in ['mens-college-basketball', 'college-football']:
            url += '&groups=50&limit=300'

        try:
            resp = requests.get(url, headers=HEADERS, timeout=15)
            if resp.status_code != 200:
                continue

            data = resp.json()
            events = data.get('events', [])

            for event in events:
                competitions = event.get('competitions', [])
                if not competitions:
                    continue

                comp = competitions[0]
                competitors = comp.get('competitors', [])

                event_home = None
                event_away = None

                for competitor in competitors:
                    team = competitor.get('team', {})
                    abbrev = team.get('abbreviation', '').upper()
                    if competitor.get('homeAway') == 'home':
                        event_home = abbrev
                    else:
                        event_away = abbrev

                # Match by abbreviation (normalized)
                if event_home and event_away:
                    if (event_home == norm_home and event_away == norm_away):
                        return event.get('id')

            time.sleep(REQUEST_DELAY)
        except Exception:
            continue

    return None


def backfill_league(league, limit=500, days_back=30):
    """Backfill period scoring for a specific league"""
    if league not in LEAGUE_CONFIG:
        print(f"Unknown league: {league}")
        return {'updated': 0, 'errors': 0}

    config = LEAGUE_CONFIG[league]
    db_url = load_db_url()
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()

    # Find games needing period scoring
    cur.execute('''
        SELECT id, "gameDate", "homeTeam", "awayTeam", "homeScore", "awayScore"
        FROM "SportsGame"
        WHERE league = %s
          AND "homeQ1" IS NULL
          AND "homeScore" IS NOT NULL
          AND "gameDate" > NOW() - INTERVAL '%s days'
          AND "gameDate" < NOW()
        ORDER BY "gameDate" DESC
        LIMIT %s
    ''', (league, days_back, limit))

    games = cur.fetchall()
    print(f"\n{league.upper()}: Found {len(games)} games needing period scoring")

    updated = 0
    errors = 0

    for game_id, game_date, home_team, away_team, home_score, away_score in games:
        time.sleep(REQUEST_DELAY)

        # First try to find ESPN event ID
        event_id = get_espn_event_id(
            game_date, home_team, away_team,
            config['sport'], config['espn_league']
        )

        if not event_id:
            errors += 1
            continue

        time.sleep(REQUEST_DELAY)

        # Fetch period scoring
        periods = fetch_espn_boxscore(config['sport'], config['espn_league'], event_id)

        if not periods:
            errors += 1
            continue

        # Build update query
        updates = []
        params = []

        home_periods = periods.get('home', {})
        away_periods = periods.get('away', {})

        # Map period data to columns
        period_map = {
            'q1': ('homeQ1', 'awayQ1'),
            'q2': ('homeQ2', 'awayQ2'),
            'q3': ('homeQ3', 'awayQ3'),
            'q4': ('homeQ4', 'awayQ4'),
            'q5': ('homeOT', 'awayOT'),  # OT
        }

        for period_key, (home_col, away_col) in period_map.items():
            if period_key in home_periods:
                updates.append(f'"{home_col}" = %s')
                params.append(home_periods[period_key])
            if period_key in away_periods:
                updates.append(f'"{away_col}" = %s')
                params.append(away_periods[period_key])

        if updates:
            updates.append('"updatedAt" = NOW()')
            params.append(game_id)

            sql = f'UPDATE "SportsGame" SET {", ".join(updates)} WHERE id = %s'
            try:
                cur.execute(sql, params)
                updated += 1
                if updated % 50 == 0:
                    print(f"  {league}: Updated {updated} games...")
                    conn.commit()
            except Exception as e:
                print(f"  Error updating {game_id}: {e}")
                conn.rollback()
                errors += 1

    conn.commit()
    cur.close()
    conn.close()

    print(f"  {league}: Updated {updated}, Errors {errors}")
    return {'updated': updated, 'errors': errors}


def main():
    parser = argparse.ArgumentParser(description='Backfill period scoring from ESPN')
    parser.add_argument('--leagues', type=str, default='nba,nfl,nhl,ncaab,ncaaf',
                        help='Comma-separated leagues')
    parser.add_argument('--limit', type=int, default=500,
                        help='Max games per league')
    parser.add_argument('--days', type=int, default=60,
                        help='Days to look back')
    args = parser.parse_args()

    print("=" * 60)
    print("BACKFILL PERIOD SCORING FROM ESPN")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    leagues = [l.strip().lower() for l in args.leagues.split(',')]

    total_updated = 0
    total_errors = 0

    for league in leagues:
        result = backfill_league(league, limit=args.limit, days_back=args.days)
        total_updated += result['updated']
        total_errors += result['errors']

    print(f"\n{'='*60}")
    print(f"TOTAL: {total_updated} updated, {total_errors} errors")
    print("=" * 60)


if __name__ == '__main__':
    main()
