#!/usr/bin/env python3
"""
Backfill MLB scores from ESPN API.
"""

import os
import sys
import requests
import psycopg2
from datetime import datetime
import time

DATABASE_URL = os.environ.get('SPORTS_DATABASE_URL', '')
ESPN_SCOREBOARD_URL = "https://site.api.espn.com/apis/site/v2/sports/baseball/mlb/scoreboard"

def get_db_connection():
    url = DATABASE_URL.split('?')[0]
    return psycopg2.connect(url)

def normalize_team(name):
    if not name:
        return ''
    name = name.upper().strip()
    # MLB team abbreviation mappings
    mappings = {
        'ARIZONA DIAMONDBACKS': 'ARI', 'DIAMONDBACKS': 'ARI', 'D-BACKS': 'ARI',
        'ATLANTA BRAVES': 'ATL', 'BRAVES': 'ATL',
        'BALTIMORE ORIOLES': 'BAL', 'ORIOLES': 'BAL',
        'BOSTON RED SOX': 'BOS', 'RED SOX': 'BOS',
        'CHICAGO CUBS': 'CHC', 'CUBS': 'CHC',
        'CHICAGO WHITE SOX': 'CWS', 'WHITE SOX': 'CWS', 'CHW': 'CWS',
        'CINCINNATI REDS': 'CIN', 'REDS': 'CIN',
        'CLEVELAND GUARDIANS': 'CLE', 'GUARDIANS': 'CLE', 'INDIANS': 'CLE',
        'COLORADO ROCKIES': 'COL', 'ROCKIES': 'COL',
        'DETROIT TIGERS': 'DET', 'TIGERS': 'DET',
        'HOUSTON ASTROS': 'HOU', 'ASTROS': 'HOU',
        'KANSAS CITY ROYALS': 'KC', 'ROYALS': 'KC', 'KCR': 'KC',
        'LOS ANGELES ANGELS': 'LAA', 'ANGELS': 'LAA',
        'LOS ANGELES DODGERS': 'LAD', 'DODGERS': 'LAD',
        'MIAMI MARLINS': 'MIA', 'MARLINS': 'MIA',
        'MILWAUKEE BREWERS': 'MIL', 'BREWERS': 'MIL',
        'MINNESOTA TWINS': 'MIN', 'TWINS': 'MIN',
        'NEW YORK METS': 'NYM', 'METS': 'NYM',
        'NEW YORK YANKEES': 'NYY', 'YANKEES': 'NYY',
        'OAKLAND ATHLETICS': 'OAK', 'ATHLETICS': 'OAK', "A'S": 'OAK',
        'PHILADELPHIA PHILLIES': 'PHI', 'PHILLIES': 'PHI',
        'PITTSBURGH PIRATES': 'PIT', 'PIRATES': 'PIT',
        'SAN DIEGO PADRES': 'SD', 'PADRES': 'SD', 'SDP': 'SD',
        'SAN FRANCISCO GIANTS': 'SF', 'GIANTS': 'SF', 'SFG': 'SF',
        'SEATTLE MARINERS': 'SEA', 'MARINERS': 'SEA',
        'ST. LOUIS CARDINALS': 'STL', 'CARDINALS': 'STL', 'ST LOUIS': 'STL',
        'TAMPA BAY RAYS': 'TB', 'RAYS': 'TB', 'TBR': 'TB',
        'TEXAS RANGERS': 'TEX', 'RANGERS': 'TEX',
        'TORONTO BLUE JAYS': 'TOR', 'BLUE JAYS': 'TOR',
        'WASHINGTON NATIONALS': 'WAS', 'NATIONALS': 'WAS', 'WSH': 'WAS',
    }
    for full, abbr in mappings.items():
        if full in name or name == abbr:
            return abbr
    # Return first 3 chars as fallback
    return name[:3] if len(name) >= 3 else name

def fetch_espn_scoreboard(date_str):
    params = {'dates': date_str.replace('-', ''), 'limit': 50}
    try:
        resp = requests.get(ESPN_SCOREBOARD_URL, params=params, timeout=30)
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        print(f"  Error fetching ESPN for {date_str}: {e}")
        return None

def extract_scores_from_espn(data):
    games = []
    if not data or 'events' not in data:
        return games
    for event in data['events']:
        try:
            status = event.get('status', {}).get('type', {}).get('name', '')
            if status not in ('STATUS_FINAL', 'STATUS_POSTPONED'):
                continue
            competitors = event.get('competitions', [{}])[0].get('competitors', [])
            if len(competitors) != 2:
                continue
            home = away = None
            home_score = away_score = None
            for comp in competitors:
                team_name = comp.get('team', {}).get('displayName', '') or comp.get('team', {}).get('name', '')
                score = comp.get('score')
                is_home = comp.get('homeAway') == 'home'
                if is_home:
                    home = normalize_team(team_name)
                    home_score = int(score) if score else None
                else:
                    away = normalize_team(team_name)
                    away_score = int(score) if score else None
            if home and away and home_score is not None and away_score is not None:
                games.append({'home': home, 'away': away, 'home_score': home_score, 'away_score': away_score, 'status': 'Final'})
        except:
            continue
    return games

def match_game(db_home, db_away, espn_games):
    db_home_norm = normalize_team(db_home)
    db_away_norm = normalize_team(db_away)
    for g in espn_games:
        if g['home'] == db_home_norm and g['away'] == db_away_norm:
            return g
        # Fuzzy match
        if db_home_norm[:3] == g['home'][:3] and db_away_norm[:3] == g['away'][:3]:
            return g
    return None

def backfill_scores():
    conn = get_db_connection()
    cur = conn.cursor()
    cur.execute("""
        SELECT id, "homeTeam", "awayTeam", "gameDate"::date as game_date
        FROM "SportsGame"
        WHERE league = 'mlb'
          AND "homeScore" IS NULL
          AND "gameDate" < NOW() - INTERVAL '3 hours'
        ORDER BY "gameDate" DESC
        LIMIT 5000
    """)
    games_to_update = cur.fetchall()
    print(f"Found {len(games_to_update)} MLB games without scores")
    if not games_to_update:
        return

    games_by_date = {}
    for game_id, home, away, game_date in games_to_update:
        date_str = game_date.strftime('%Y-%m-%d')
        if date_str not in games_by_date:
            games_by_date[date_str] = []
        games_by_date[date_str].append((game_id, home, away))

    print(f"Processing {len(games_by_date)} unique dates")
    updated = 0
    not_found = 0

    for date_str in sorted(games_by_date.keys(), reverse=True):
        db_games = games_by_date[date_str]
        print(f"\n{date_str}: {len(db_games)} games to check")
        espn_data = fetch_espn_scoreboard(date_str)
        if not espn_data:
            continue
        espn_games = extract_scores_from_espn(espn_data)
        print(f"  ESPN has {len(espn_games)} completed games")

        for game_id, home, away in db_games:
            match = match_game(home, away, espn_games)
            if match:
                cur.execute("""
                    UPDATE "SportsGame"
                    SET "homeScore" = %s, "awayScore" = %s, status = %s, "updatedAt" = NOW()
                    WHERE id = %s
                """, (match['home_score'], match['away_score'], match['status'], game_id))
                updated += 1
            else:
                not_found += 1
        conn.commit()
        time.sleep(0.3)

    cur.close()
    conn.close()
    print(f"\n=== Summary ===\nUpdated: {updated}\nNot found: {not_found}")

if __name__ == '__main__':
    if not DATABASE_URL:
        print("Error: SPORTS_DATABASE_URL not set")
        sys.exit(1)
    backfill_scores()
