#!/usr/bin/env python3
"""
Backfill NCAAB scores from ESPN API.

Finds NCAAB games in SportsGame table with NULL scores and fetches
final scores from ESPN's college basketball scoreboard API.
"""

import os
import sys
import requests
import psycopg2
from datetime import datetime, timedelta
import time
import re

# Connection string for the sports database, taken from the environment.
# Falls back to an empty string when SPORTS_DATABASE_URL is not defined.
DATABASE_URL = os.getenv('SPORTS_DATABASE_URL', '')

# ESPN men's college basketball scoreboard endpoint.
ESPN_SCOREBOARD_URL = (
    "https://site.api.espn.com/apis/site/v2/sports"
    "/basketball/mens-college-basketball/scoreboard"
)

def get_db_connection():
    """Open a psycopg2 connection to the database named by DATABASE_URL.

    Everything from the first '?' onward is cut off before connecting. That
    drops the schema parameter, which psycopg2 doesn't support, together with
    any other query parameters the URL may carry.
    """
    base_url, _, _ = DATABASE_URL.partition('?')
    return psycopg2.connect(base_url)

# Mascot / nickname phrases removed from the end of a team name.
# Only mascots belong here: an entry that contains a school name could match
# the entire input and normalize a real team to an empty string.
_MASCOT_SUFFIXES = (
    'WILDCATS', 'BULLDOGS', 'TIGERS', 'EAGLES', 'BEARS', 'LIONS', 'HAWKS',
    'CARDINALS', 'BLUE DEVILS', 'TAR HEELS', 'WOLVERINES', 'SPARTANS',
    'BUCKEYES', 'HOOSIERS', 'FIGHTING IRISH', 'JAYHAWKS', 'SOONERS',
    'LONGHORNS', 'AGGIES', 'RAZORBACKS', 'VOLUNTEERS', 'GATORS', 'SEMINOLES',
    'HURRICANES', 'CAVALIERS', 'HOKIES', 'WOLFPACK', 'DEMON DEACONS',
    'YELLOW JACKETS', 'ORANGE', 'PANTHERS', 'CRIMSON TIDE', 'REBELS',
    'COMMODORES', 'GAMECOCKS', 'TERRAPINS', 'NITTANY LIONS', 'BADGERS',
    'HAWKEYES', 'GOLDEN GOPHERS', 'BOILERMAKERS', 'ILLINI', 'HUSKERS',
    'CYCLONES', 'MOUNTAINEERS', 'HORNED FROGS', 'RED RAIDERS', 'COWBOYS',
    'COUGARS', 'DUCKS', 'BEAVERS', 'HUSKIES', 'SUN DEVILS', 'BRUINS',
    'TROJANS', 'UTES', 'BUFFALOES',
)

# The leading \s+ (rather than \s*) means a mascot is stripped only when
# whitespace separates it from preceding text. That keeps the regex from
# cutting a word in half ("SEAHAWKS" -> "SEA") and from erasing a name that
# consists of nothing but a mascot word.
_MASCOT_SUFFIX_RE = re.compile(r'\s+(?:' + '|'.join(_MASCOT_SUFFIXES) + r')$')

# School-name phrases mapped to the short code used for matching.
# Iteration order matters: the first phrase found wins, so a longer phrase
# must precede any shorter phrase it contains (e.g. 'MISSISSIPPI STATE'
# before 'MISSISSIPPI').
_TEAM_ABBREVIATIONS = {
    'NORTH CAROLINA': 'UNC',
    'UNIVERSITY OF NORTH CAROLINA': 'UNC',
    'NC STATE': 'NCST',
    'FLORIDA STATE': 'FSU',
    'MICHIGAN STATE': 'MSU',
    'OHIO STATE': 'OSU',
    'PENN STATE': 'PSU',
    'ARIZONA STATE': 'ASU',
    'OKLAHOMA STATE': 'OKST',
    'TEXAS A&M': 'TAMU',
    'TEXAS AM': 'TAMU',
    'LOUISIANA STATE': 'LSU',
    'MISSISSIPPI STATE': 'MSST',
    'OLE MISS': 'MISS',
    'MISSISSIPPI': 'MISS',
    'SOUTHERN CALIFORNIA': 'USC',
    'BRIGHAM YOUNG': 'BYU',
    'TEXAS CHRISTIAN': 'TCU',
    'SOUTHERN METHODIST': 'SMU',
    'VIRGINIA TECH': 'VT',
    'GEORGIA TECH': 'GT',
    'CENTRAL FLORIDA': 'UCF',
    'CONNECTICUT': 'UCONN',
}


def normalize_team(name):
    """Normalize a team name into a canonical key for matching.

    The name is upper-cased and trimmed, a trailing mascot (for example
    "BLUE DEVILS") is removed when it follows other text, and the result is
    replaced by a short code when it contains one of the known school-name
    phrases.

    Args:
        name: Raw team name; may be None or empty.

    Returns:
        The normalized name, or '' when ``name`` is falsy.
    """
    if not name:
        return ''
    name = name.upper().strip()
    # Drop a trailing mascot so "DUKE BLUE DEVILS" and "DUKE" normalize alike.
    name = _MASCOT_SUFFIX_RE.sub('', name)
    # NOTE(review): this is a substring test, so a longer school name that
    # merely contains a phrase (e.g. "NORTH CAROLINA CENTRAL") collapses to
    # the same code ("UNC"). Kept as-is because matching relies on both sides
    # being normalized the same way; tighten only together with match_game.
    for full, abbr in _TEAM_ABBREVIATIONS.items():
        if full in name:
            name = abbr
            break
    return name.strip()

def fetch_espn_scoreboard(date_str):
    """Download the ESPN scoreboard JSON for a single date.

    Dashes are removed from ``date_str`` before it is sent as the ``dates``
    query parameter. Returns the decoded JSON body, or None (after printing
    the error) when the request, the HTTP status check, or the JSON decoding
    raises.
    """
    query = dict(
        dates=date_str.replace('-', ''),
        groups='50',  # Division I
        limit=400,
    )
    try:
        response = requests.get(ESPN_SCOREBOARD_URL, params=query, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except Exception as exc:
        print(f"  Error fetching ESPN for {date_str}: {exc}")
        return None
    return payload

def extract_scores_from_espn(data):
    """Turn an ESPN scoreboard payload into a list of scored games.

    Only events whose status name is STATUS_FINAL or STATUS_POSTPONED are
    considered, and only when their first competition has exactly two
    competitors. Each returned dict holds the normalized 'home' and 'away'
    names, integer 'home_score' and 'away_score', the event's 'espn_id', and
    a 'status' of 'Final' or 'Postponed'. Events that are missing a team or
    a score, or that raise while being parsed, are skipped.
    """
    if not data or 'events' not in data:
        return []

    status_labels = {'STATUS_FINAL': 'Final', 'STATUS_POSTPONED': 'Postponed'}
    results = []

    for event in data['events']:
        try:
            status = event.get('status', {}).get('type', {}).get('name', '')
            if status not in status_labels:
                continue

            competition = event.get('competitions', [{}])[0]
            sides = competition.get('competitors', [])
            if len(sides) != 2:
                continue

            # Anything not flagged as 'home' is treated as the away side.
            parsed = {'home': (None, None), 'away': (None, None)}
            for side in sides:
                team = side.get('team', {})
                label = team.get('displayName', '') or team.get('name', '')
                raw_score = side.get('score')
                slot = 'home' if side.get('homeAway') == 'home' else 'away'
                parsed[slot] = (
                    normalize_team(label),
                    int(raw_score) if raw_score else None,
                )

            home, home_score = parsed['home']
            away, away_score = parsed['away']
            if not (home and away) or home_score is None or away_score is None:
                continue

            results.append({
                'home': home,
                'away': away,
                'home_score': home_score,
                'away_score': away_score,
                'espn_id': event.get('id'),
                'status': status_labels[status],
            })
        except Exception:
            # Best effort: one malformed event must not abort the whole date.
            continue

    return results

def _team_names_overlap(a, b):
    """Return True when both names are non-empty and one contains the other."""
    # The emptiness check matters: '' is a substring of every string, so
    # without it a blank name would "overlap" with any team.
    return bool(a and b) and (a in b or b in a)


def match_game(db_home, db_away, espn_games):
    """Find the ESPN game that corresponds to a database game.

    Both database names are normalized with ``normalize_team`` and compared
    against the 'home' and 'away' values of each ESPN game. Exact matches on
    both sides are preferred over partial ones across the whole list; a
    partial match requires that, for each side, one name contains the other.

    Args:
        db_home: Home team name as stored in the database.
        db_away: Away team name as stored in the database.
        espn_games: Iterable of dicts that each have 'home' and 'away' keys.

    Returns:
        The first matching ESPN game dict, or None when nothing matches or
        either database name normalizes to an empty string.
    """
    db_home_norm = normalize_team(db_home)
    db_away_norm = normalize_team(db_away)

    # A blank name carries no information and would partially match anything,
    # which could attach another game's score to this row.
    if not db_home_norm or not db_away_norm:
        return None

    # Pass 1: exact match on both sides, checked over the full list first so
    # an earlier partial hit cannot shadow a later exact one.
    for g in espn_games:
        if g['home'] == db_home_norm and g['away'] == db_away_norm:
            return g

    # Pass 2: partial match (one team name contains the other).
    for g in espn_games:
        if _team_names_overlap(db_home_norm, g['home']) and \
           _team_names_overlap(db_away_norm, g['away']):
            return g
    return None

def backfill_scores():
    """Fill in missing NCAAB scores in "SportsGame" from ESPN scoreboards.

    Selects up to 2000 of the most recent 'ncaab' rows whose "homeScore" is
    NULL and whose "gameDate" is more than three hours in the past, groups
    them by calendar date, fetches one ESPN scoreboard per date, and updates
    every row that ``match_game`` can pair with an ESPN result. Updates are
    committed once per date; progress and a final summary are printed.

    The cursor and connection are closed on every exit path, including the
    early return when there is nothing to update and any exception raised
    part-way through.
    """
    from contextlib import closing

    updated = 0
    not_found = 0

    with closing(get_db_connection()) as conn, closing(conn.cursor()) as cur:
        # Find NCAAB games without scores
        cur.execute("""
            SELECT id, "homeTeam", "awayTeam", "gameDate"::date as game_date
            FROM "SportsGame"
            WHERE league = 'ncaab'
              AND "homeScore" IS NULL
              AND "gameDate" < NOW() - INTERVAL '3 hours'
            ORDER BY "gameDate" DESC
            LIMIT 2000
        """)

        games_to_update = cur.fetchall()
        print(f"Found {len(games_to_update)} NCAAB games without scores")

        if not games_to_update:
            print("No games to update")
            return

        # Group by date so each ESPN scoreboard is fetched only once.
        # NOTE(review): the date comes from a "gameDate"::date cast in the
        # database; confirm it lines up with the day ESPN files the game under.
        games_by_date = {}
        for game_id, home, away, game_date in games_to_update:
            date_str = game_date.strftime('%Y-%m-%d')
            games_by_date.setdefault(date_str, []).append((game_id, home, away))

        print(f"Processing {len(games_by_date)} unique dates")

        # Newest dates first.
        for date_str in sorted(games_by_date, reverse=True):
            db_games = games_by_date[date_str]
            print(f"\n{date_str}: {len(db_games)} games to check")

            # Fetch ESPN data for this date; skip the date if the fetch failed.
            espn_data = fetch_espn_scoreboard(date_str)
            if not espn_data:
                continue

            espn_games = extract_scores_from_espn(espn_data)
            print(f"  ESPN has {len(espn_games)} completed games")

            for game_id, home, away in db_games:
                match = match_game(home, away, espn_games)
                if match:
                    cur.execute("""
                        UPDATE "SportsGame"
                        SET "homeScore" = %s,
                            "awayScore" = %s,
                            status = %s,
                            "updatedAt" = NOW()
                        WHERE id = %s
                    """, (match['home_score'], match['away_score'], match['status'], game_id))
                    updated += 1
                    print(f"  Updated: {away} @ {home} -> {match['away_score']}-{match['home_score']}")
                else:
                    not_found += 1

            # Commit per date so progress survives a failure on a later date.
            conn.commit()
            time.sleep(0.5)  # Rate limiting

    print("\n=== Summary ===")
    print(f"Updated: {updated}")
    print(f"Not found: {not_found}")

if __name__ == '__main__':
    # Run the backfill only when a database URL is configured; otherwise
    # report the missing variable and exit with status 1.
    if DATABASE_URL:
        backfill_scores()
    else:
        print("Error: SPORTS_DATABASE_URL not set")
        sys.exit(1)
