#!/usr/bin/env python3
"""
Backfill NCAAF scores from ESPN API.
"""

import os
import sys
import requests
import psycopg2
from datetime import datetime
import time
import re

# Connection string for the sports database, taken from the environment.
# Falls back to an empty string when the variable is not set.
DATABASE_URL = os.getenv('SPORTS_DATABASE_URL', '')

# ESPN college-football scoreboard endpoint used to look up game results.
ESPN_SCOREBOARD_URL = (
    "https://site.api.espn.com/apis/site/v2/sports/football/"
    "college-football/scoreboard"
)

def get_db_connection():
    """Open a psycopg2 connection to the database named by DATABASE_URL.

    Everything from the first '?' onward is removed from the URL before it is
    handed to psycopg2 (presumably because the URL can carry query options
    that psycopg2 does not accept -- confirm).

    Returns:
        A new psycopg2 connection; the caller is responsible for closing it.
    """
    dsn, _, _ = DATABASE_URL.partition('?')
    return psycopg2.connect(dsn)

def normalize_team(name):
    """Reduce a team name to a canonical upper-case key used for matching.

    The name is upper-cased and stripped. If it contains one of the known
    school phrases below, that school's abbreviation is returned; otherwise
    the cleaned name itself is returned. A falsy name yields ''.

    Args:
        name: Team name as stored in the database or reported by ESPN.

    Returns:
        The abbreviation of the first matching school, or the cleaned name.
    """
    if not name:
        return ''
    cleaned = name.upper().strip()

    # Checked in order and the first phrase found anywhere in the name wins,
    # so 'MISSISSIPPI STATE' has to stay ahead of the shorter 'MISSISSIPPI'.
    # NOTE(review): because this is a plain substring test, a longer school
    # name that merely contains one of these phrases (e.g.
    # 'NORTH CAROLINA A&T') also collapses to that abbreviation.
    known_schools = (
        ('NORTH CAROLINA', 'UNC'),
        ('MICHIGAN STATE', 'MSU'),
        ('OHIO STATE', 'OSU'),
        ('PENN STATE', 'PSU'),
        ('FLORIDA STATE', 'FSU'),
        ('ARIZONA STATE', 'ASU'),
        ('TEXAS A&M', 'TAMU'),
        ('LOUISIANA STATE', 'LSU'),
        ('MISSISSIPPI STATE', 'MSST'),
        ('SOUTHERN CALIFORNIA', 'USC'),
        ('BRIGHAM YOUNG', 'BYU'),
        ('TEXAS CHRISTIAN', 'TCU'),
        ('VIRGINIA TECH', 'VT'),
        ('GEORGIA TECH', 'GT'),
        ('CENTRAL FLORIDA', 'UCF'),
        ('OLE MISS', 'MISS'),
        ('MISSISSIPPI', 'MISS'),
    )
    return next(
        (short for phrase, short in known_schools if phrase in cleaned),
        cleaned,
    )

def fetch_espn_scoreboard(date_str):
    """Download the ESPN college-football scoreboard for one date.

    Args:
        date_str: Date as 'YYYY-MM-DD'; the dashes are removed for ESPN's
            `dates` query parameter.

    Returns:
        The decoded JSON payload, or None if the request or decoding failed
        (the error is printed rather than raised, so one bad date does not
        stop the caller).
    """
    query = {
        'dates': date_str.replace('-', ''),
        # presumably ESPN's group id for the set of teams to include --
        # verify against the ESPN API
        'groups': '80',
        'limit': 400,
    }
    try:
        response = requests.get(ESPN_SCOREBOARD_URL, params=query, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except Exception as e:
        print(f"  Error fetching ESPN for {date_str}: {e}")
        return None
    return payload

def extract_scores_from_espn(data):
    """Pull games with a complete score out of an ESPN scoreboard payload.

    Args:
        data: Decoded scoreboard JSON (a dict with an 'events' list), or None.

    Returns:
        A list of dicts with keys 'home' and 'away' (team names passed
        through normalize_team), 'home_score' and 'away_score' (ints) and
        'status' (always 'Final'). An event is skipped when its status is
        neither STATUS_FINAL nor STATUS_POSTPONED, when it does not have
        exactly two competitors, when a team name or score is missing, or
        when its structure is malformed.
    """
    games = []
    if not data or 'events' not in data:
        return games

    # `or ()` tolerates a payload whose 'events' value is null.
    for event in data['events'] or ():
        try:
            status = event.get('status', {}).get('type', {}).get('name', '')
            # NOTE(review): postponed events pass this filter but are still
            # emitted below with status 'Final' -- confirm that is intended.
            if status not in ('STATUS_FINAL', 'STATUS_POSTPONED'):
                continue
            competitors = event.get('competitions', [{}])[0].get('competitors', [])
            if len(competitors) != 2:
                continue

            home = away = None
            home_score = away_score = None
            for comp in competitors:
                team = comp.get('team', {})
                team_name = team.get('displayName', '') or team.get('name', '')
                raw_score = comp.get('score')
                # Only None/'' mean "no score"; a numeric 0 is a real score
                # and must not be dropped by a truthiness test.
                if raw_score is None or raw_score == '':
                    score = None
                else:
                    score = int(raw_score)
                # Anything not flagged 'home' is treated as the away side.
                if comp.get('homeAway') == 'home':
                    home, home_score = normalize_team(team_name), score
                else:
                    away, away_score = normalize_team(team_name), score
        except (AttributeError, IndexError, KeyError, TypeError,
                ValueError, OverflowError):
            # Best effort: one malformed event must not abort the whole
            # scoreboard. Unlike a bare `except`, this lets
            # KeyboardInterrupt/SystemExit propagate.
            continue
        else:
            if home and away and home_score is not None and away_score is not None:
                games.append({
                    'home': home,
                    'away': away,
                    'home_score': home_score,
                    'away_score': away_score,
                    'status': 'Final',
                })
    return games

def match_game(db_home, db_away, espn_games):
    """Find the ESPN game that corresponds to a database game.

    Both database names are normalized with normalize_team. An exact match on
    home and away is preferred over the whole list; only if none exists is a
    looser match accepted, where on each side one name contains the other.

    Args:
        db_home: Home team name from the database.
        db_away: Away team name from the database.
        espn_games: Iterable of dicts with at least 'home' and 'away' keys
            holding already-normalized names (may be empty or None).

    Returns:
        The first matching game dict, or None when nothing matches or either
        database name normalizes to an empty string.
    """
    # Materialize once so the two passes below work for any iterable.
    candidates = list(espn_games or ())
    if not candidates:
        return None

    db_home_norm = normalize_team(db_home)
    db_away_norm = normalize_team(db_away)
    # An empty string is a substring of every name, so without this guard a
    # missing team name would "match" whatever game comes first.
    if not db_home_norm or not db_away_norm:
        return None

    # Pass 1: exact matches win over any fuzzy match, regardless of order.
    for g in candidates:
        if g['home'] == db_home_norm and g['away'] == db_away_norm:
            return g

    def _overlaps(ours, theirs):
        # Containment in either direction, ignoring empty names.
        return bool(theirs) and (ours in theirs or theirs in ours)

    # Pass 2: fall back to substring containment on both sides.
    for g in candidates:
        if _overlaps(db_home_norm, g['home']) and _overlaps(db_away_norm, g['away']):
            return g
    return None

def _run_backfill(conn, cur):
    """Query unscored games, match them against ESPN and write scores back.

    Args:
        conn: Open database connection; committed after each processed date.
        cur: Open cursor on `conn` used for the query and the updates.

    Returns:
        An (updated, not_found) tuple of counters, or None when the query
        finds no games to backfill.
    """
    cur.execute("""
        SELECT id, "homeTeam", "awayTeam", "gameDate"::date as game_date
        FROM "SportsGame"
        WHERE league = 'ncaaf'
          AND "homeScore" IS NULL
          AND "gameDate" < NOW() - INTERVAL '3 hours'
        ORDER BY "gameDate" DESC
        LIMIT 3000
    """)
    games_to_update = cur.fetchall()
    print(f"Found {len(games_to_update)} NCAAF games without scores")
    if not games_to_update:
        return None

    # Group by calendar date so ESPN is queried once per date, not per game.
    # NOTE(review): game_date is the date part of "gameDate" as cast by the
    # database; if ESPN buckets games by a different time zone, late games
    # may sit on a neighbouring scoreboard day -- confirm.
    games_by_date = {}
    for game_id, home, away, game_date in games_to_update:
        date_key = game_date.strftime('%Y-%m-%d')
        games_by_date.setdefault(date_key, []).append((game_id, home, away))

    print(f"Processing {len(games_by_date)} unique dates")
    updated = 0
    not_found = 0

    # Most recent dates first.
    for date_str in sorted(games_by_date, reverse=True):
        db_games = games_by_date[date_str]
        print(f"\n{date_str}: {len(db_games)} games to check")
        espn_data = fetch_espn_scoreboard(date_str)
        if not espn_data:
            # Fetch failed or returned nothing; these games are counted in
            # neither total and stay eligible for the next run.
            continue
        espn_games = extract_scores_from_espn(espn_data)
        print(f"  ESPN has {len(espn_games)} completed games")

        for game_id, home, away in db_games:
            match = match_game(home, away, espn_games)
            if match:
                cur.execute("""
                    UPDATE "SportsGame"
                    SET "homeScore" = %s, "awayScore" = %s, status = %s, "updatedAt" = NOW()
                    WHERE id = %s
                """, (match['home_score'], match['away_score'], match['status'], game_id))
                updated += 1
            else:
                not_found += 1
        # Commit per date so progress survives a failure on a later date.
        conn.commit()
        # Short pause between ESPN requests.
        time.sleep(0.5)

    return updated, not_found


def backfill_scores():
    """Fill in missing NCAAF scores in "SportsGame" from the ESPN scoreboard.

    Selects up to 3000 'ncaaf' games whose "homeScore" is NULL and whose
    "gameDate" is more than three hours in the past, fetches the ESPN
    scoreboard for each distinct game date, and updates every game that can
    be matched. Progress and a final summary are printed.

    The cursor and connection are always released, including when there is
    nothing to do and when an error interrupts the run.
    """
    conn = get_db_connection()
    counts = None
    try:
        # The cursor context manager closes the cursor on exit; the
        # connection itself is closed in the `finally` below.
        with conn.cursor() as cur:
            counts = _run_backfill(conn, cur)
    finally:
        conn.close()

    if counts is not None:
        updated, not_found = counts
        print(f"\n=== Summary ===\nUpdated: {updated}\nNot found: {not_found}")

if __name__ == '__main__':
    # Run the backfill only when a connection string is configured;
    # otherwise report the problem and exit with status 1.
    if DATABASE_URL:
        backfill_scores()
    else:
        print("Error: SPORTS_DATABASE_URL not set")
        sys.exit(1)
