#!/usr/bin/env python3
"""
Backfill NCAAB scores from ESPN API (improved matching).

Matches games by date + ESPN team displayName, using multiple strategies:
1. Exact match on the normalized name
2. Containment (one name contains the other, e.g. a missing mascot)
3. School name prefix match (strip the trailing mascot word)
4. "State" -> "St" abbreviation folding
"""

import os
import sys
import re
import requests
import psycopg2
from datetime import datetime, timedelta
import time

# ESPN site API scoreboard endpoint for men's college basketball
# (requested by fetch_espn_scoreboard with a `dates` query parameter).
ESPN_SCOREBOARD_URL = "https://site.api.espn.com/apis/site/v2/sports/basketball/mens-college-basketball/scoreboard"


def load_db_url():
    """Read the sports database URL from the project's .env file.

    Scans the file line by line and uses the first line that starts with
    ``SPORTS_DATABASE_URL=``. The value is whitespace-trimmed and anything
    from the first ``?`` onward (the query string) is discarded.

    Returns:
        The URL string, or ``''`` when the file has no such line.
    """
    key_prefix = 'SPORTS_DATABASE_URL='
    with open('/var/www/html/eventheodds/.env', 'r') as env_file:
        for raw_line in env_file:
            if not raw_line.startswith(key_prefix):
                continue
            value = raw_line[len(key_prefix):].strip()
            url, _, _query = value.partition('?')
            return url
    return ''


def fetch_espn_scoreboard(date_str):
    """Download the ESPN scoreboard for a single day.

    Args:
        date_str: Date formatted as ``YYYY-MM-DD``; the dashes are removed
            before it is sent as the ``dates`` query parameter.

    Returns:
        The decoded JSON payload, or ``None`` if the request or decoding
        failed (the error is printed, not raised).
    """
    compact_date = date_str.replace('-', '')
    # NOTE(review): group '50' is presumably the Division I filter - confirm.
    query = {'dates': compact_date, 'groups': '50', 'limit': 400}
    try:
        response = requests.get(ESPN_SCOREBOARD_URL, params=query, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except Exception as exc:
        print(f"  Error fetching ESPN for {date_str}: {exc}")
        return None
    return payload


def normalize(s):
    """Return a canonical lowercase form of a team name for comparison.

    Apostrophes (straight and U+2019) and periods are removed, hyphens become
    spaces, ``&`` becomes ``and``, runs of whitespace collapse to a single
    space, and leading/trailing whitespace is trimmed.

    Args:
        s: Team name string.

    Returns:
        The normalized string (possibly empty).
    """
    substitutions = str.maketrans({
        "'": None,
        "\u2019": None,
        ".": None,
        "-": " ",
        "&": "and",
    })
    collapsed = re.sub(r'\s+', ' ', s.lower().translate(substitutions))
    # Trim last: a leading/trailing hyphen only becomes whitespace after the
    # substitutions, so trimming up front would leave a stray space behind.
    return collapsed.strip()


def _parse_score(raw):
    """Convert an ESPN score value to int; None when missing or non-numeric."""
    try:
        return int(raw)
    except (ValueError, TypeError):
        return None


def extract_espn_games(data):
    """Extract all final games from an ESPN scoreboard response.

    Only events whose status type name is ``STATUS_FINAL``, that have exactly
    two competitors, and whose home and away scores both parse as integers
    are returned.

    Args:
        data: Decoded scoreboard JSON (a dict with an ``events`` list), or a
            falsy value.

    Returns:
        A list of dicts with keys ``espn_id`` plus, for each of ``home`` and
        ``away``: ``<side>_display``, ``<side>_short``, ``<side>_abbrev`` and
        ``<side>_score``.
    """
    games = []
    if not data or 'events' not in data:
        return games

    for event in data['events'] or []:
        status_type = (event.get('status') or {}).get('type') or {}
        if status_type.get('name', '') != 'STATUS_FINAL':
            continue

        # `or [{}]` also covers a present-but-empty list, where indexing [0]
        # on the raw value would raise IndexError.
        competitions = event.get('competitions') or [{}]
        competitors = competitions[0].get('competitors') or []
        if len(competitors) != 2:
            continue

        game = {'espn_id': event.get('id')}
        for comp in competitors:
            team = comp.get('team') or {}
            # Anything not explicitly marked 'home' is filed under 'away'.
            side = 'home' if comp.get('homeAway') == 'home' else 'away'
            game[f'{side}_display'] = team.get('displayName', '') or team.get('name', '')
            game[f'{side}_short'] = team.get('shortDisplayName', '')
            game[f'{side}_abbrev'] = team.get('abbreviation', '')
            game[f'{side}_score'] = _parse_score(comp.get('score'))

        # Both sides must be present with a usable score; this also drops
        # events where both competitors landed on the same side.
        if game.get('home_score') is not None and game.get('away_score') is not None:
            games.append(game)

    return games


def match_db_to_espn(db_team, espn_games, side):
    """Find the ESPN game whose ``side`` team best matches a DB team name.

    Strategies are applied in order of strictness, each across ALL unmatched
    games before falling back to the next, so an exact match anywhere in the
    list wins over a fuzzy match that merely appears earlier:

    1. exact normalized match
    2. containment (one name contains the other, e.g. a missing mascot)
    3. school-name prefix (last word, assumed to be the mascot, stripped)
    4. "State" folded to "St", compared with and without the last word

    Args:
        db_team: Team name as stored in the database.
        espn_games: Game dicts as produced by ``extract_espn_games``; games
            with a truthy ``_matched`` key are skipped.
        side: ``'home'`` or ``'away'`` - selects the ``<side>_display`` key.

    Returns:
        The matching game dict, or ``None`` if nothing matches. The fuzzy
        strategies can return a wrong team (e.g. a shared school prefix), so
        callers should verify the opponent as well.
    """
    def school_of(name):
        # Drop the last word; a single-word name is kept as-is.
        words = name.split()
        return ' '.join(words[:-1]) if len(words) > 1 else name

    def fold_state(name):
        return name.replace(' state', ' st')

    db_norm = normalize(db_team or '')
    # An empty name is a substring of everything and would match any game.
    if not db_norm.split():
        return None
    db_school = school_of(db_norm)
    db_st = fold_state(db_norm)
    db_st_school = ' '.join(db_st.split()[:-1])

    candidates = []
    for game in espn_games:
        if game.get('_matched'):
            continue
        espn_norm = normalize(game.get(f'{side}_display') or '')
        if espn_norm.split():  # same empty-name guard for the ESPN side
            candidates.append((game, espn_norm))

    def exact(espn_norm):
        return db_norm == espn_norm

    def containment(espn_norm):
        return db_norm in espn_norm or espn_norm in db_norm

    def school_prefix(espn_norm):
        espn_school = school_of(espn_norm)
        if not db_school or not espn_school:
            return False
        return (db_school.startswith(espn_school)
                or espn_school.startswith(db_school))

    def state_variant(espn_norm):
        espn_st = fold_state(espn_norm)
        if db_st == espn_st:
            return True
        espn_st_school = ' '.join(espn_st.split()[:-1])
        return bool(db_st_school) and db_st_school == espn_st_school

    for strategy in (exact, containment, school_prefix, state_variant):
        for game, espn_norm in candidates:
            if strategy(espn_norm):
                return game

    return None


def _names_loosely_match(db_name, espn_name):
    """Return True if the names share a first word or one contains the other."""
    db_norm = normalize(db_name or '')
    espn_norm = normalize(espn_name or '')
    db_words = db_norm.split()
    espn_words = espn_norm.split()
    # Without this guard an empty name raises IndexError on [0] below.
    if not db_words or not espn_words:
        return False
    return (db_words[0] == espn_words[0]
            or db_norm in espn_norm or espn_norm in db_norm)


def _find_verified_match(home, away, espn_games):
    """Match on the home team, then the away team, verifying the opponent loosely."""
    attempts = (
        ('home', home, 'away', away),
        ('away', away, 'home', home),
    )
    for side, team, other_side, opponent in attempts:
        if not team:
            continue
        candidate = match_db_to_espn(team, espn_games, side)
        if candidate and _names_loosely_match(
                opponent, candidate.get(f'{other_side}_display', '')):
            return candidate
    return None


def backfill(since='2025-11-01'):
    """Fill in missing NCAAB scores in ``SportsGame`` from ESPN scoreboards.

    Selects ``ncaab`` games with a NULL ``homeScore`` whose ``gameDate`` is
    more than six hours in the past and on/after ``since``, groups them by
    date, fetches the ESPN scoreboard once per date, and updates each game
    that can be matched on one team and loosely verified on the other.
    Updates are committed once per date. Progress and a summary are printed.

    Args:
        since: Earliest ``gameDate`` to consider, as a value PostgreSQL can
            compare against the column (default: ``'2025-11-01'``).

    NOTE(review): games are grouped by ``"gameDate"::date`` as evaluated by
    the database, while ESPN's ``dates`` parameter presumably follows US
    calendar days; late games stored in UTC may land on the wrong day and be
    reported as missed - confirm.
    """
    db_url = load_db_url()
    if not db_url:
        print("ERROR: No SPORTS_DATABASE_URL")
        return

    update_sql = """
        UPDATE "SportsGame"
        SET "homeScore" = %s, "awayScore" = %s,
            status = 'Final', "updatedAt" = NOW()
        WHERE id = %s
    """

    total_updated = 0
    total_not_found = 0

    conn = psycopg2.connect(db_url)
    try:
        # The cursor is closed by the `with`; the connection by `finally`,
        # so neither leaks if a query or the matching code raises.
        with conn.cursor() as cur:
            # Find unscored games
            cur.execute("""
                SELECT id, "homeTeam", "awayTeam", "gameDate"::date as game_date
                FROM "SportsGame"
                WHERE league = 'ncaab'
                  AND "homeScore" IS NULL
                  AND "gameDate" < NOW() - INTERVAL '6 hours'
                  AND "gameDate" >= %s
                ORDER BY "gameDate" DESC
            """, (since,))

            games = cur.fetchall()
            print(f"Found {len(games)} unscored NCAAB games")

            # Group by date
            by_date = {}
            for gid, home, away, gdate in games:
                ds = gdate.strftime('%Y-%m-%d')
                by_date.setdefault(ds, []).append((gid, home, away))

            print(f"Processing {len(by_date)} dates")

            for date_str in sorted(by_date, reverse=True):
                espn_data = fetch_espn_scoreboard(date_str)
                # Pause after every request - including failed ones - so a
                # run of errors does not turn into a burst of retries.
                time.sleep(0.3)
                if not espn_data:
                    continue

                espn_games = extract_espn_games(espn_data)
                updated = 0
                not_found = 0

                for gid, home, away in by_date[date_str]:
                    match = _find_verified_match(home, away, espn_games)
                    if match is None:
                        not_found += 1
                        continue
                    cur.execute(
                        update_sql,
                        (match['home_score'], match['away_score'], gid),
                    )
                    # Prevent the same ESPN game from being reused for
                    # another DB row on this date.
                    match['_matched'] = True
                    updated += 1

                conn.commit()
                total_updated += updated
                total_not_found += not_found

                if updated > 0 or not_found > 0:
                    print(f"  {date_str}: ESPN={len(espn_games)} final, matched={updated}, missed={not_found}")
    finally:
        conn.close()

    print("\n=== Summary ===")
    print(f"Updated: {total_updated}")
    print(f"Not found: {total_not_found}")


# Script entry point: run the backfill only when executed directly, not on import.
if __name__ == '__main__':
    backfill()
