#!/usr/bin/env python3
"""
Backfill NCAAB scores from ESPN API (v3 - aggressive matching).

Strategy: For each date, match DB games to ESPN games using
a combination of first-word matching on both home + away teams.
This handles abbreviation differences (GW vs George Washington, etc.)
"""

import os
import sys
import re
import requests
import psycopg2
from datetime import datetime
import time

# ESPN's public (undocumented) site-API scoreboard endpoint for men's
# college basketball; queried per-date by fetch_espn_scoreboard().
ESPN_URL = "https://site.api.espn.com/apis/site/v2/sports/basketball/mens-college-basketball/scoreboard"

# Known first-word mappings for tricky cases: DB team first word -> extra
# canonical key to add during matching. Identity entries (e.g. 'siu': 'siu')
# are effectively no-ops (the first word is always added as a key anyway);
# presumably they document abbreviations that must NOT be expanded — confirm.
FIRST_WORD_ALIASES = {
    'gw': 'george',
    'siu': 'siu',
    'uc': 'uc',
    'ul': 'ul',
    'mt': 'mount',
    'st': 'saint',
    'n': 'north',
    's': 'south',
    'e': 'east',
    'w': 'west',
    'se': 'southeast',
    'ne': 'northeast',
    'cs': 'cal',
    'csu': 'cal',
    'liu': 'liu',
    'vcu': 'vcu',
    'uab': 'uab',
    'ucf': 'ucf',
    'uconn': 'uconn',
    'umbc': 'umbc',
    'unc': 'unc',
    'unlv': 'unlv',
    'utep': 'utep',
    'utsa': 'utsa',
}

# Reverse aliases (ESPN → DB first word): canonical first word -> every
# variant that should match it. get_match_keys() adds the entire family of
# keys when a name's first word appears on either side of an entry.
ESPN_FIRST_WORD_ALIASES = {
    'george': ['gw', 'george'],
    'mount': ['mt', 'mount'],
    'saint': ['st', 'saint'],
    'north': ['n', 'north'],
    'south': ['s', 'south'],
    'east': ['e', 'east'],
    'west': ['w', 'west'],
    'southeast': ['se', 'southeast'],
    'northeast': ['ne', 'northeast'],
    'cal': ['cs', 'csu', 'cal'],
    'connecticut': ['uconn', 'connecticut'],
    'siu': ['siu'],
}


def load_db_url(env_path='/var/www/html/eventheodds/.env'):
    """Read SPORTS_DATABASE_URL from a dotenv-style file.

    Args:
        env_path: Path to the .env file. Defaults to the deployed app's
            location so existing callers are unaffected.

    Returns:
        The connection URL with any '?query' suffix (e.g. ?schema=...)
        removed, or '' when the file is missing/unreadable or the variable
        is not present.
    """
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    value = line.split('=', 1)[1].strip()
                    # Tolerate optional surrounding quotes in the dotenv file.
                    value = value.strip('"').strip("'")
                    # psycopg2 does not accept the Prisma-style '?schema=' tail.
                    return value.split('?')[0]
    except OSError:
        # A missing .env is treated the same as "not configured" so the
        # caller's existing empty-string check handles both cases.
        pass
    return ''


def fetch_espn_scoreboard(date_str):
    """Fetch ESPN's scoreboard payload for a single date.

    `date_str` is 'YYYY-MM-DD'; ESPN's API expects 'YYYYMMDD'. Returns the
    parsed JSON dict, or None on any request/decoding failure (the error is
    printed so the caller can simply skip the date).
    """
    query = {
        'dates': date_str.replace('-', ''),
        'groups': '50',  # NOTE(review): presumably the Division I group — confirm
        'limit': 400,
    }
    try:
        response = requests.get(ESPN_URL, params=query, timeout=30)
        response.raise_for_status()
        return response.json()
    except Exception as exc:
        # Best-effort: log and let the caller treat this date as unavailable.
        print(f"  Error fetching ESPN for {date_str}: {exc}")
        return None


def normalize_word(w):
    """Normalize a word for matching: lower-case, trim whitespace and
    trailing dots, and drop straight/curly apostrophes."""
    cleaned = w.lower().strip().rstrip('.')
    for apostrophe in ("'", "\u2019"):
        cleaned = cleaned.replace(apostrophe, "")
    return cleaned


def get_match_keys(team_name):
    """Build a set of flexible match keys for a team name.

    The set contains: the normalized first word; its alias from
    FIRST_WORD_ALIASES (if any); a compound key of the first two words
    joined by '_'; and the entire alias family from ESPN_FIRST_WORD_ALIASES
    when the first word appears on either side of an entry. Blank names
    yield an empty set.
    """
    tokens = [normalize_word(t) for t in team_name.split() if t.strip()]
    if not tokens:
        return set()

    first = tokens[0]
    keys = {first}

    # Direct expansion for tricky abbreviations (e.g. 'gw' -> 'george').
    alias = FIRST_WORD_ALIASES.get(first)
    if alias is not None:
        keys.add(alias)

    # Compound key helps disambiguate multi-word schools.
    if len(tokens) > 1:
        keys.add(f"{tokens[0]}_{tokens[1]}")

    # Pull in the whole alias family whenever the first word belongs to one.
    for canonical, variants in ESPN_FIRST_WORD_ALIASES.items():
        if first == canonical or first in variants:
            keys.add(canonical)
            keys.update(variants)

    return keys


def extract_espn_finals(data):
    """Extract completed games from an ESPN scoreboard payload.

    Only events whose status is STATUS_FINAL and which have exactly two
    competitors are kept. Each returned dict carries home/away display
    names, integer scores, and precomputed match-key sets. Events where
    either score is missing or unparsable are dropped.
    """
    finals = []
    if not data or 'events' not in data:
        return finals

    for event in data['events']:
        state = event.get('status', {}).get('type', {}).get('name', '')
        if state != 'STATUS_FINAL':
            continue

        competitors = event.get('competitions', [{}])[0].get('competitors', [])
        if len(competitors) != 2:
            continue

        record = {}
        for competitor in competitors:
            name = competitor.get('team', {}).get('displayName', '')
            raw_score = competitor.get('score')
            prefix = 'home' if competitor.get('homeAway') == 'home' else 'away'
            try:
                # ESPN scores arrive as strings; empty/None -> no score.
                record[f'{prefix}_score'] = int(raw_score) if raw_score else None
            except (ValueError, TypeError):
                record[f'{prefix}_score'] = None
            record[f'{prefix}_name'] = name
            record[f'{prefix}_keys'] = get_match_keys(name)

        if record.get('home_score') is not None and record.get('away_score') is not None:
            finals.append(record)

    return finals


def find_match(db_home, db_away, espn_games):
    """Locate the ESPN game whose teams match the DB home/away names.

    A match requires overlapping key sets on BOTH sides. If the straight
    orientation finds nothing, a home/away-swapped orientation is tried and
    returned with the scores/names flipped plus a '_used_ref' back-pointer
    so the caller can mark the underlying ESPN record as consumed.
    Returns None when no unconsumed ESPN game matches.
    """
    hk = get_match_keys(db_home)
    ak = get_match_keys(db_away)

    available = [g for g in espn_games if not g.get('_used')]

    straight = [
        g for g in available
        if (hk & g['home_keys']) and (ak & g['away_keys'])
    ]
    if straight:
        if len(straight) == 1:
            return straight[0]
        # Ambiguous: prefer the candidate with the largest total key overlap.
        return max(
            straight,
            key=lambda g: len(hk & g['home_keys']) + len(ak & g['away_keys']),
        )

    # Orientation may be reversed between the DB and ESPN listings.
    for g in available:
        if (hk & g['away_keys']) and (ak & g['home_keys']):
            return {
                'home_score': g['away_score'],
                'away_score': g['home_score'],
                'home_name': g['away_name'],
                'away_name': g['home_name'],
                '_used_ref': g,
            }

    return None


def backfill():
    """Backfill final scores for unscored NCAAB games from ESPN.

    Selects every NCAAB row with a NULL homeScore whose start time is at
    least 6 hours past, groups the rows by calendar date, matches each game
    against ESPN's scoreboard for that date AND the previous day (the DB
    stores UTC timestamps while ESPN lists games under the local schedule
    date), and writes the matched final scores back. Commits once per date
    and prints per-date progress plus a final summary.
    """
    # Hoisted: this import previously ran on every iteration of the date loop.
    from datetime import timedelta

    db_url = load_db_url()
    if not db_url:
        print("ERROR: No SPORTS_DATABASE_URL")
        return

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    try:
        cur.execute("""
            SELECT id, "homeTeam", "awayTeam", "gameDate"::date as gd
            FROM "SportsGame"
            WHERE league = 'ncaab'
              AND "homeScore" IS NULL
              AND "gameDate" < NOW() - INTERVAL '6 hours'
              AND "gameDate" >= '2025-11-01'
            ORDER BY "gameDate" DESC
        """)
        games = cur.fetchall()
        print(f"Found {len(games)} unscored NCAAB games")

        # Group DB rows by calendar date so each date needs only one pair of
        # ESPN fetches.
        by_date = {}
        for gid, home, away, gd in games:
            by_date.setdefault(gd.strftime('%Y-%m-%d'), []).append((gid, home, away))

        total_updated = 0
        total_missed = 0

        for date_str in sorted(by_date.keys(), reverse=True):
            db_games = by_date[date_str]

            game_dt = datetime.strptime(date_str, '%Y-%m-%d')
            prev_date = (game_dt - timedelta(days=1)).strftime('%Y-%m-%d')

            espn_games = []
            for d in (date_str, prev_date):
                espn_data = fetch_espn_scoreboard(d)
                if espn_data:
                    espn_games.extend(extract_espn_finals(espn_data))

            updated = 0
            missed_names = []

            for gid, home, away in db_games:
                match = find_match(home, away, espn_games)
                if match is None:
                    missed_names.append(f"{away} @ {home}")
                    continue
                cur.execute("""
                    UPDATE "SportsGame"
                    SET "homeScore" = %s, "awayScore" = %s,
                        status = 'Final', "updatedAt" = NOW()
                    WHERE id = %s
                """, (match['home_score'], match['away_score'], gid))
                # Mark the ESPN record consumed so it can't match twice;
                # swapped matches carry a back-pointer to the real record.
                if '_used_ref' in match:
                    match['_used_ref']['_used'] = True
                else:
                    match['_used'] = True
                updated += 1

            conn.commit()  # commit per date so progress survives a crash
            total_updated += updated
            total_missed += len(missed_names)

            if updated or missed_names:
                print(f"  {date_str}: ESPN={len(espn_games)}, matched={updated}, missed={len(missed_names)}")
                if missed_names and len(missed_names) <= 5:
                    for m in missed_names:
                        print(f"    ? {m}")

            time.sleep(0.3)  # rate-limit courtesy toward the ESPN API
    finally:
        # Close even when a DB/network call raises (previously leaked).
        cur.close()
        conn.close()

    print("\n=== Summary ===")
    print(f"Updated: {total_updated}")
    print(f"Not found: {total_missed}")


# Run the backfill only when executed as a script (not when imported).
if __name__ == '__main__':
    backfill()
