#!/usr/bin/env python3
"""
ETL: Populate Opening/Closing Lines from OddsSnapshot to SportsGame
Uses team name matching since externalGameIds don't match between sources.

Run: python3 scripts/etl_opening_closing_lines.py
"""
import psycopg2
import os
import re
from datetime import datetime, timezone, timedelta
from collections import defaultdict


# Comprehensive team name normalization
TEAM_ALIASES = {
    # NBA
    'nba': {
        'atlanta hawks': 'ATL', 'hawks': 'ATL', 'atlanta': 'ATL',
        'boston celtics': 'BOS', 'celtics': 'BOS', 'boston': 'BOS',
        'brooklyn nets': 'BKN', 'nets': 'BKN', 'brooklyn': 'BKN',
        'charlotte hornets': 'CHA', 'hornets': 'CHA', 'charlotte': 'CHA',
        'chicago bulls': 'CHI', 'bulls': 'CHI', 'chicago': 'CHI',
        'cleveland cavaliers': 'CLE', 'cavaliers': 'CLE', 'cavs': 'CLE', 'cleveland': 'CLE',
        'dallas mavericks': 'DAL', 'mavericks': 'DAL', 'mavs': 'DAL', 'dallas': 'DAL',
        'denver nuggets': 'DEN', 'nuggets': 'DEN', 'denver': 'DEN',
        'detroit pistons': 'DET', 'pistons': 'DET', 'detroit': 'DET',
        'golden state warriors': 'GSW', 'warriors': 'GSW', 'golden state': 'GSW',
        'houston rockets': 'HOU', 'rockets': 'HOU', 'houston': 'HOU',
        'indiana pacers': 'IND', 'pacers': 'IND', 'indiana': 'IND',
        'los angeles clippers': 'LAC', 'clippers': 'LAC', 'la clippers': 'LAC',
        'los angeles lakers': 'LAL', 'lakers': 'LAL', 'la lakers': 'LAL',
        'memphis grizzlies': 'MEM', 'grizzlies': 'MEM', 'memphis': 'MEM',
        'miami heat': 'MIA', 'heat': 'MIA', 'miami': 'MIA',
        'milwaukee bucks': 'MIL', 'bucks': 'MIL', 'milwaukee': 'MIL',
        'minnesota timberwolves': 'MIN', 'timberwolves': 'MIN', 'wolves': 'MIN', 'minnesota': 'MIN',
        'new orleans pelicans': 'NOP', 'pelicans': 'NOP', 'new orleans': 'NOP',
        'new york knicks': 'NYK', 'knicks': 'NYK', 'new york': 'NYK',
        'oklahoma city thunder': 'OKC', 'thunder': 'OKC', 'oklahoma city': 'OKC', 'okc': 'OKC',
        'orlando magic': 'ORL', 'magic': 'ORL', 'orlando': 'ORL',
        'philadelphia 76ers': 'PHI', '76ers': 'PHI', 'sixers': 'PHI', 'philadelphia': 'PHI',
        'phoenix suns': 'PHX', 'suns': 'PHX', 'phoenix': 'PHX',
        'portland trail blazers': 'POR', 'trail blazers': 'POR', 'blazers': 'POR', 'portland': 'POR',
        'sacramento kings': 'SAC', 'kings': 'SAC', 'sacramento': 'SAC',
        'san antonio spurs': 'SAS', 'spurs': 'SAS', 'san antonio': 'SAS',
        'toronto raptors': 'TOR', 'raptors': 'TOR', 'toronto': 'TOR',
        'utah jazz': 'UTA', 'jazz': 'UTA', 'utah': 'UTA',
        'washington wizards': 'WAS', 'wizards': 'WAS', 'washington': 'WAS',
    },
    # NFL
    'nfl': {
        'arizona cardinals': 'ARI', 'cardinals': 'ARI', 'arizona': 'ARI',
        'atlanta falcons': 'ATL', 'falcons': 'ATL',
        'baltimore ravens': 'BAL', 'ravens': 'BAL', 'baltimore': 'BAL',
        'buffalo bills': 'BUF', 'bills': 'BUF', 'buffalo': 'BUF',
        'carolina panthers': 'CAR', 'panthers': 'CAR', 'carolina': 'CAR',
        'chicago bears': 'CHI', 'bears': 'CHI',
        'cincinnati bengals': 'CIN', 'bengals': 'CIN', 'cincinnati': 'CIN',
        'cleveland browns': 'CLE', 'browns': 'CLE',
        'dallas cowboys': 'DAL', 'cowboys': 'DAL',
        'denver broncos': 'DEN', 'broncos': 'DEN',
        'detroit lions': 'DET', 'lions': 'DET',
        'green bay packers': 'GB', 'packers': 'GB', 'green bay': 'GB',
        'houston texans': 'HOU', 'texans': 'HOU',
        'indianapolis colts': 'IND', 'colts': 'IND', 'indianapolis': 'IND',
        'jacksonville jaguars': 'JAX', 'jaguars': 'JAX', 'jacksonville': 'JAX',
        'kansas city chiefs': 'KC', 'chiefs': 'KC', 'kansas city': 'KC',
        'las vegas raiders': 'LV', 'raiders': 'LV', 'las vegas': 'LV',
        'los angeles chargers': 'LAC', 'chargers': 'LAC',
        'los angeles rams': 'LAR', 'rams': 'LAR',
        'miami dolphins': 'MIA', 'dolphins': 'MIA',
        'minnesota vikings': 'MIN', 'vikings': 'MIN',
        'new england patriots': 'NE', 'patriots': 'NE', 'new england': 'NE',
        'new orleans saints': 'NO', 'saints': 'NO',
        'new york giants': 'NYG', 'giants': 'NYG',
        'new york jets': 'NYJ', 'jets': 'NYJ',
        'philadelphia eagles': 'PHI', 'eagles': 'PHI',
        'pittsburgh steelers': 'PIT', 'steelers': 'PIT', 'pittsburgh': 'PIT',
        'san francisco 49ers': 'SF', '49ers': 'SF', 'san francisco': 'SF', 'niners': 'SF',
        'seattle seahawks': 'SEA', 'seahawks': 'SEA', 'seattle': 'SEA',
        'tampa bay buccaneers': 'TB', 'buccaneers': 'TB', 'bucs': 'TB', 'tampa bay': 'TB',
        'tennessee titans': 'TEN', 'titans': 'TEN', 'tennessee': 'TEN',
        'washington commanders': 'WAS', 'commanders': 'WAS',
    },
    # NHL
    'nhl': {
        'anaheim ducks': 'ANA', 'ducks': 'ANA', 'anaheim': 'ANA',
        'arizona coyotes': 'ARI', 'coyotes': 'ARI',
        'boston bruins': 'BOS', 'bruins': 'BOS',
        'buffalo sabres': 'BUF', 'sabres': 'BUF',
        'calgary flames': 'CGY', 'flames': 'CGY', 'calgary': 'CGY',
        'carolina hurricanes': 'CAR', 'hurricanes': 'CAR', 'canes': 'CAR',
        'chicago blackhawks': 'CHI', 'blackhawks': 'CHI',
        'colorado avalanche': 'COL', 'avalanche': 'COL', 'avs': 'COL', 'colorado': 'COL',
        'columbus blue jackets': 'CBJ', 'blue jackets': 'CBJ', 'columbus': 'CBJ',
        'dallas stars': 'DAL', 'stars': 'DAL',
        'detroit red wings': 'DET', 'red wings': 'DET',
        'edmonton oilers': 'EDM', 'oilers': 'EDM', 'edmonton': 'EDM',
        'florida panthers': 'FLA', 'panthers': 'FLA', 'florida': 'FLA',
        'los angeles kings': 'LAK', 'kings': 'LAK',
        'minnesota wild': 'MIN', 'wild': 'MIN',
        'montreal canadiens': 'MTL', 'canadiens': 'MTL', 'habs': 'MTL', 'montreal': 'MTL',
        'nashville predators': 'NSH', 'predators': 'NSH', 'preds': 'NSH', 'nashville': 'NSH',
        'new jersey devils': 'NJD', 'devils': 'NJD', 'new jersey': 'NJD',
        'new york islanders': 'NYI', 'islanders': 'NYI',
        'new york rangers': 'NYR', 'rangers': 'NYR',
        'ottawa senators': 'OTT', 'senators': 'OTT', 'sens': 'OTT', 'ottawa': 'OTT',
        'philadelphia flyers': 'PHI', 'flyers': 'PHI',
        'pittsburgh penguins': 'PIT', 'penguins': 'PIT', 'pens': 'PIT',
        'san jose sharks': 'SJS', 'sharks': 'SJS', 'san jose': 'SJS',
        'seattle kraken': 'SEA', 'kraken': 'SEA',
        'st. louis blues': 'STL', 'blues': 'STL', 'st louis blues': 'STL', 'saint louis blues': 'STL',
        'tampa bay lightning': 'TBL', 'lightning': 'TBL', 'bolts': 'TBL',
        'toronto maple leafs': 'TOR', 'maple leafs': 'TOR', 'leafs': 'TOR',
        'utah hockey club': 'UTA', 'utah': 'UTA',
        'vancouver canucks': 'VAN', 'canucks': 'VAN', 'vancouver': 'VAN',
        'vegas golden knights': 'VGK', 'golden knights': 'VGK', 'knights': 'VGK', 'vegas': 'VGK',
        'washington capitals': 'WSH', 'capitals': 'WSH', 'caps': 'WSH',
        'winnipeg jets': 'WPG', 'jets': 'WPG', 'winnipeg': 'WPG',
    },
    # MLB (add as needed)
    'mlb': {},
    # NCAAB/NCAAF - complex, use fuzzy matching fallback
    'ncaab': {},
    'ncaaf': {},
}


def normalize_team(team_name: str, league: str) -> str:
    """Normalize team name to standard abbreviation"""
    if not team_name:
        return None

    # Already an abbreviation (2-4 chars, all caps)
    if len(team_name) <= 4 and team_name.isupper():
        return team_name

    # Lowercase for lookup
    team_lower = team_name.lower().strip()

    # Try exact match in aliases
    league_aliases = TEAM_ALIASES.get(league, {})
    if team_lower in league_aliases:
        return league_aliases[team_lower]

    # Try partial match (team name contains alias)
    for alias, abbrev in league_aliases.items():
        if alias in team_lower or team_lower in alias:
            return abbrev

    # Fallback: return first 3 chars uppercase
    return team_name[:3].upper()


def load_db_url():
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().strip('"').split('?')[0]
    except FileNotFoundError:
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def run_etl():
    print("=" * 60)
    print("ETL: OPENING/CLOSING LINES (Team Name Matching)")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()

    stats = {
        'games_linked': 0,
        'opening_set': 0,
        'closing_set': 0,
        'already_linked': 0,
    }

    # Step 1: Link OddsSnapshot records to SportsGame by team names + date
    print("\n[1] Linking OddsSnapshot to SportsGame by team names...")

    # Get unlinked snapshots
    cur.execute('''
        SELECT id, league, "externalGameId", "gameDate", "homeTeam", "awayTeam"
        FROM "OddsSnapshot"
        WHERE "gameId" IS NULL
        ORDER BY league, "gameDate"
    ''')
    unlinked = cur.fetchall()
    print(f"  Found {len(unlinked)} unlinked snapshots")

    # Build SportsGame lookup index: (league, date, home_norm, away_norm) -> id
    cur.execute('''
        SELECT id, league, DATE("gameDate") as gd, "homeTeam", "awayTeam"
        FROM "SportsGame"
        WHERE "gameDate" >= '2024-10-01'
    ''')
    sg_lookup = {}
    for sg_id, league, gd, home, away in cur.fetchall():
        home_norm = normalize_team(home, league)
        away_norm = normalize_team(away, league)
        # Index by both orderings (in case home/away swapped)
        key1 = (league, str(gd), home_norm, away_norm)
        key2 = (league, str(gd), away_norm, home_norm)
        sg_lookup[key1] = sg_id
        sg_lookup[key2] = sg_id

    linked_count = 0
    for os_id, league, ext_id, game_date, home, away in unlinked:
        if not game_date:
            continue
        date_str = str(game_date.date())
        home_norm = normalize_team(home, league)
        away_norm = normalize_team(away, league)

        key = (league, date_str, home_norm, away_norm)
        sg_id = sg_lookup.get(key)

        if sg_id:
            cur.execute('''
                UPDATE "OddsSnapshot" SET "gameId" = %s WHERE id = %s
            ''', (sg_id, os_id))
            linked_count += 1

    conn.commit()
    stats['games_linked'] = linked_count
    print(f"  Linked {linked_count} snapshots to SportsGame records")

    # Step 2: Populate opening lines (earliest snapshot per game)
    print("\n[2] Populating opening lines...")

    cur.execute('''
        WITH earliest_snapshots AS (
            SELECT DISTINCT ON ("gameId")
                "gameId",
                "spreadHome",
                "spreadAway",
                "moneylineHome",
                "moneylineAway",
                "total",
                "snapshotAt"
            FROM "OddsSnapshot"
            WHERE "gameId" IS NOT NULL
              AND "spreadHome" IS NOT NULL
            ORDER BY "gameId", "snapshotAt" ASC
        )
        UPDATE "SportsGame" g SET
            "openingSpreadHome" = e."spreadHome",
            "openingSpreadAway" = e."spreadAway",
            "openingMoneylineHome" = e."moneylineHome",
            "openingMoneylineAway" = e."moneylineAway",
            "openingTotal" = e."total",
            "openingCapturedAt" = e."snapshotAt",
            "updatedAt" = NOW()
        FROM earliest_snapshots e
        WHERE g.id = e."gameId"
          AND (g."openingSpreadHome" IS NULL OR g."openingCapturedAt" > e."snapshotAt")
    ''')
    stats['opening_set'] = cur.rowcount
    print(f"  Set opening lines for {cur.rowcount} games")
    conn.commit()

    # Step 3: Populate closing lines (latest snapshot before game time)
    print("\n[3] Populating closing lines...")

    cur.execute('''
        WITH latest_pregame AS (
            SELECT DISTINCT ON (s."gameId")
                s."gameId",
                s."spreadHome",
                s."spreadAway",
                s."moneylineHome",
                s."moneylineAway",
                s."total",
                s."snapshotAt"
            FROM "OddsSnapshot" s
            JOIN "SportsGame" g ON s."gameId" = g.id
            WHERE s."gameId" IS NOT NULL
              AND s."spreadHome" IS NOT NULL
              AND s."snapshotAt" < g."gameDate"
            ORDER BY s."gameId", s."snapshotAt" DESC
        )
        UPDATE "SportsGame" g SET
            "closingSpreadHome" = l."spreadHome",
            "closingSpreadAway" = l."spreadAway",
            "closingMoneylineHome" = l."moneylineHome",
            "closingMoneylineAway" = l."moneylineAway",
            "closingTotal" = l."total",
            "closingCapturedAt" = l."snapshotAt",
            "updatedAt" = NOW()
        FROM latest_pregame l
        WHERE g.id = l."gameId"
          AND g."homeScore" IS NOT NULL  -- Game completed
          AND (g."closingSpreadHome" IS NULL OR g."closingCapturedAt" < l."snapshotAt")
    ''')
    stats['closing_set'] = cur.rowcount
    print(f"  Set closing lines for {cur.rowcount} games")
    conn.commit()

    # Step 4: Coverage report
    print("\n[4] Coverage Report:")
    cur.execute('''
        SELECT
            league,
            COUNT(*) as total,
            COUNT(*) FILTER (WHERE "homeScore" IS NOT NULL) as completed,
            COUNT(*) FILTER (WHERE "openingSpreadHome" IS NOT NULL) as has_opening,
            COUNT(*) FILTER (WHERE "closingSpreadHome" IS NOT NULL) as has_closing
        FROM "SportsGame"
        WHERE "gameDate" >= '2024-10-01'
          AND league IN ('nba', 'nfl', 'nhl', 'ncaab', 'epl', 'mma')
        GROUP BY league
        ORDER BY total DESC
    ''')

    print(f"  {'League':<10} {'Completed':>10} {'Opening':>12} {'Closing':>12}")
    print(f"  {'-'*10} {'-'*10} {'-'*12} {'-'*12}")
    for row in cur.fetchall():
        league, total, completed, opening, closing = row
        open_pct = f"{100*opening/completed:.1f}%" if completed > 0 else "0%"
        close_pct = f"{100*closing/completed:.1f}%" if completed > 0 else "0%"
        print(f"  {league:<10} {completed:>10} {opening:>8} ({open_pct:>5}) {closing:>8} ({close_pct:>5})")

    cur.close()
    conn.close()

    print("\n" + "=" * 60)
    print(f"ETL Complete:")
    print(f"  Snapshots linked: {stats['games_linked']}")
    print(f"  Opening lines set: {stats['opening_set']}")
    print(f"  Closing lines set: {stats['closing_set']}")
    print("=" * 60)

    return stats


if __name__ == '__main__':
    run_etl()
