#!/usr/bin/env python3
"""
NHL Cross-Table Team Name Normalization

Normalizes dirty NHL team names across 8 tables (odds, metrics, performance)
to match the canonical names in SportsGame.

Phase 1: Fix 3 known name variants in 6 odds tables
Phase 2: Fix ALL CAPS names in GameOddsAltLine
Phase 3: Fix abbreviations + mixed formats in TeamGameMetric & PeriodPerformance

Usage:
    python3 nhl_normalize_all_tables.py --dry-run          # preview all phases
    python3 nhl_normalize_all_tables.py --phase 1          # run phase 1 only
    python3 nhl_normalize_all_tables.py                    # run all phases
"""

import argparse
import os
import re
import sys

import psycopg2

# ---------------------------------------------------------------------------
# Team name maps
# ---------------------------------------------------------------------------

# 32 current NHL teams: abbreviation → canonical full name
NHL_ABBREV_TO_FULL = {
    "ANA": "Anaheim Ducks",
    "BOS": "Boston Bruins",
    "BUF": "Buffalo Sabres",
    "CGY": "Calgary Flames",
    "CAR": "Carolina Hurricanes",
    "CHI": "Chicago Blackhawks",
    "COL": "Colorado Avalanche",
    "CBJ": "Columbus Blue Jackets",
    "DAL": "Dallas Stars",
    "DET": "Detroit Red Wings",
    "EDM": "Edmonton Oilers",
    "FLA": "Florida Panthers",
    "LAK": "Los Angeles Kings",
    "MIN": "Minnesota Wild",
    "MTL": "Montreal Canadiens",
    "NSH": "Nashville Predators",
    "NJD": "New Jersey Devils",
    "NYI": "New York Islanders",
    "NYR": "New York Rangers",
    "OTT": "Ottawa Senators",
    "PHI": "Philadelphia Flyers",
    "PIT": "Pittsburgh Penguins",
    "SJS": "San Jose Sharks",
    "SEA": "Seattle Kraken",
    "STL": "St. Louis Blues",
    "TBL": "Tampa Bay Lightning",
    "TOR": "Toronto Maple Leafs",
    "UTA": "Utah Hockey Club",
    "VAN": "Vancouver Canucks",
    "VGK": "Vegas Golden Knights",
    "WPG": "Winnipeg Jets",
    "WSH": "Washington Capitals",
    # Alternate abbreviations found in data
    "ARI": "Arizona Coyotes",
    "LA": "Los Angeles Kings",
    "NJ": "New Jersey Devils",
    "SJ": "San Jose Sharks",
    "TB": "Tampa Bay Lightning",
    "UTAH": "Utah Hockey Club",
}

# ALL CAPS full name → canonical (for GameOddsAltLine)
ALL_CAPS_TO_PROPER = {name.upper(): name for name in NHL_ABBREV_TO_FULL.values()}
# Historical team
ALL_CAPS_TO_PROPER["ARIZONA COYOTES"] = "Arizona Coyotes"
# Dirty ALL CAPS variants (no period, accented)
ALL_CAPS_TO_PROPER["ST LOUIS BLUES"] = "St. Louis Blues"
ALL_CAPS_TO_PROPER["MONTR\u00c9AL CANADIENS"] = "Montreal Canadiens"

# ALL CAPS nickname-only → canonical (for PeriodPerformance)
NICKNAME_TO_FULL = {
    "DUCKS": "Anaheim Ducks",
    "BRUINS": "Boston Bruins",
    "SABRES": "Buffalo Sabres",
    "FLAMES": "Calgary Flames",
    "HURRICANES": "Carolina Hurricanes",
    "BLACKHAWKS": "Chicago Blackhawks",
    "AVALANCHE": "Colorado Avalanche",
    "BLUE JACKETS": "Columbus Blue Jackets",
    "STARS": "Dallas Stars",
    "RED WINGS": "Detroit Red Wings",
    "OILERS": "Edmonton Oilers",
    "PANTHERS": "Florida Panthers",
    "KINGS": "Los Angeles Kings",
    "WILD": "Minnesota Wild",
    "CANADIENS": "Montreal Canadiens",
    "PREDATORS": "Nashville Predators",
    "DEVILS": "New Jersey Devils",
    "ISLANDERS": "New York Islanders",
    "RANGERS": "New York Rangers",
    "SENATORS": "Ottawa Senators",
    "FLYERS": "Philadelphia Flyers",
    "PENGUINS": "Pittsburgh Penguins",
    "SHARKS": "San Jose Sharks",
    "KRAKEN": "Seattle Kraken",
    "BLUES": "St. Louis Blues",
    "LIGHTNING": "Tampa Bay Lightning",
    "MAPLE LEAFS": "Toronto Maple Leafs",
    "HOCKEY CLUB": "Utah Hockey Club",
    "CANUCKS": "Vancouver Canucks",
    "GOLDEN KNIGHTS": "Vegas Golden Knights",
    "JETS": "Winnipeg Jets",
    "CAPITALS": "Washington Capitals",
    "COYOTES": "Arizona Coyotes",
}

# Known dirty name → canonical
NAME_FIXES = {
    "Utah Mammoth": "Utah Hockey Club",
    "Montréal Canadiens": "Montreal Canadiens",
    "St Louis Blues": "St. Louis Blues",
}

CANONICAL_NAMES = set(NHL_ABBREV_TO_FULL.values()) | {"Arizona Coyotes"}

# ---------------------------------------------------------------------------
# DB connection
# ---------------------------------------------------------------------------

def get_db_url():
    env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
    if os.path.exists(env_path):
        with open(env_path) as f:
            for line in f:
                line = line.strip()
                if line.startswith("SPORTS_DATABASE_URL="):
                    url = line.split("=", 1)[1]
                    return url.split("?")[0]
    url = os.environ.get("SPORTS_DATABASE_URL", "")
    return url.split("?")[0] if url else None


def connect_db():
    url = get_db_url()
    if not url:
        print("ERROR: SPORTS_DATABASE_URL not found")
        sys.exit(1)
    return psycopg2.connect(url)

# ---------------------------------------------------------------------------
# Phase 1: Fix 3 name variants across odds tables
# ---------------------------------------------------------------------------

def phase1(cur, dry_run):
    print("\n=== Phase 1: Fix 3 name variants across odds tables ===")
    tables_home_away = [
        "GameOdds", "OddsSnapshot", "ConsensusOdds", "PeriodOdds", "LineMovement",
    ]
    total = 0

    for old_name, new_name in NAME_FIXES.items():
        for table in tables_home_away:
            for col in ["homeTeam", "awayTeam"]:
                cur.execute(
                    f'SELECT COUNT(*) FROM "{table}" WHERE league=%s AND "{col}"=%s',
                    ("nhl", old_name),
                )
                count = cur.fetchone()[0]
                if count > 0:
                    print(f"  {table}.{col}: '{old_name}' → '{new_name}' ({count} rows)")
                    if not dry_run:
                        cur.execute(
                            f'UPDATE "{table}" SET "{col}"=%s WHERE league=%s AND "{col}"=%s',
                            (new_name, "nhl", old_name),
                        )
                    total += count

    print(f"  Phase 1 total: {total} rows {'(dry run)' if dry_run else 'updated'}")
    return total

# ---------------------------------------------------------------------------
# Phase 2: Fix GameOddsAltLine ALL CAPS → proper case
# ---------------------------------------------------------------------------

def phase2(cur, dry_run):
    print("\n=== Phase 2: Fix GameOddsAltLine ALL CAPS → proper case ===")
    total = 0

    for col in ["team", "opponent"]:
        cur.execute(
            f'SELECT DISTINCT "{col}" FROM "GameOddsAltLine" WHERE league=%s',
            ("nhl",),
        )
        distinct_names = [row[0] for row in cur.fetchall() if row[0]]

        for name in distinct_names:
            if name in CANONICAL_NAMES:
                continue
            # Try ALL CAPS map
            proper = ALL_CAPS_TO_PROPER.get(name)
            # Also try name fixes (e.g. Montréal)
            if not proper:
                proper = NAME_FIXES.get(name)
            if proper:
                cur.execute(
                    f'SELECT COUNT(*) FROM "GameOddsAltLine" WHERE league=%s AND "{col}"=%s',
                    ("nhl", name),
                )
                count = cur.fetchone()[0]
                print(f"  GameOddsAltLine.{col}: '{name}' → '{proper}' ({count} rows)")
                if not dry_run:
                    cur.execute(
                        f'UPDATE "GameOddsAltLine" SET "{col}"=%s WHERE league=%s AND "{col}"=%s',
                        (proper, "nhl", name),
                    )
                total += count
            else:
                print(f"  WARNING: GameOddsAltLine.{col}: unrecognized name '{name}'")

    print(f"  Phase 2 total: {total} rows {'(dry run)' if dry_run else 'updated'}")
    return total

# ---------------------------------------------------------------------------
# Phase 3: Fix TeamGameMetric + PeriodPerformance
# ---------------------------------------------------------------------------

def resolve_name(name):
    """Resolve a dirty team name to canonical. Returns None if already clean or unknown."""
    if not name or name in CANONICAL_NAMES:
        return None
    # Known fixes
    if name in NAME_FIXES:
        return NAME_FIXES[name]
    # Abbreviation (2-4 uppercase letters)
    upper = name.upper().strip()
    if name in NHL_ABBREV_TO_FULL:
        return NHL_ABBREV_TO_FULL[name]
    if upper in NHL_ABBREV_TO_FULL:
        return NHL_ABBREV_TO_FULL[upper]
    # ALL CAPS full name
    if upper in ALL_CAPS_TO_PROPER:
        return ALL_CAPS_TO_PROPER[upper]
    # ALL CAPS nickname
    if upper in NICKNAME_TO_FULL:
        return NICKNAME_TO_FULL[upper]
    return None


def fix_team_col_with_unique(cur, dry_run, table, col, unique_key_cols):
    """
    Normalize a team name column, handling unique constraint conflicts.
    When renaming would violate a unique constraint (canonical row already exists),
    delete the dirty duplicate instead.
    unique_key_cols: the other columns in the unique constraint besides `col`.
    """
    updated = 0
    deleted = 0

    cur.execute(
        f'SELECT DISTINCT "{col}" FROM "{table}" WHERE league=%s',
        ("nhl",),
    )
    distinct_names = [row[0] for row in cur.fetchall() if row[0]]

    for name in distinct_names:
        proper = resolve_name(name)
        if not proper:
            if name not in CANONICAL_NAMES:
                print(f"  WARNING: {table}.{col}: unrecognized name '{name}'")
            continue

        # Count total dirty rows
        cur.execute(
            f'SELECT COUNT(*) FROM "{table}" WHERE league=%s AND "{col}"=%s',
            ("nhl", name),
        )
        count = cur.fetchone()[0]
        if count == 0:
            continue

        if unique_key_cols:
            # Find dirty rows that would conflict (canonical row already exists)
            other_cols = ", ".join(f'd."{c}"' for c in unique_key_cols)
            join_conds = " AND ".join(
                f'c."{c}" = d."{c}"' for c in unique_key_cols
            )
            cur.execute(
                f"""SELECT COUNT(*) FROM "{table}" d
                    WHERE d.league=%s AND d."{col}"=%s
                    AND EXISTS (
                        SELECT 1 FROM "{table}" c
                        WHERE c.league=%s AND c."{col}"=%s
                        AND {join_conds}
                    )""",
                ("nhl", name, "nhl", proper),
            )
            conflict_count = cur.fetchone()[0]
        else:
            conflict_count = 0

        safe_count = count - conflict_count

        if conflict_count > 0:
            print(f"  {table}.{col}: '{name}' → delete {conflict_count} dupes (canonical exists)")
            if not dry_run:
                cur.execute(
                    f"""DELETE FROM "{table}" d
                        USING "{table}" c
                        WHERE d.league=%s AND d."{col}"=%s
                        AND c.league=%s AND c."{col}"=%s
                        AND {" AND ".join(f'c."{c}" = d."{c}"' for c in unique_key_cols)}""",
                    ("nhl", name, "nhl", proper),
                )
            deleted += conflict_count

        if safe_count > 0:
            print(f"  {table}.{col}: '{name}' → '{proper}' ({safe_count} rows)")
            if not dry_run:
                cur.execute(
                    f'UPDATE "{table}" SET "{col}"=%s WHERE league=%s AND "{col}"=%s',
                    (proper, "nhl", name),
                )
            updated += safe_count

    return updated, deleted


def phase3(cur, dry_run):
    print("\n=== Phase 3: Fix TeamGameMetric + PeriodPerformance ===")
    total_updated = 0
    total_deleted = 0

    # TeamGameMetric: unique on (league, season, gameKey, team, statKey)
    # 'team' column has unique constraint involvement
    u, d = fix_team_col_with_unique(
        cur, dry_run, "TeamGameMetric", "team",
        ["season", "gameKey", "statKey"],
    )
    total_updated += u
    total_deleted += d

    # 'opponent' column has no unique constraint involvement — simple update
    u, d = fix_team_col_with_unique(
        cur, dry_run, "TeamGameMetric", "opponent",
        [],  # no unique constraint on opponent
    )
    total_updated += u
    total_deleted += d

    # PeriodPerformance: unique on (league, season, team, period)
    # 'team' column has unique constraint involvement
    u, d = fix_team_col_with_unique(
        cur, dry_run, "PeriodPerformance", "team",
        ["season", "period"],
    )
    total_updated += u
    total_deleted += d

    label = "(dry run)" if dry_run else "updated"
    print(f"  Phase 3 total: {total_updated} rows {label}, {total_deleted} dupes deleted")
    return total_updated + total_deleted

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    parser = argparse.ArgumentParser(description="Normalize NHL team names across all tables")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing")
    parser.add_argument("--phase", type=int, choices=[1, 2, 3], help="Run only this phase")
    args = parser.parse_args()

    conn = connect_db()
    cur = conn.cursor()
    grand_total = 0

    try:
        phases = [args.phase] if args.phase else [1, 2, 3]

        for p in phases:
            if p == 1:
                grand_total += phase1(cur, args.dry_run)
            elif p == 2:
                grand_total += phase2(cur, args.dry_run)
            elif p == 3:
                grand_total += phase3(cur, args.dry_run)

        if not args.dry_run:
            conn.commit()
            print(f"\nCommitted. Total rows updated: {grand_total}")
        else:
            conn.rollback()
            print(f"\nDry run complete. Total rows that would be updated: {grand_total}")

    except Exception as e:
        conn.rollback()
        print(f"\nERROR: {e}")
        raise
    finally:
        cur.close()
        conn.close()


if __name__ == "__main__":
    main()
