"""Player/team name normalization for cross-source matching."""

import re
import unicodedata
import logging
from typing import Optional, Tuple

# Module-level logger registered under the "pub-integrity" name.
log = logging.getLogger("pub-integrity")

# Generational suffixes (Jr, Sr, II-V) to strip from player names for matching.
# The pattern needs leading whitespace and is anchored at end-of-string, so
# only a trailing, separate token is removed.
_SUFFIXES = re.compile(r'\s+(Jr\.?|Sr\.?|III|II|IV|V)$', re.IGNORECASE)
# Any character other than a lowercase ASCII letter or whitespace. Uppercase
# letters match too, so text must be lowercased before this is applied.
_NON_ALPHA = re.compile(r'[^a-z\s]')
# One or more consecutive whitespace characters.
_MULTI_SPACE = re.compile(r'\s+')

# Team abbreviation normalization map: source abbreviation -> canonical code.
# Several codes are used by more than one league (e.g. HOU, SEA, PIT); each key
# is listed exactly once, under the first league section that uses it, so the
# literal contains no repeated keys.
TEAM_ABBREV_MAP = {
    # NBA
    "ATL": "ATL", "BOS": "BOS", "BKN": "BKN", "CHA": "CHA", "CHI": "CHI",
    "CLE": "CLE", "DAL": "DAL", "DEN": "DEN", "DET": "DET", "GSW": "GSW",
    "HOU": "HOU", "IND": "IND", "LAC": "LAC", "LAL": "LAL", "MEM": "MEM",
    "MIA": "MIA", "MIL": "MIL", "MIN": "MIN", "NOP": "NOP", "NYK": "NYK",
    "OKC": "OKC", "ORL": "ORL", "PHI": "PHI", "PHX": "PHX", "POR": "POR",
    "SAC": "SAC", "SAS": "SAS", "TOR": "TOR", "UTA": "UTA", "WAS": "WAS",
    # NHL
    "ANA": "ANA", "ARI": "ARI", "BUF": "BUF", "CGY": "CGY", "CAR": "CAR",
    "COL": "COL", "CBJ": "CBJ", "EDM": "EDM", "FLA": "FLA", "LA": "LAK",
    "LAK": "LAK", "MTL": "MTL", "NSH": "NSH", "NJD": "NJD", "NYI": "NYI",
    "NYR": "NYR", "OTT": "OTT", "PIT": "PIT", "SEA": "SEA", "SJS": "SJS",
    "STL": "STL", "TBL": "TBL", "VAN": "VAN", "VGK": "VGK", "WPG": "WPG",
    # MLB (HOU, MIL, PIT, SEA and STL are already listed in the sections above)
    "BAL": "BAL", "TB": "TBR", "TBR": "TBR", "NYY": "NYY", "NYM": "NYM",
    "KC": "KCR", "KCR": "KCR", "CWS": "CWS", "SF": "SFG", "SFG": "SFG",
    "SD": "SDP", "SDP": "SDP", "TEX": "TEX", "CIN": "CIN",
    # Common aliases
    "GS": "GSW", "SA": "SAS", "NO": "NOP", "NY": "NYK",
    "PHO": "PHX", "UTAH": "UTA", "WASH": "WAS",
}

# Full team name -> canonical abbreviation used by roster/prop integrity checks.
# Keep this focused on leagues where prop eligibility matters today.
# Keys are lowercase display names. Entry order matters: the nickname index
# derived from this map keeps the first team listed for a shared last word
# (e.g. "kings", "rangers", "sox"), so do not reorder existing entries.
TEAM_NAME_MAP = {
    # NBA
    "atlanta hawks": "ATL", "boston celtics": "BOS", "brooklyn nets": "BKN",
    "charlotte hornets": "CHA", "chicago bulls": "CHI", "cleveland cavaliers": "CLE",
    "dallas mavericks": "DAL", "denver nuggets": "DEN", "detroit pistons": "DET",
    "golden state warriors": "GSW", "houston rockets": "HOU", "indiana pacers": "IND",
    "la clippers": "LAC", "los angeles clippers": "LAC", "la lakers": "LAL",
    "los angeles lakers": "LAL", "memphis grizzlies": "MEM", "miami heat": "MIA",
    "milwaukee bucks": "MIL", "minnesota timberwolves": "MIN", "new orleans pelicans": "NOP",
    "new york knicks": "NYK", "oklahoma city thunder": "OKC", "orlando magic": "ORL",
    "philadelphia 76ers": "PHI", "phoenix suns": "PHX", "portland trail blazers": "POR",
    "sacramento kings": "SAC", "san antonio spurs": "SAS", "toronto raptors": "TOR",
    "utah jazz": "UTA", "washington wizards": "WAS",
    # NHL
    "anaheim ducks": "ANA", "arizona coyotes": "ARI", "boston bruins": "BOS",
    "buffalo sabres": "BUF", "calgary flames": "CGY", "carolina hurricanes": "CAR",
    "chicago blackhawks": "CHI", "colorado avalanche": "COL", "columbus blue jackets": "CBJ",
    "dallas stars": "DAL", "detroit red wings": "DET", "edmonton oilers": "EDM",
    "florida panthers": "FLA", "los angeles kings": "LAK",
    # Previously missing, which left "Minnesota Wild" / "Wild" unresolvable.
    "minnesota wild": "MIN",
    "montreal canadiens": "MTL",
    "montréal canadiens": "MTL", "nashville predators": "NSH", "new jersey devils": "NJD",
    "new york islanders": "NYI", "new york rangers": "NYR", "ottawa senators": "OTT",
    "philadelphia flyers": "PHI", "pittsburgh penguins": "PIT", "san jose sharks": "SJS",
    "seattle kraken": "SEA", "st. louis blues": "STL", "tampa bay lightning": "TBL",
    "toronto maple leafs": "TOR", "utah hockey club": "UTA", "utah mammoth": "UTA",
    "vancouver canucks": "VAN", "vegas golden knights": "VGK", "washington capitals": "WSH",
    "winnipeg jets": "WPG",
    # MLB
    "arizona diamondbacks": "ARI", "atlanta braves": "ATL", "baltimore orioles": "BAL",
    "boston red sox": "BOS", "chicago cubs": "CHC", "chicago white sox": "CWS",
    "cincinnati reds": "CIN", "cleveland guardians": "CLE", "colorado rockies": "COL",
    "detroit tigers": "DET", "houston astros": "HOU", "kansas city royals": "KCR",
    "los angeles angels": "LAA", "los angeles dodgers": "LAD", "miami marlins": "MIA",
    "milwaukee brewers": "MIL", "minnesota twins": "MIN", "new york mets": "NYM",
    "new york yankees": "NYY", "athletics": "OAK", "oakland athletics": "OAK",
    "philadelphia phillies": "PHI", "pittsburgh pirates": "PIT", "san diego padres": "SDP",
    "san francisco giants": "SFG", "seattle mariners": "SEA", "st. louis cardinals": "STL",
    "tampa bay rays": "TBR", "texas rangers": "TEX", "toronto blue jays": "TOR",
    "washington nationals": "WSH",
}

# League key (lowercase) -> {source abbreviation: canonical code}. These take
# precedence over the league-agnostic table when a league is supplied, which
# lets a code such as "TB" resolve differently per league.
LEAGUE_ABBREV_ALIASES = {
    # Hockey
    "nhl": {"TB": "TBL", "NJ": "NJD", "LA": "LAK", "SJ": "SJS", "WSH": "WSH"},
    # Baseball
    "mlb": {"TB": "TBR", "KC": "KCR", "SF": "SFG", "SD": "SDP", "WSH": "WSH"},
}

# Last word of each full team name -> canonical abbreviation. Filled in
# TEAM_NAME_MAP order; when several teams share a last word, the first one
# encountered is kept and later ones are ignored.
NICKNAME_ABBREV_MAP = {}
for team_name, team_abbrev in TEAM_NAME_MAP.items():
    nickname = team_name.rsplit(None, 1)[-1]
    if not nickname:
        continue
    NICKNAME_ABBREV_MAP.setdefault(nickname, team_abbrev)


def _normalize_team_text(value: str) -> str:
    if not value:
        return ""
    nfkd = unicodedata.normalize('NFKD', value)
    ascii_name = ''.join(c for c in nfkd if not unicodedata.combining(c))
    lowered = ascii_name.lower().strip()
    lowered = re.sub(r'[^a-z0-9\s.]', ' ', lowered)
    lowered = _MULTI_SPACE.sub(' ', lowered).strip()
    return lowered


# Letters that have no Unicode decomposition: NFKD leaves them untouched, so
# without an explicit mapping the non-alpha filter would delete them outright
# (e.g. "Søgaard" -> "sgaard", which never matches a source spelling "Sogaard").
# Only lowercase forms are listed because the table is applied after lower().
_ASCII_LETTER_FALLBACKS = str.maketrans({
    "ø": "o", "ł": "l", "đ": "d", "ð": "d", "þ": "th",
    "ß": "ss", "æ": "ae", "œ": "oe", "ı": "i",
})


def normalize_player_name(name: str) -> str:
    """Canonical player name: lowercase, no accents, no suffixes, single spaces.

    Steps, in order: NFKD-decompose and drop combining marks, lowercase and
    trim, transliterate letters that have no decomposition (ø, ł, æ, ...),
    strip one trailing generational suffix (Jr, Sr, II-V), remove every
    character that is not a-z or whitespace, and collapse whitespace.

    Returns "" for falsy input.
    """
    if not name:
        return ""
    # Remove accents
    nfkd = unicodedata.normalize('NFKD', name)
    ascii_name = ''.join(c for c in nfkd if not unicodedata.combining(c))
    # Lowercase
    result = ascii_name.lower().strip()
    # Transliterate before the non-alpha filter so these letters are kept
    result = result.translate(_ASCII_LETTER_FALLBACKS)
    # Strip suffixes (must precede punctuation removal so "jr." still matches)
    result = _SUFFIXES.sub('', result)
    # Remove non-alpha except spaces
    result = _NON_ALPHA.sub('', result)
    # Collapse whitespace
    result = _MULTI_SPACE.sub(' ', result).strip()
    return result


def normalize_team_abbrev(abbrev: str, league: Optional[str] = None) -> str:
    """Map a team abbreviation to its canonical code.

    The input is upper-cased and stripped. When ``league`` is given, that
    league's alias table is consulted first; otherwise, or when it has no
    entry, the league-agnostic table is used. An abbreviation found in
    neither table is returned upper-cased and stripped. This function does
    not resolve full team names. Falsy input yields "".
    """
    if not abbrev:
        return ""

    code = abbrev.upper().strip()
    league_key = (league or "").lower().strip()

    # League-specific aliases win over the shared table.
    league_alias = (
        LEAGUE_ABBREV_ALIASES.get(league_key, {}).get(code) if league_key else None
    )
    if league_alias:
        return league_alias

    return TEAM_ABBREV_MAP.get(code, code)


def normalize_team_name(name: str) -> str:
    """Return the team display name lowercased and cleaned for fuzzy matching.

    Falsy input yields ""; everything else is delegated to the shared team
    text normalizer.
    """
    return _normalize_team_text(name) if name else ""


# Nicknames used by franchises in more than one league. The league-agnostic
# nickname index keeps only one team per nickname, so the league is required
# to pick the right franchise for these.
_LEAGUE_NICKNAME_ABBREVS = {
    "nba": {"kings": "SAC"},
    "nhl": {"kings": "LAK", "rangers": "NYR"},
    "mlb": {"rangers": "TEX"},
}

# Two-word nicknames whose final word alone is shared by two teams.
_COMPOUND_NICKNAME_ABBREVS = {
    "red sox": "BOS",
    "white sox": "CWS",
}


def canonicalize_team_identifier(value: str, league: Optional[str] = None) -> str:
    """Resolve abbreviations, full team names, and nicknames to a canonical team code.

    Resolution order:
      1. exact full-name match in TEAM_NAME_MAP;
      2. two-word nickname match ("red sox" / "white sox");
      3. league-specific nickname match when ``league`` is given, so that
         e.g. "Kings" resolves to LAK for nhl and SAC for nba;
      4. league-agnostic nickname match on the last word (for a nickname
         shared across leagues this returns whichever team the nickname
         index recorded);
      5. abbreviation normalization of the raw ``value``.

    Args:
        value: Abbreviation, full team name, or nickname.
        league: Optional league key ("nba", "nhl", "mlb"); case-insensitive.

    Returns:
        The canonical team code, or "" when ``value`` normalizes to nothing.
    """
    normalized = _normalize_team_text(value)
    if not normalized:
        return ""

    direct_name = TEAM_NAME_MAP.get(normalized)
    if direct_name:
        return direct_name

    words = normalized.split()

    # Check the last two words first: "sox" alone cannot tell Boston from Chicago.
    compound = _COMPOUND_NICKNAME_ABBREVS.get(" ".join(words[-2:]))
    if compound:
        return compound

    nickname = words[-1]
    league_key = (league or "").lower().strip()
    league_match = _LEAGUE_NICKNAME_ABBREVS.get(league_key, {}).get(nickname)
    if league_match:
        return league_match

    if nickname in NICKNAME_ABBREV_MAP:
        return NICKNAME_ABBREV_MAP[nickname]

    return normalize_team_abbrev(value, league)


def player_names_match(name_a: str, name_b: str) -> bool:
    """Return True when both names normalize to the same canonical string."""
    canonical_a = normalize_player_name(name_a)
    canonical_b = normalize_player_name(name_b)
    return canonical_a == canonical_b


def extract_player_canonical_id(player_name: str, league: str) -> str:
    """Build a lowercase ``first_last_league`` ID from a player name and league.

    Only the first and last words of the normalized name are used; a name
    with fewer than two words contributes its whole normalized form.
    """
    canonical = normalize_player_name(player_name)
    tokens = canonical.split()
    # Middle names are dropped: only the first and last tokens go into the ID.
    stem = canonical if len(tokens) < 2 else f"{tokens[0]}_{tokens[-1]}"
    return f"{stem}_{league}".lower()


def parse_sgo_player_id(sgo_id: str) -> Tuple[str, str]:
    """Split an SGO playerID (FIRST_LAST_N_LEAGUE) into (display_name, league).

    The final underscore-separated segment is the league (returned
    lowercased). A trailing all-digit segment before it is discarded, and
    the remaining segments are capitalized and joined with spaces. IDs with
    fewer than three segments are returned unchanged with an empty league;
    falsy input yields ("", "").
    """
    if not sgo_id:
        return ("", "")

    segments = sgo_id.split("_")
    if len(segments) < 3:
        return (sgo_id, "")

    *name_tokens, league_token = segments
    # Drop the numeric disambiguator that precedes the league, if present.
    if name_tokens[-1].isdigit():
        name_tokens.pop()

    display_name = " ".join(map(str.capitalize, name_tokens))
    return (display_name, league_token.lower())
