#!/usr/bin/env python3
"""
Build player alias index from data/<league>/players.json files.
Outputs a JSON file used by semantic router for name detection.
"""
import json
import os
import re
import sys
import unicodedata
from datetime import datetime, timezone
from pathlib import Path


def data_dir() -> Path:
    """Return the directory that holds the per-league data files.

    When the NODE_ENV environment variable equals "production" the fixed
    deployment path is returned; otherwise the "data" folder under the
    current working directory is used.
    """
    in_production = os.environ.get("NODE_ENV") == "production"
    return Path("/var/www/html/eventheodds/data") if in_production else Path.cwd() / "data"


def normalize_name(name: str) -> str:
    """Return a canonical, comparison-friendly form of a player name.

    The name is lowercased, accented letters are folded to their base
    letters, apostrophes/quotes/backticks/periods/commas are dropped,
    hyphens become spaces, runs of whitespace are collapsed and a trailing
    generational suffix (jr, sr, ii, iii, iv) is removed.

    Args:
        name: Raw player name; may be empty.

    Returns:
        The normalized name, or "" when the input is empty or nothing
        remains after normalization.
    """
    if not name:
        return ""
    name = name.lower().strip()
    # Fold accents: NFKD splits "č" into "c" + a combining mark, and the
    # combining marks are then dropped. Letters with no decomposition
    # (e.g. "ø") are left untouched.
    decomposed = unicodedata.normalize("NFKD", name)
    name = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Remove punctuation that commonly appears inside names
    name = re.sub(r"[’'`\".,]", "", name)
    name = re.sub(r"[-]", " ", name)
    # Collapse whitespace BEFORE the suffix check: removing punctuation can
    # leave a trailing space ("jr ." -> "jr "), which would otherwise stop
    # the end-anchored suffix pattern from matching.
    name = re.sub(r"\s+", " ", name).strip()
    # Remove common suffixes
    name = re.sub(r"\b(jr|sr|ii|iii|iv)\b$", "", name).strip()
    return name


def load_players(file_path: Path) -> list[str]:
    """Read player names from a players JSON file.

    The file may contain either a top-level collection of entries or an
    object with a "players" key holding that collection. Each entry is
    either a plain string (the name itself) or an object whose name is
    taken from the first of "name", "full_name" or "playerName" that holds
    a non-empty string. Entries that yield no string name (null, numbers,
    nested lists, objects without a usable key) are skipped.

    This is best-effort: any problem reading or parsing the file results
    in an empty list rather than an exception. A missing file is silent;
    other failures are reported on stderr.

    Args:
        file_path: Location of the players JSON file.

    Returns:
        The raw (un-normalized) names in file order, possibly empty.
    """
    try:
        with file_path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A league without a players file is simply treated as empty.
        return []
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Warning: could not read players from {file_path}: {exc}", file=sys.stderr)
        return []

    if isinstance(data, dict) and "players" in data:
        data = data["players"]
    if not isinstance(data, (list, dict)):
        print(
            f"Warning: unexpected players structure in {file_path}: {type(data).__name__}",
            file=sys.stderr,
        )
        return []

    # NOTE(review): a dict without a "players" key is iterated by key, so
    # its keys are treated as names -- confirm that this shape is intended.
    names = []
    for entry in data:
        if isinstance(entry, dict):
            candidates = (entry.get(key) for key in ("name", "full_name", "playerName"))
        else:
            candidates = (entry,)
        # Only real, non-empty strings count as names; this avoids turning
        # null into "None" and keeps non-string values away from callers.
        name = next((c for c in candidates if isinstance(c, str) and c), None)
        if name:
            names.append(name)
    return names


def build_alias_index():
    """Build the per-league alias index and write it to player_aliases.json.

    For every known league the players file under the data directory is
    loaded, each name is normalized, and the unique non-empty results are
    stored in sorted order. Leagues that produce no aliases are omitted.
    The resulting document (generation timestamp, total alias count and
    the per-league lists) is written to the data directory and a summary
    line is printed.
    """
    root = data_dir()
    index: dict[str, list[str]] = {}

    for league in ("nba", "nfl", "nhl", "mlb", "wnba", "ncaaf", "ncaab", "eng.1", "epl"):
        source = root / league / "players.json"
        # The eng.1 data may live under the epl folder instead.
        if league == "eng.1" and not source.exists():
            source = root / "epl" / "players.json"

        # A set handles de-duplication; empty normalizations are discarded.
        unique = {normalize_name(raw) for raw in load_players(source)}
        unique.discard("")
        if unique:
            index[league] = sorted(unique)

    total = sum(len(entries) for entries in index.values())
    payload = {
        "generatedAt": datetime.now(timezone.utc).isoformat(),
        "totalAliases": total,
        "aliasesByLeague": index,
    }

    target = root / "player_aliases.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    print(f"✅ Wrote {total} aliases to {target}")


# Build the index only when run as a script, not when imported as a module.
if __name__ == "__main__":
    build_alias_index()
