#!/usr/bin/env python3
"""Import NBA betting data 2007-2024 from Kaggle."""
import csv
import json
from pathlib import Path

# Directory holding the merged per-sport betting history JSON files.
DATA_DIR = Path("/var/www/html/eventheodds/data/betting")
# Kaggle CSV export that is read as the import source.
KAGGLE_FILE = Path("/var/www/html/eventheodds/data/kaggle/nba_2008-2025.csv")

# Lower-case team abbreviation -> full franchise name.
# Several spellings of the same franchise are accepted because data sources
# disagree (e.g. "gs"/"gsw", "bkn"/"brk"/"bro"). Relocated or renamed
# franchises keep their historical names (Seattle, New Jersey, NO Hornets).
TEAM_MAP = {
    "atl": "Atlanta Hawks",
    "bos": "Boston Celtics",
    "bro": "Brooklyn Nets",
    "brk": "Brooklyn Nets",
    "bkn": "Brooklyn Nets",  # standard league/ESPN code, previously unmapped
    "cha": "Charlotte Hornets",
    "cho": "Charlotte Hornets",
    "chi": "Chicago Bulls",
    "cle": "Cleveland Cavaliers",
    "dal": "Dallas Mavericks",
    "den": "Denver Nuggets",
    "det": "Detroit Pistons",
    "gs": "Golden State Warriors",
    "gsw": "Golden State Warriors",
    "hou": "Houston Rockets",
    "ind": "Indiana Pacers",
    "lac": "LA Clippers",
    "lal": "Los Angeles Lakers",
    "mem": "Memphis Grizzlies",
    "mia": "Miami Heat",
    "mil": "Milwaukee Bucks",
    "min": "Minnesota Timberwolves",
    "no": "New Orleans Pelicans",
    "nop": "New Orleans Pelicans",
    "noh": "New Orleans Hornets",
    "nok": "New Orleans/Oklahoma City Hornets",
    "ny": "New York Knicks",
    "nyk": "New York Knicks",
    "okc": "Oklahoma City Thunder",
    "orl": "Orlando Magic",
    "phi": "Philadelphia 76ers",
    "pho": "Phoenix Suns",
    "phx": "Phoenix Suns",
    "por": "Portland Trail Blazers",
    "sac": "Sacramento Kings",
    "sa": "San Antonio Spurs",
    "sas": "San Antonio Spurs",
    "sea": "Seattle SuperSonics",
    "tor": "Toronto Raptors",
    "utah": "Utah Jazz",
    "uta": "Utah Jazz",
    "was": "Washington Wizards",
    "wsh": "Washington Wizards",
    "nj": "New Jersey Nets",
    "njn": "New Jersey Nets",
}

def _parse_float(raw):
    """Return ``raw`` as a float, or None when the CSV cell is empty or missing."""
    return float(raw) if raw else None


def _parse_int(raw, default=None):
    """Return ``raw`` as an int, or ``default`` when the cell is empty or missing.

    Parsing goes through ``float`` so values such as "110.0" are accepted.
    """
    return int(float(raw)) if raw else default


def import_nba_data(*, kaggle_file=None, data_dir=None, team_map=None):
    """Merge games from the Kaggle NBA betting CSV into ``nba_historical.json``.

    Existing records are loaded from ``<data_dir>/nba_historical.json`` (if the
    file exists), rows from the CSV that are not already present are appended,
    and the combined list is written back to the same file. A game is
    considered already present when its (home team name, away team name,
    first 10 characters of the date) matches, case-insensitively on the names.

    Args:
        kaggle_file: Path of the CSV to import. Defaults to ``KAGGLE_FILE``.
        data_dir: Directory containing ``nba_historical.json``. Defaults to
            ``DATA_DIR``.
        team_map: Mapping of lower-case abbreviation to full team name.
            Defaults to ``TEAM_MAP``. Unknown abbreviations fall back to the
            upper-cased abbreviation.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        json.JSONDecodeError: If the existing JSON file is not valid JSON.
    """
    kaggle_file = Path(KAGGLE_FILE if kaggle_file is None else kaggle_file)
    data_dir = Path(DATA_DIR if data_dir is None else data_dir)
    if team_map is None:
        team_map = TEAM_MAP
    nba_file = data_dir / "nba_historical.json"

    # Load existing data
    existing = []
    if nba_file.exists():
        with open(nba_file, encoding="utf-8") as f:
            existing = json.load(f)

    # ``or ""`` guards against keys that are present but hold null.
    existing_keys = {
        (
            (g.get("home_team") or "").lower(),
            (g.get("away_team") or "").lower(),
            (g.get("date") or "")[:10],
        )
        for g in existing
    }

    print(f"Existing NBA records: {len(existing)}")
    print(f"Loading {kaggle_file.name}...")

    new_games = []
    skipped = 0
    with open(kaggle_file, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # DictReader fills cells missing from short rows with None, so
            # ``row.get(key, "")`` alone is not enough to guarantee a string.
            date = (row.get("date") or "").strip()
            away = (row.get("away") or "").strip().lower()
            home = (row.get("home") or "").strip().lower()

            if not date or not away or not home:
                continue

            home_name = team_map.get(home, home.upper())
            away_name = team_map.get(away, away.upper())

            # Truncate the date exactly like the keys of existing records so
            # both sides of the duplicate check are comparable.
            key = (home_name.lower(), away_name.lower(), date[:10])
            if key in existing_keys:
                continue

            try:
                spread = _parse_float(row.get("spread"))
                total = _parse_float(row.get("total"))
                ml_away = _parse_int(row.get("moneyline_away"))
                ml_home = _parse_int(row.get("moneyline_home"))
                # Missing scores are stored as 0 rather than None.
                score_away = _parse_int(row.get("score_away"), 0)
                score_home = _parse_int(row.get("score_home"), 0)
            except (ValueError, TypeError, OverflowError):
                # Non-numeric, NaN or infinite cell: drop the row but keep count.
                skipped += 1
                continue

            season = row.get("season") or ""
            is_playoff = (row.get("playoffs") or "").strip().lower() == "true"

            # A positive spread with a known favourite is stored as a negative
            # number, i.e. as the favourite's line.
            # NOTE(review): the original code applied the same negation for
            # both "away" and "home"; if "spread" is meant to be from the home
            # team's perspective, the "away" case should stay positive. Confirm
            # against downstream consumers before changing.
            fav = row.get("whos_favored") or ""
            if fav in ("away", "home") and spread is not None and spread > 0:
                spread = -spread

            new_games.append({
                "game_id": f"kaggle_nba_{away}_{home}_{date}",
                "sport": "NBA",
                "date": date,
                "season": season,
                "playoff": is_playoff,
                "home_team": home_name,
                "away_team": away_name,
                "home_score": score_home,
                "away_score": score_away,
                "home_won": score_home > score_away,
                "spread": spread,
                "total": total,
                "home_ml": ml_home,
                "away_ml": ml_away,
                "odds": {
                    "source": "kaggle",
                    "sportsbooks": {
                        "vegas": {
                            "spread": spread,
                            "total": total,
                            "home_ml": ml_home,
                            "away_ml": ml_away,
                        }
                    },
                },
            })
            # Also de-duplicates repeated rows within the CSV itself.
            existing_keys.add(key)

    print(f"New NBA games: {len(new_games)}")
    if skipped:
        print(f"Skipped malformed rows: {skipped}")

    # Merge and save
    all_data = existing + new_games
    with open(nba_file, "w", encoding="utf-8") as f:
        json.dump(all_data, f, indent=2)

    print("\n=== NBA IMPORT COMPLETE ===")
    print(f"Previous: {len(existing)}")
    print(f"Added: {len(new_games)}")
    print(f"Total: {len(all_data)}")

    if new_games:
        dates = [g["date"] for g in new_games]
        print(f"Date range: {min(dates)} to {max(dates)}")
        seasons = {g["season"] for g in new_games if g.get("season")}
        # Rows may carry no season at all; min()/max() on an empty set raise.
        if seasons:
            print(f"Seasons: {min(seasons)} to {max(seasons)}")

# Run the import only when this file is executed as a script, so importing
# the module does not touch the data files.
if __name__ == "__main__":
    import_nba_data()
