#!/usr/bin/env python3
"""Import NHL historical game data from Kaggle into our database."""
import csv
import json
import math
from collections import defaultdict
from pathlib import Path

# Directory holding the merged output; import_nhl_data reads and rewrites
# "nhl_historical.json" inside it.
DATA_DIR = Path("/var/www/html/eventheodds/data/betting")
# Kaggle CSV export that import_nhl_data reads (opened as UTF-8, grouped by game_id).
KAGGLE_FILE = Path("/var/www/html/eventheodds/data/kaggle/nhl_data_extensive.csv")

# Spellings of a truthy CSV flag cell, compared after strip() + lower().
_TRUE_FLAGS = ("1", "true", "1.0")


def _flag_is_set(value):
    """Return True when a CSV flag cell holds one of the accepted truthy spellings."""
    # csv.DictReader yields None for cells missing from a short row.
    return str(value or "").strip().lower() in _TRUE_FLAGS


def _parse_number(value):
    """Parse a CSV cell as a finite float; return None when blank or unparsable."""
    if value is None:
        return None
    text = str(value).strip()
    if not text:
        return None
    try:
        number = float(text)
    except ValueError:
        return None
    # "nan"/"inf" parse as floats but cannot be converted with int() and are
    # not valid JSON numbers, so treat them as missing.
    return number if math.isfinite(number) else None


def _split_home_away(rows):
    """Return (home_row, away_row) for a game, or None unless exactly one of two rows is home."""
    if len(rows) != 2:
        return None
    home_rows = [row for row in rows if _flag_is_set(row.get("is_home"))]
    if len(home_rows) != 1:
        return None
    home_row = home_rows[0]
    away_row = rows[1] if rows[0] is home_row else rows[0]
    return home_row, away_row


def _derive_moneylines(moneyline, spread):
    """Return (home_ml, away_ml) derived from the single favorite moneyline and the spread."""
    if not moneyline:
        return None, None
    favorite_ml = int(moneyline)
    # The other side's price is not in the data: it is synthesized by flipping
    # the sign and moving 20 points further from zero (e.g. -150 -> +170).
    other_ml = int(abs(moneyline) + 20) if moneyline < 0 else int(-moneyline - 20)
    # NOTE(review): a negative home spread is taken to mean the home team is the
    # favorite; with a missing or non-negative spread the away team gets the
    # favorite line - confirm against the dataset's conventions.
    if spread is not None and spread < 0:
        return favorite_ml, other_ml
    return other_ml, favorite_ml


def _build_record(game_id, home_row, away_row, date, home_team, away_team):
    """Build the output record dict for one game from its home and away CSV rows."""
    # Each field is parsed independently so one malformed cell cannot wipe out
    # the other, valid, values of the same game.
    spread = _parse_number(home_row.get("spread"))
    total = _parse_number(home_row.get("over_under"))
    moneyline = _parse_number(home_row.get("favorite_moneyline"))
    # Missing or unparsable goal counts are stored as 0.
    home_goals = int(_parse_number(home_row.get("goals_for")) or 0)
    away_goals = int(_parse_number(away_row.get("goals_for")) or 0)
    home_won = _flag_is_set(home_row.get("won"))
    home_ml, away_ml = _derive_moneylines(moneyline, spread)

    return {
        "game_id": f"kaggle_nhl_{game_id}",
        "sport": "NHL",
        "date": date,
        "season": home_row.get("season") or "",
        "venue": home_row.get("venue") or "",
        "home_team": home_team,
        "away_team": away_team,
        "home_score": home_goals,
        "away_score": away_goals,
        "home_won": home_won,
        "spread": spread,
        "total": total,
        "home_ml": home_ml,
        "away_ml": away_ml,
        "odds": {
            "source": "kaggle",
            "sportsbooks": {
                "consensus": {
                    "spread": spread,
                    "total": total,
                    "home_ml": home_ml,
                    "away_ml": away_ml,
                }
            },
        },
    }


def import_nhl_data(data_dir=None, kaggle_file=None):
    """Import Kaggle NHL data and merge it with the existing records.

    Reads the Kaggle CSV (two rows per game, one per team, grouped by
    ``game_id``), builds one record per game and appends every game whose
    ``(home_team, away_team, date[:10])`` key is not already present in
    ``nhl_historical.json``. The JSON file is then rewritten with the
    existing records followed by the new ones, and a summary is printed.

    Games are skipped when they do not have exactly two rows, when not
    exactly one row is flagged as home, or when the date or either team
    name is empty.

    Args:
        data_dir: Directory containing ``nhl_historical.json``. Defaults to
            the module-level ``DATA_DIR``.
        kaggle_file: Path of the Kaggle CSV. Defaults to the module-level
            ``KAGGLE_FILE``.

    Raises:
        OSError: If the CSV cannot be read or the JSON file cannot be
            read or written.
        json.JSONDecodeError: If the existing JSON file is malformed.
    """
    data_dir = DATA_DIR if data_dir is None else Path(data_dir)
    kaggle_file = KAGGLE_FILE if kaggle_file is None else Path(kaggle_file)
    nhl_file = data_dir / "nhl_historical.json"

    # Load existing NHL data
    existing = []
    if nhl_file.exists():
        with open(nhl_file, encoding="utf-8") as f:
            existing = json.load(f)

    # Track existing games; a null/missing date becomes "" instead of crashing.
    existing_keys = {
        (
            game.get("home_team", ""),
            game.get("away_team", ""),
            str(game.get("date") or "")[:10],
        )
        for game in existing
    }

    print(f"Existing NHL records: {len(existing)}")
    print("Loading Kaggle NHL data...")

    # Group by game_id (two rows per game - home and away)
    games = defaultdict(list)
    with open(kaggle_file, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            game_id = row.get("game_id", "")
            if game_id:
                games[game_id].append(row)

    print(f"Unique games from Kaggle: {len(games)}")

    new_games = []
    for game_id, rows in games.items():
        sides = _split_home_away(rows)
        if sides is None:
            continue
        home_row, away_row = sides

        date = (home_row.get("date") or "")[:10]
        home_team = home_row.get("team_name") or ""
        away_team = away_row.get("team_name") or ""
        if not date or not home_team or not away_team:
            continue

        # Check if already exists (also de-duplicates within the CSV itself)
        key = (home_team, away_team, date)
        if key in existing_keys:
            continue

        new_games.append(
            _build_record(game_id, home_row, away_row, date, home_team, away_team)
        )
        existing_keys.add(key)

    print(f"New games to add: {len(new_games)}")

    # Merge and save. Serialize fully before opening the file for writing so a
    # serialization error cannot leave a truncated JSON file behind.
    all_data = existing + new_games
    payload = json.dumps(all_data, indent=2)
    with open(nhl_file, "w", encoding="utf-8") as f:
        f.write(payload)

    print(f"\n=== NHL IMPORT COMPLETE ===")
    print(f"Previous: {len(existing)}")
    print(f"Added: {len(new_games)}")
    print(f"Total: {len(all_data)}")

    # Stats
    if new_games:
        dates = sorted(g["date"] for g in new_games)
        seasons = {g["season"] for g in new_games if g["season"]}
        print(f"Date range: {dates[0]} to {dates[-1]}")
        # The season column may be absent or empty for every new game.
        if seasons:
            print(f"Seasons: {min(seasons)} to {max(seasons)}")

# Run the import only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    import_nhl_data()
