#!/usr/bin/env python3
"""Import NFL scores and betting data from Kaggle."""
import csv
import json
from pathlib import Path

# Directory holding the merged betting data; import_nfl_data() reads and
# rewrites "nfl_historical.json" inside it.
DATA_DIR = Path("/var/www/html/eventheodds/data/betting")
# Kaggle CSV export that import_nfl_data() reads game rows from.
KAGGLE_FILE = Path("/var/www/html/eventheodds/data/kaggle/spreadspoke_scores.csv")

def _clean(value):
    """Return *value* as a stripped string; a missing (None) value becomes ""."""
    return "" if value is None else str(value).strip()


def _parse_number(row, column, cast):
    """Convert the *column* cell of *row* with *cast*; blank cells give None.

    Raises:
        ValueError: if the cell is non-blank but *cast* cannot parse it.
    """
    text = _clean(row.get(column))
    return cast(text) if text else None


def _is_missing(value):
    """Return True for None and empty strings/containers, but not for 0 or False."""
    return value is None or (isinstance(value, (str, list, dict)) and not value)


def _game_key(home, away, date):
    """Build the case-insensitive (home, away, date[:10]) de-duplication key."""
    return (_clean(home).lower(), _clean(away).lower(), _clean(date)[:10])


def _build_record(row, home, away, date):
    """Translate one CSV *row* into the JSON game record.

    Raises:
        ValueError: if a score, spread or total cell is not numeric.
    """
    score_home = _parse_number(row, "score_home", int)
    score_away = _parse_number(row, "score_away", int)
    spread = _parse_number(row, "spread_favorite", float)
    total = _parse_number(row, "over_under_line", float)
    favorite = _clean(row.get("team_favorite_id"))

    # Express the spread from the home team's point of view: keep the sign
    # when the home team is the favorite, flip it otherwise.
    # NOTE(review): this compares team_favorite_id with team_home verbatim; if
    # the CSV stores an abbreviation in team_favorite_id the comparison never
    # matches and every spread is flipped -- confirm against the data.
    if spread is None:
        home_spread = None
    elif spread == 0:
        # Pick'em: keep an explicit 0.0 (and avoid serializing "-0.0").
        home_spread = 0.0
    elif favorite and favorite.lower() == home.lower():
        home_spread = spread
    else:
        home_spread = -spread

    # Compare against None so that a 0-point score still yields a result.
    if score_home is None or score_away is None:
        home_won = None
    else:
        home_won = score_home > score_away

    weather_temp = _clean(row.get("weather_temperature"))
    weather = None
    if weather_temp:
        weather = {
            "temperature": weather_temp,
            "wind_mph": _clean(row.get("weather_wind_mph")),
        }

    return {
        "game_id": f"kaggle_nfl_{home}_{away}_{date}".replace(" ", "_"),
        "sport": "NFL",
        "date": date,
        "season": _clean(row.get("schedule_season")),
        "week": _clean(row.get("schedule_week")),
        "playoff": _clean(row.get("schedule_playoff")).upper() == "TRUE",
        "home_team": home,
        "away_team": away,
        "home_score": score_home,
        "away_score": score_away,
        "home_won": home_won,
        "spread": home_spread,
        "total": total,
        "stadium": _clean(row.get("stadium")),
        "weather": weather,
        "odds": {
            "source": "kaggle",
            "sportsbooks": {
                "consensus": {
                    "spread": home_spread,
                    "total": total,
                },
            },
        },
    }


def _merge_missing(target, record):
    """Copy into *target* every field of *record* that *target* lacks."""
    for field, value in record.items():
        if not _is_missing(value) and _is_missing(target.get(field)):
            target[field] = value


def import_nfl_data(kaggle_file=None, nfl_file=None):
    """Import Kaggle NFL data with deduplication and merging.

    Rows of the Kaggle CSV are matched against the stored games by
    (home team, away team, date). A matching stored game only receives the
    fields it is missing; unmatched rows are appended as new games. The
    combined list is written back to the JSON file and a summary is printed.

    Args:
        kaggle_file: Path of the CSV to read. Defaults to ``KAGGLE_FILE``.
        nfl_file: Path of the JSON file to read and rewrite. Defaults to
            ``DATA_DIR / "nfl_historical.json"``.

    Rows without a date, home team or away team are ignored. Rows whose
    numeric cells cannot be parsed are skipped and counted in the summary.
    """
    kaggle_file = Path(KAGGLE_FILE if kaggle_file is None else kaggle_file)
    nfl_file = Path(
        (DATA_DIR / "nfl_historical.json") if nfl_file is None else nfl_file
    )

    # Load existing data
    existing = []
    if nfl_file.exists():
        with open(nfl_file, encoding="utf-8") as f:
            existing = json.load(f)

    # Map each key to the record object itself (not a list index) so that
    # later CSV rows can be merged into stored games and new games alike.
    records_by_key = {
        _game_key(g.get("home_team"), g.get("away_team"), g.get("date")): g
        for g in existing
    }

    print(f"Existing NFL records: {len(existing)}")
    print(f"Loading {kaggle_file.name}...")

    new_games = []
    new_keys = set()
    updated = 0
    skipped = 0

    with open(kaggle_file, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            # Short rows make DictReader return None for the missing cells.
            date = _clean(row.get("schedule_date"))
            home = _clean(row.get("team_home"))
            away = _clean(row.get("team_away"))
            if not (date and home and away):
                continue

            try:
                record = _build_record(row, home, away, date)
            except ValueError:
                # Malformed number: skip just this row, as before.
                skipped += 1
                continue

            key = _game_key(home, away, date)
            target = records_by_key.get(key)
            if target is None:
                new_games.append(record)
                new_keys.add(key)
                records_by_key[key] = record
                continue

            # Merge with the known game - add any missing data.
            _merge_missing(target, record)
            if key not in new_keys:
                updated += 1

    print(f"New NFL games: {len(new_games)}")
    print(f"Updated existing: {updated}")
    if skipped:
        print(f"Skipped malformed rows: {skipped}")

    # Merge and save
    all_data = existing + new_games
    nfl_file.parent.mkdir(parents=True, exist_ok=True)
    with open(nfl_file, "w", encoding="utf-8") as f:
        json.dump(all_data, f, indent=2)

    print("\n=== NFL IMPORT COMPLETE ===")
    print(f"Total: {len(all_data)}")

    if new_games:
        dates = [g["date"] for g in new_games]
        seasons = {g["season"] for g in new_games if g.get("season")}
        # Computed outside the f-string: nesting the same quote type inside
        # an f-string is a syntax error before Python 3.12.
        first_season = min(seasons) if seasons else "N/A"
        last_season = max(seasons) if seasons else "N/A"
        print(f"Date range: {min(dates)} to {max(dates)}")
        print(f"Seasons: {first_season} to {last_season}")

# Run the import only when this file is executed as a script, not on import.
if __name__ == "__main__":
    import_nfl_data()
