#!/usr/bin/env python3
"""
Build NBA historical betting data from NBA API.
Creates data/betting/nba_historical.json for backtesting.
"""

import json
import os
import sys
from datetime import datetime
from pathlib import Path

# Put the parent of this script's directory at the front of sys.path.
# NOTE(review): presumably so project-local modules can be imported; none are
# imported in the code visible here -- confirm this is still needed.
sys.path.insert(0, str(Path(__file__).parent.parent))

from nba_api.stats.endpoints import leaguegamefinder
import pandas as pd

# Output locations, anchored at the parent of this script's directory:
#   <root>/data          -> DATA_DIR
#   <root>/data/betting  -> BETTING_DIR
_ROOT_DIR = Path(__file__).parent.parent
DATA_DIR = _ROOT_DIR.joinpath('data')
BETTING_DIR = DATA_DIR.joinpath('betting')

def fetch_season_games(season: str) -> pd.DataFrame:
    """Fetch all completed NBA regular-season games for a season.

    Args:
        season: Season label forwarded to the API as ``season_nullable``
            (``main`` passes values such as ``'2024-25'``).

    Returns:
        A copy of the first data frame returned by ``LeagueGameFinder``,
        restricted to rows whose ``WL`` column is not null (i.e. games that
        already have a result).
    """
    print(f'Fetching games for {season}...')

    finder = leaguegamefinder.LeagueGameFinder(
        season_nullable=season,
        season_type_nullable='Regular Season',
        # '00' is the NBA league id. Pinning it keeps the result set to NBA
        # games only instead of relying on the endpoint's unscoped default.
        league_id_nullable='00',
    )
    games = finder.get_data_frames()[0]

    # Rows without a win/loss flag belong to games that have not finished.
    has_result = games['WL'].notna()
    return games[has_result].copy()

def convert_to_betting_format(games_df: pd.DataFrame, season_int: int) -> list:
    """Convert NBA API team-game rows into one betting record per game.

    The input is expected to hold two rows per game (one per team), with the
    columns ``GAME_ID``, ``GAME_DATE``, ``MATCHUP``, ``TEAM_ABBREVIATION`` and
    ``PTS``. The home row is the one whose ``MATCHUP`` contains ``'vs.'`` and
    the away row the one containing ``'@'``.

    Games are skipped (not raised on) when they do not have exactly two rows,
    when a home or away row cannot be identified, or when either score is
    missing.

    Args:
        games_df: Team-game rows as returned by ``fetch_season_games``.
        season_int: Value stored verbatim under the ``season`` key.

    Returns:
        A list of dicts, one per game, in order of each game's first
        appearance in ``games_df``.
    """
    if games_df.empty:
        return []

    betting_games = []

    # One pass over the games instead of re-filtering the whole frame for
    # every row; sort=False keeps first-appearance order.
    for _, game_rows in games_df.groupby('GAME_ID', sort=False):
        if len(game_rows) != 2:
            continue

        # regex=False: match the literal "vs." (a bare "." would be a regex
        # wildcard). na=False: a missing MATCHUP simply does not match.
        matchups = game_rows['MATCHUP']
        home_rows = game_rows[matchups.str.contains('vs.', regex=False, na=False)]
        away_rows = game_rows[matchups.str.contains('@', regex=False, na=False)]
        if home_rows.empty or away_rows.empty:
            continue

        home_row = home_rows.iloc[0]
        away_row = away_rows.iloc[0]

        # A missing score cannot be converted to int; skip just this game
        # rather than aborting the whole conversion.
        if pd.isna(home_row['PTS']) or pd.isna(away_row['PTS']):
            continue

        home_score = int(home_row['PTS'])
        away_score = int(away_row['PTS'])

        if home_score > away_score:
            winner = 'home'
        elif away_score > home_score:
            winner = 'away'
        else:
            winner = 'draw'

        first_row = game_rows.iloc[0]
        betting_games.append({
            'id': first_row['GAME_ID'],
            'date': first_row['GAME_DATE'],
            'season': season_int,
            'homeTeam': home_row['TEAM_ABBREVIATION'],
            'awayTeam': away_row['TEAM_ABBREVIATION'],
            'scores': {
                'homeScore': home_score,
                'awayScore': away_score
            },
            'result': {
                'winner': winner,
                'margin': home_score - away_score
            },
            'odds': {
                # The NBA API provides no odds. These are placeholders derived
                # from the final score itself (the winner gets -110, the
                # spread is half the actual margin, the total is the actual
                # total), so they encode the outcome and are not market lines.
                'moneylineHome': -110 if winner == 'home' else 100,
                'moneylineAway': -110 if winner == 'away' else 100,
                'spreadHome': round((away_score - home_score) / 2, 1),
                'spreadAway': round((home_score - away_score) / 2, 1),
                'totalLine': home_score + away_score,
                'source': 'estimated'
            },
            'hasRealOdds': False
        })

    return betting_games

def main():
    """Fetch several seasons, convert them, and write the combined JSON file.

    Each season is processed best-effort: a failure while fetching or
    converting one season is reported and the remaining seasons still run.
    The combined records are sorted by ``date`` (newest first) and written to
    ``BETTING_DIR / 'nba_historical.json'``.

    If no records were produced at all (for example, every season failed),
    nothing is written so that an existing output file is not replaced by an
    empty list.
    """
    # Ensure betting directory exists
    BETTING_DIR.mkdir(parents=True, exist_ok=True)

    # (API season label, integer stored in each record's 'season' field)
    seasons = [
        ('2025-26', 2026),
        ('2024-25', 2025),
        ('2023-24', 2024),
    ]

    all_games = []
    for season_str, season_int in seasons:
        try:
            games_df = fetch_season_games(season_str)
            print(f'  Found {len(games_df)} game records for {season_str}')

            betting_games = convert_to_betting_format(games_df, season_int)
            print(f'  Converted to {len(betting_games)} betting records')
        except Exception as e:
            # Deliberately broad: one bad season must not stop the others.
            print(f'  Error fetching {season_str}: {e}')
            continue

        all_games.extend(betting_games)

    output_file = BETTING_DIR / 'nba_historical.json'

    if not all_games:
        # Writing here would clobber previously built data with "[]".
        print(
            f'No games were fetched; leaving {output_file} untouched',
            file=sys.stderr,
        )
        return

    # Newest games first.
    all_games.sort(key=lambda x: x['date'], reverse=True)

    # Explicit encoding so the output does not depend on the platform default.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_games, f, indent=2)

    print(f'\n✅ Saved {len(all_games)} games to {output_file}')

# Run the build only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
