#!/usr/bin/env python3
"""
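Fetch sports data from the BallDontLie API and store it in the betting database format.

This script fetches game data for the seasons listed in SPORT_CONFIG
(currently 2024 and 2025) and converts it to the format expected by
sportsBettingData.ts for backtesting.

Usage:
    python scripts/fetch_balldontlie_data.py [sport] [--odds-backfill-all]

    sport: nba, nfl, nhl, mlb, or all (default: all)
    --odds-backfill-all: run The Odds API historical backfill for all sports

Environment:
    BALLDONTLIE_API_KEY: API key for BallDontLie (required)
    THE_ODDS_API_KEY: API key for The Odds API (optional; fallback odds are skipped without it)
    THE_ODDS_BACKFILL_ALL: set to 1/true/yes to enable the all-sports odds backfill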
"""

import os
import sys

# Controls whether The Odds API historical backfill runs for all sports.
# Enable for catch-up runs via the `--odds-backfill-all` CLI flag or by setting
# the THE_ODDS_BACKFILL_ALL environment variable to 1/true/yes (case-insensitive).
# NOTE(review): evaluated once at import time by scanning sys.argv directly,
# before the remaining imports below.
BACKFILL_ALL_SPORTS = ('--odds-backfill-all' in sys.argv) or (os.environ.get('THE_ODDS_BACKFILL_ALL', '').lower() in ('1','true','yes'))
import json
import time
import hashlib
from datetime import datetime, timedelta
from pathlib import Path

# Try to import requests, fall back to urllib if not available
try:
    import requests
    USE_REQUESTS = True
except ImportError:
    import urllib.request
    import urllib.error
    USE_REQUESTS = False

# Configuration
# Base URL shared by every BallDontLie endpoint; the per-sport path prefix
# comes from SPORT_CONFIG[...]["api_prefix"].
API_BASE_URL = "https://api.balldontlie.io"
# Data directories, resolved relative to the parent of this script's directory.
DATA_DIR = Path(__file__).parent.parent / "data" / "betting"
CACHE_DIR = Path(__file__).parent.parent / "data" / "balldontlie_cache"

# API key from environment
# Fail fast at import time: make_request() sends this key as the Authorization
# header on every BallDontLie call, so nothing useful can run without it.
API_KEY = os.environ.get("BALLDONTLIE_API_KEY")
if not API_KEY:
    raise RuntimeError("BALLDONTLIE_API_KEY is not set. Put it in the environment (recommended: .env / systemd EnvironmentFile).")
# Optional key for The Odds API; when empty, fetch_the_odds_api_fallback()
# skips the fallback odds fetch.
THE_ODDS_API_KEY = os.environ.get("THE_ODDS_API_KEY", "")


def _norm_team_name(name: str) -> str:
    if not name:
        return ""
    n = name.strip().lower()
    for ch in ['.', ',', "'", '"']:
        n = n.replace(ch, '')
    n = ' '.join(n.split())
    aliases = {
        'la rams': 'los angeles rams',
        'la chargers': 'los angeles chargers',
        'ny giants': 'new york giants',
        'ny jets': 'new york jets',
    }
    return aliases.get(n, n)

# Sport configurations
# Keyed by the sport code used throughout this script. Each entry holds:
#   api_prefix  - path prefix appended to API_BASE_URL when building endpoint URLs
#   seasons     - season years fetched for that sport
#   output_file - JSON file name for the sport's data
#                 (presumably written under DATA_DIR — TODO confirm against the writer)
SPORT_CONFIG = {
    "nba": {
        "api_prefix": "/v1",
        "seasons": [2024, 2025],  # Fetch both 2024 and 2025 seasons
        "output_file": "nba_historical.json"
    },
    "nfl": {
        "api_prefix": "/nfl/v1",
        "seasons": [2024, 2025],
        "output_file": "nfl_historical.json"
    },
    "nhl": {
        "api_prefix": "/nhl/v1",
        "seasons": [2024, 2025],
        "output_file": "nhl_historical.json"
    },
    "mlb": {
        "api_prefix": "/mlb/v1",
        "seasons": [2024],  # MLB 2025 not started yet
        "output_file": "mlb_historical.json"
    }
}


def make_request(url: str) -> dict:
    """Send an authenticated GET to BallDontLie and return the decoded JSON body.

    The API key is sent in the ``Authorization`` header. Uses ``requests`` when
    it was importable, otherwise falls back to ``urllib``. Both paths use a
    30 second timeout and raise on an HTTP error status.
    """
    auth_headers = {"Authorization": API_KEY}

    if not USE_REQUESTS:
        request = urllib.request.Request(url, headers=auth_headers)
        with urllib.request.urlopen(request, timeout=30) as reply:
            raw_body = reply.read()
        return json.loads(raw_body.decode())

    reply = requests.get(url, headers=auth_headers, timeout=30)
    reply.raise_for_status()
    return reply.json()


def fetch_all_games(sport: str, season: int, per_page: int = 100, max_pages: int = 200) -> list:
    """Download every game of one season for one sport.

    Pagination follows whichever hint the response's ``meta`` carries:
    - ``meta.next_cursor`` (cursor-based, e.g. NFL): the cursor is sent on the
      next request and the page counter simply advances by one;
    - ``meta.next_page`` (page-based): the page counter jumps to that value.

    Returns the accumulated list of raw game dicts. An unknown sport, or an
    error while still on page 1, yields an empty list; an error on a later
    page returns whatever was collected so far.
    """
    sport_cfg = SPORT_CONFIG.get(sport)
    if not sport_cfg:
        print(f"Unknown sport: {sport}")
        return []

    collected = []
    cursor = None
    page = 1

    print(f"Fetching {sport.upper()} games for {season} season...")

    while page <= max_pages:
        query = f"seasons[]={season}&per_page={per_page}"
        if cursor is not None:
            query += f"&cursor={cursor}"
        query += f"&page={page}"
        url = f"{API_BASE_URL}{sport_cfg['api_prefix']}/games?{query}"
        print(f"  Page {page}...", end=" ", flush=True)

        try:
            payload = make_request(url)
            batch = payload.get('data', [])

            if not batch:
                print('done (no more data)')
                break

            collected.extend(batch)
            print(f"{len(batch)} games (total: {len(collected)})")

            meta = payload.get('meta', {}) or {}
            next_cursor = meta.get('next_cursor')
            next_page = meta.get('next_page')

            # Neither hint present: this was the last page.
            if next_cursor is None and next_page is None:
                print('  ✓ Completed - all pages fetched')
                break

            if next_cursor is not None:
                cursor = next_cursor
                page += 1
            else:
                page = int(next_page)

            time.sleep(0.3)

        except Exception as exc:
            print(f"error: {exc}")
            if page == 1:
                return []
            break

    return collected


def fetch_real_odds(sport: str, season: int, per_page: int = 100, max_pages: int = 50) -> dict:
    """Page through the BallDontLie odds endpoint and group entries by game.

    Returns a dict mapping game_id -> {"moneyline", "spread", "total"}, where
    each slot holds the most recent raw odds entry of that type (or None).
    An unknown sport returns an empty dict; any request/parse error stops the
    paging and returns what was gathered so far.
    """
    sport_cfg = SPORT_CONFIG.get(sport)
    if not sport_cfg:
        return {}

    # Maps the lower-cased entry "type" to the slot it is stored under.
    slot_for_type = {
        "2way": "moneyline",
        "moneyline": "moneyline",
        "spread": "spread",
        "over_under": "total",
        "total": "total",
        "totals": "total",
    }

    odds_by_game = {}

    print(f"Fetching {sport.upper()} REAL odds for {season} season...")

    for page in range(1, max_pages + 1):
        endpoint = f"{API_BASE_URL}{sport_cfg['api_prefix']}/odds"
        paging = f"per_page={per_page}&page={page}"
        if sport == "nfl":
            # NFL requires a week parameter; only week 1 is requested here
            # (full week iteration lives in fetch_nfl_odds_by_week).
            url = f"{endpoint}?season={season}&week=1&{paging}"
        else:
            # NBA and every other sport share the same generic query.
            url = f"{endpoint}?{paging}"

        print(f"  Odds page {page}...", end=" ", flush=True)

        try:
            payload = make_request(url)
            entries = payload.get("data", [])

            if not entries:
                print("done (no more data)")
                break

            for entry in entries:
                game_id = entry.get("game_id")
                if not game_id:
                    continue
                slots = odds_by_game.setdefault(
                    game_id, {"moneyline": None, "spread": None, "total": None}
                )
                slot = slot_for_type.get(entry.get("type", "").lower())
                if slot is not None:
                    slots[slot] = entry

            print(f"{len(entries)} entries (total games with odds: {len(odds_by_game)})")

            # No next_page in the metadata means this was the final page.
            meta = payload.get("meta", {})
            if meta.get("next_page") is None:
                print("  ✓ Completed - all odds pages fetched")
                break

            time.sleep(0.3)  # Rate limiting

        except Exception as exc:
            print(f"error: {exc}")
            break

    return odds_by_game




def fetch_nfl_odds_by_week(season: int) -> tuple:
    """Fetch NFL odds week by week (weeks 1-22, playoffs included) plus game details.

    For every week the odds endpoint is queried once; the first odds entry seen
    for each game_id is kept. Afterwards the game record of every game that has
    odds is fetched and annotated with ``_real_odds`` / ``_has_real_odds``.

    Args:
        season: NFL season year passed to the odds endpoint.

    Returns:
        ``(games_with_odds, odds_by_game_id)`` where ``games_with_odds`` is a
        list of raw game dicts and ``odds_by_game_id`` maps game_id to a flat
        odds dict (moneyline_home, spread_home, total, vendor, ...).
        Returns ``([], {})`` when the NFL config is missing.
    """
    config = SPORT_CONFIG.get("nfl")
    if not config:
        return [], {}

    # Insertion-ordered, so the detail fetch below runs in a stable order.
    all_odds = {}

    print(f"Fetching NFL REAL odds for {season} season (weeks 1-22)...")

    for week in range(1, 23):
        url = f"{API_BASE_URL}{config['api_prefix']}/odds?season={season}&week={week}&per_page=100"

        try:
            data = make_request(url)
            odds_list = data.get("data", [])

            if odds_list:
                for odds_entry in odds_list:
                    game_id = odds_entry.get("game_id")
                    # Keep only the first entry (vendor) seen for each game.
                    if game_id and game_id not in all_odds:
                        all_odds[game_id] = {
                            "moneyline_home": odds_entry.get("moneyline_home_odds"),
                            "moneyline_away": odds_entry.get("moneyline_away_odds"),
                            "spread_home": odds_entry.get("spread_home_value"),
                            "spread_home_odds": odds_entry.get("spread_home_odds"),
                            "spread_away": odds_entry.get("spread_away_value"),
                            "spread_away_odds": odds_entry.get("spread_away_odds"),
                            "total": odds_entry.get("total_value"),
                            "total_over_odds": odds_entry.get("total_over_odds"),
                            "total_under_odds": odds_entry.get("total_under_odds"),
                            "vendor": odds_entry.get("vendor"),
                        }

                # The game count is cumulative across the weeks fetched so far.
                print(f"  Week {week}: {len(odds_list)} odds for {len(all_odds)} games")

            time.sleep(0.2)

        except Exception as e:
            if "400" not in str(e):  # Ignore 400 errors for weeks with no data
                print(f"  Week {week}: error - {e}")
            continue

    print(f"  Found {len(all_odds)} unique games with odds")

    # Now fetch game details for each game_id
    games_with_odds = []
    print(f"  Fetching game details for {len(all_odds)} games...")

    failed = 0
    for game_id, game_odds in all_odds.items():
        try:
            url = f"{API_BASE_URL}{config['api_prefix']}/games/{game_id}"
            data = make_request(url)
            # Some responses wrap the record in "data", others return it bare.
            game = data.get("data", data)
            if game and game.get("id"):
                # Merge odds into game
                game["_real_odds"] = game_odds
                game["_has_real_odds"] = True
                games_with_odds.append(game)
            time.sleep(0.1)
        except Exception:
            # Best effort: skip games we can't fetch, but report how many.
            failed += 1

    print(f"  Fetched details for {len(games_with_odds)} games")
    if failed:
        print(f"  ⚠️ Could not fetch details for {failed} games (skipped)")
    return games_with_odds, all_odds


def _parse_odds_api_event(ev: dict):
    """Pull first-bookmaker odds out of one The Odds API event.

    Returns ``(home_team, away_team, odds_dict)``, or ``None`` when the event
    lacks team names, bookmakers, or any usable price/line.
    """
    home = ev.get('home_team')
    away = ev.get('away_team')
    if not home or not away:
        return None

    # pick first bookmaker market set (good enough for now)
    bookmakers = ev.get('bookmakers', []) or []
    if not bookmakers:
        return None
    markets = {m.get('key'): m for m in (bookmakers[0].get('markets', []) or [])}

    def outcomes(market_key):
        return (markets.get(market_key) or {}).get('outcomes', []) or []

    ml_home = ml_away = None
    spread_home = spread_away = None
    spread_odds_home = spread_odds_away = -110
    total_line = None

    for o in outcomes('h2h'):
        if o.get('name') == home:
            ml_home = o.get('price')
        elif o.get('name') == away:
            ml_away = o.get('price')

    for o in outcomes('spreads'):
        if o.get('name') == home:
            spread_home = o.get('point')
            spread_odds_home = o.get('price', -110)
        elif o.get('name') == away:
            spread_away = o.get('point')
            spread_odds_away = o.get('price', -110)

    # The two sides of a spread mirror each other; fill in a missing side.
    if spread_home is None and spread_away is not None:
        spread_home = -float(spread_away)
    if spread_away is None and spread_home is not None:
        spread_away = -float(spread_home)

    # Over and Under share one line, so the first outcome with a point is enough.
    for o in outcomes('totals'):
        pt = o.get('point')
        if pt is not None:
            total_line = pt
            break

    # Compare against None so a pick'em spread of 0 still counts as data.
    if all(v is None for v in (ml_home, ml_away, spread_home, total_line)):
        return None

    return home, away, {
        'moneylineHome': ml_home,
        'moneylineAway': ml_away,
        'spreadHome': spread_home,
        'spreadAway': spread_away,
        'spreadOddsHome': spread_odds_home,
        'spreadOddsAway': spread_odds_away,
        'totalLine': total_line,
        'source': 'the-odds-api',
    }


def fetch_the_odds_api_fallback(sport: str, games: list) -> dict:
    """Fetch odds from The Odds API historical endpoint and map them by (date, home, away).

    Only dates that have at least one game without real odds are requested, and
    a single snapshot per date (taken at 00:00:00 UTC of that date) is used to
    keep API usage reasonable. Odds come from the first bookmaker of each event.

    Args:
        sport: Sport code (nfl, nba, mlb, nhl).
        games: Converted games; only their ``date`` and ``hasRealOdds`` fields are read.

    Returns:
        Dict keyed by ``(date, normalized_home_name, normalized_away_name)``
        with odds in our betting format. Empty when the API key is missing,
        the sport is unsupported, or no date needs odds. Failed dates are
        reported and skipped; the loop stops early once the API reports that
        usage credits are exhausted.
    """
    if not THE_ODDS_API_KEY:
        print('  ⚠️ THE_ODDS_API_KEY not set, skipping fallback')
        return {}

    sport_map = {
        'nfl': 'americanfootball_nfl',
        'nba': 'basketball_nba',
        'mlb': 'baseball_mlb',
        'nhl': 'icehockey_nhl'
    }
    api_sport = sport_map.get(sport)
    if not api_sport:
        return {}

    # Only fetch dates that actually need odds
    missing_dates = sorted({g.get('date') for g in games if g.get('date') and not g.get('hasRealOdds')})
    if not missing_dates:
        return {}

    print(f"  Fetching fallback odds from The Odds API for {sport.upper()} ({len(missing_dates)} dates)...")

    out = {}
    for d in missing_dates:
        snapshot_iso = f"{d}T00:00:00Z"
        url = f"https://api.the-odds-api.com/v4/historical/sports/{api_sport}/odds"
        params = f"?apiKey={THE_ODDS_API_KEY}&regions=us&markets=h2h,spreads,totals&oddsFormat=american&dateFormat=iso&date={snapshot_iso}"

        try:
            if USE_REQUESTS:
                resp = requests.get(url + params, timeout=30)
                if resp.status_code != 200:
                    # Stop early if we ran out of The Odds API credits (prevents hammering 401s).
                    if resp.status_code == 401:
                        try:
                            error_code = resp.json().get('error_code')
                        except Exception:
                            error_code = None
                        if error_code == 'OUT_OF_USAGE_CREDITS':
                            print('  ❌ The Odds API quota exhausted (OUT_OF_USAGE_CREDITS). Stopping backfill early.')
                            break
                    print(f"    {d}: The Odds API returned HTTP {resp.status_code}, skipping date")
                    continue
                payload = resp.json()
            else:
                req = urllib.request.Request(url + params)
                with urllib.request.urlopen(req, timeout=30) as response:
                    payload = json.loads(response.read().decode())

            # payload may be wrapped
            events = None
            if isinstance(payload, list):
                events = payload
            elif isinstance(payload, dict):
                if isinstance(payload.get('data'), list):
                    events = payload.get('data')
                elif isinstance(payload.get('data'), dict) and isinstance(payload['data'].get('data'), list):
                    events = payload['data']['data']

            if not events:
                continue

            for ev in events:
                parsed = _parse_odds_api_event(ev)
                if parsed is None:
                    continue
                home, away, odds = parsed
                out[(d, _norm_team_name(home), _norm_team_name(away))] = odds

            time.sleep(0.2)
        except Exception as e:
            # Print only the exception type: the message text can contain the
            # request URL, which carries the API key as a query parameter.
            print(f"    {d}: fallback odds fetch failed ({type(e).__name__}), skipping date")
            continue

    return out



def estimate_odds(margin: int, total_points: int, sport: str) -> dict:
    """
    DEPRECATED: Estimate odds based on final score.
    WARNING: This creates information leakage - odds derived from outcomes are NOT valid for backtesting!
    Only use this for display purposes when real odds are unavailable.

    Args:
        margin: Home score minus away score.
        total_points: Combined final score; reused as the estimated total line.
        sport: Sport code. "nfl", "mlb" and "nhl" have their own tables; any
            other value uses the NBA table.

    Returns:
        Dict with moneylineHome/moneylineAway (clamped to [-500, 500]),
        spreadHome/spreadAway, totalLine and a ``source`` of
        "estimated_DO_NOT_BACKTEST" so these values are never mistaken for
        real market odds.
    """
    # Tiers are checked top-down against abs(margin):
    #   (exclusive margin floor, winner base, winner slope, loser base, loser slope)
    # winner price = base + |margin| * slope, loser price = base - |margin| * slope.
    tiers_by_sport = {
        # NFL games are typically lower scoring, tighter spreads
        "nfl": ((14, -300, 5, 250, 4), (7, -200, 8, 180, 6), (0, -150, 7, 130, 5)),
        # MLB uses run lines (+/- 1.5 typically)
        "mlb": ((5, -250, 20, 220, 15), (2, -180, 15, 160, 12), (0, -130, 10, 110, 8)),
        # NHL uses puck lines (+/- 1.5)
        "nhl": ((3, -200, 30, 180, 25), (1, -150, 25, 130, 20), (0, -120, 15, 100, 10)),
    }
    nba_tiers = ((10, -200, 10, 180, 8), (0, -150, 5, 130, 5))
    tiers = tiers_by_sport.get(sport, nba_tiers)

    size = abs(margin)
    # A tied game matches no tier and keeps the even -110 / -110 prices.
    winner_price = loser_price = -110
    for floor, win_base, win_slope, lose_base, lose_slope in tiers:
        if size > floor:
            winner_price = win_base + size * win_slope
            loser_price = lose_base - size * lose_slope
            if sport == "nfl" and floor == 14:
                # NFL blowouts are pinned to at least -400 / +350.
                winner_price = min(winner_price, -400)
                loser_price = max(loser_price, 350)
            break

    if margin >= 0:
        ml_home, ml_away = winner_price, loser_price
    else:
        ml_home, ml_away = loser_price, winner_price

    if sport == "nfl":
        # Half the margin, rounded to the nearest half point.
        spread_home = round(-margin / 2 * 2) / 2
    elif sport in ("mlb", "nhl"):
        spread_home = -1.5 if margin > 0 else 1.5  # Run line / puck line
    else:
        spread_home = round(-margin / 2, 1)

    return {
        "moneylineHome": int(max(min(ml_home, 500), -500)),
        "moneylineAway": int(max(min(ml_away, 500), -500)),
        "spreadHome": spread_home,
        # Avoid emitting -0.0 for a zero spread.
        "spreadAway": -spread_home if spread_home else 0.0,
        "totalLine": round(total_points, 0),
        # These numbers are derived from the result, so never label them "live".
        "source": "estimated_DO_NOT_BACKTEST"
    }


def _coerce_float(value):
    """Return *value* as a float, or None when it is missing or not numeric."""
    if value is None or value == "":
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _coerce_price(value, default=None):
    """Return American odds as an int, or *default* when missing, zero or not numeric."""
    number = _coerce_float(value)
    # `number != number` is only true for NaN, which int() cannot convert.
    if not number or number != number:
        return default
    return int(number)


def _grade_spread(margin, spread_home):
    """Grade the spread: 'home', 'away' or 'push' (None when there is no spread)."""
    if spread_home is None:
        return None
    # The home side covers when its margin plus its handicap is positive.
    adjusted = margin + spread_home
    if adjusted > 0:
        return "home"
    if adjusted < 0:
        return "away"
    return "push"


def convert_game(game: dict, sport: str, season: int, real_odds: dict = None) -> dict:
    """Convert a BallDontLie game to our betting format.

    Args:
        game: Game data from BallDontLie API. When it carries ``_has_real_odds``
              and ``_real_odds`` (set by fetch_nfl_odds_by_week), those embedded
              odds take precedence over ``real_odds``.
        sport: Sport code (nba, nfl, etc.)
        season: Season year
        real_odds: Dict with real odds data from BallDontLie odds endpoint
                   Keys: 'moneyline', 'spread', 'total'

    Returns:
        The game in betting format, or None when both scores are 0 (game not
        played / no score data). Without real odds the ``odds`` entry is
        estimated from the result and marked "estimated_DO_NOT_BACKTEST".
    """

    # Handle different API response structures per sport. `or` fallbacks are
    # used so that keys present with a null value behave like missing keys.
    if sport == "nhl":
        home_team = game.get("home_team") or {}
        visitor_team = game.get("away_team") or {}
        home_score = game.get("home_score") or 0
        away_score = game.get("away_score") or 0
        raw_date = game.get("game_date") or game.get("date") or ""
        home_name = home_team.get("full_name") or home_team.get("name") or "Unknown"
        away_name = visitor_team.get("full_name") or visitor_team.get("name") or "Unknown"

    elif sport == "mlb":
        home_team = game.get("home_team") or {}
        visitor_team = game.get("away_team") or {}
        # MLB stores runs in home_team_data/away_team_data
        home_data = game.get("home_team_data") or {}
        away_data = game.get("away_team_data") or {}
        home_score = home_data.get("runs") or 0
        away_score = away_data.get("runs") or 0
        raw_date = game.get("date") or ""
        home_name = (home_team.get("full_name") or home_team.get("display_name")
                     or home_team.get("name") or game.get("home_team_name") or "Unknown")
        away_name = (visitor_team.get("full_name") or visitor_team.get("display_name")
                     or visitor_team.get("name") or game.get("away_team_name") or "Unknown")

    else:  # NBA, NFL
        home_team = game.get("home_team") or {}
        visitor_team = game.get("visitor_team") or {}
        home_score = game.get("home_team_score") or 0
        away_score = game.get("visitor_team_score") or 0
        raw_date = game.get("date") or ""
        home_name = home_team.get("full_name") or home_team.get("name") or "Unknown"
        away_name = visitor_team.get("full_name") or visitor_team.get("name") or "Unknown"

    game_date = raw_date[:10]  # keep only the YYYY-MM-DD prefix
    total_points = home_score + away_score
    margin = home_score - away_score

    # Skip games with no scores
    if home_score == 0 and away_score == 0:
        return None

    # Determine winner
    if home_score > away_score:
        winner = "home"
    elif away_score > home_score:
        winner = "away"
    else:
        winner = "draw"

    # Generate unique ID
    game_id = hashlib.md5(
        f"{sport}-{game.get('id', '')}-{game_date}".encode()
    ).hexdigest()[:8]

    # Use REAL odds if available, otherwise mark as estimated (DO NOT USE FOR BACKTESTING)
    has_real_odds = False
    ml_home = None
    ml_away = None
    spread_home = None
    spread_away = None
    spread_odds_home = -110
    spread_odds_away = -110
    total_line = None

    embedded_odds = game.get("_real_odds") if game.get("_has_real_odds") else None
    if embedded_odds:
        # Embedded odds from the NFL special fetch use a flat schema
        # (moneyline_home, spread_home, total, ...), so they are read directly
        # instead of going through the 'moneyline'/'spread'/'total' extractor.
        ml_home = embedded_odds.get("moneyline_home")
        ml_away = embedded_odds.get("moneyline_away")
        # Spread and total may arrive as strings.
        spread_home = _coerce_float(embedded_odds.get("spread_home"))
        spread_away = _coerce_float(embedded_odds.get("spread_away"))
        if spread_home is None and spread_away is not None:
            spread_home = -spread_away if spread_away else 0.0
        if spread_away is None and spread_home is not None:
            spread_away = -spread_home if spread_home else 0.0
        spread_odds_home = embedded_odds.get("spread_home_odds")
        spread_odds_away = embedded_odds.get("spread_away_odds")
        total_line = _coerce_float(embedded_odds.get("total"))
        # Only claim real odds when at least one usable value came back.
        has_real_odds = any(
            v is not None for v in (ml_home, ml_away, spread_home, total_line)
        )
    elif real_odds:
        # Extract moneyline odds
        ml_data = real_odds.get("moneyline")
        if ml_data:
            ml_home = ml_data.get("odds_american_home") or ml_data.get("home_odds")
            ml_away = ml_data.get("odds_american_visitor") or ml_data.get("away_odds")
            if ml_home is not None and ml_away is not None:
                has_real_odds = True

        # Extract spread odds
        spread_data = real_odds.get("spread")
        if spread_data:
            # away_spread field contains the away team's spread (e.g., +6.5)
            away_spread_val = _coerce_float(spread_data.get("away_spread"))
            if away_spread_val is not None:
                spread_away = away_spread_val
                # Home spread is opposite
                spread_home = -spread_away if spread_away else 0.0
                spread_odds_home = spread_data.get("odds_american_home", -110)
                spread_odds_away = spread_data.get("odds_american_visitor", -110)
                has_real_odds = True

        # Extract total/over-under
        total_data = real_odds.get("total")
        if total_data:
            total_line = _coerce_float(
                total_data.get("over_under") or total_data.get("total_line")
            )

    # Build odds object
    if has_real_odds:
        odds = {
            "moneylineHome": _coerce_price(ml_home),
            "moneylineAway": _coerce_price(ml_away),
            "spreadHome": spread_home,
            "spreadAway": spread_away,
            "spreadOddsHome": _coerce_price(spread_odds_home, -110),
            "spreadOddsAway": _coerce_price(spread_odds_away, -110),
            "totalLine": total_line,
            "source": "live"  # Real odds from BallDontLie
        }
    else:
        # Use estimated odds but CLEARLY MARK THEM - not suitable for backtesting!
        odds = estimate_odds(margin, total_points, sport)
        odds["source"] = "estimated_DO_NOT_BACKTEST"

    # Determine spread coverage (only if we have spread data)
    spread_covered = _grade_spread(margin, spread_home)

    # Determine over/under (only if we have total line)
    if total_line is None:
        total_result = None
    elif total_points > total_line:
        total_result = "over"
    elif total_points < total_line:
        total_result = "under"
    else:
        total_result = "push"

    return {
        "id": game_id,
        "bdl_game_id": game.get("id"),  # Keep BallDontLie ID for reference
        "sport": sport,
        "date": game_date,
        "season": season,
        "homeTeam": home_name,
        "awayTeam": away_name,
        "scores": {
            "homeScore": home_score,
            "awayScore": away_score,
            "homeQ1": None,
            "homeQ2": None,
            "homeQ3": None,
            "homeQ4": None,
            "awayQ1": None,
            "awayQ2": None,
            "awayQ3": None,
            "awayQ4": None
        },
        "odds": odds,
        "hasRealOdds": has_real_odds,
        "result": {
            "winner": winner,
            "spreadCovered": spread_covered,
            "totalResult": total_result,
            "margin": margin,
            "totalPoints": total_points
        }
    }




def _the_odds_api_get_json(url: str):
    """GET *url* and return the parsed JSON body, raising on an HTTP error status.

    No auth header is sent; the caller puts the API key in the query string.
    """
    if USE_REQUESTS:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        return resp.json()
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req, timeout=30) as response:
        return json.loads(response.read().decode())


def fetch_nfl_from_the_odds_api(api_key: str) -> list:
    """Fetch recent NFL games and odds from The Odds API.
    This is used because BallDontLie has no 2024 NFL odds data.

    Scores come from the ``/scores`` endpoint (``daysFrom=3``) and are joined by
    event id with the first bookmaker's markets from the ``/odds`` endpoint.

    Args:
        api_key: The Odds API key; a falsy value skips the fetch.

    Returns:
        List of games in our betting format. The ``odds`` dict carries the
        canonical keys used by convert_game (moneylineHome, spreadHome,
        totalLine, ...) and, for backward compatibility, the legacy keys this
        function has always emitted (homeMoneyline, homeSpread, total, ...).
        ``season`` is None when the event has no parseable date. On any
        request/parse error the games processed so far (possibly none) are
        returned.
    """
    if not api_key:
        print("  ⚠️ THE_ODDS_API_KEY not set")
        return []

    print("  Fetching NFL from The Odds API (historical + upcoming)...")

    games = []
    sport = "americanfootball_nfl"

    try:
        # Get historical scores with completed games
        scores_url = f"https://api.the-odds-api.com/v4/sports/{sport}/scores"
        scores_data = _the_odds_api_get_json(scores_url + f"?apiKey={api_key}&daysFrom=3")
        print(f"    Got {len(scores_data)} games from scores API")

        # Get odds for current/upcoming games
        odds_url = f"https://api.the-odds-api.com/v4/sports/{sport}/odds"
        odds_params = f"?apiKey={api_key}&regions=us&markets=h2h,spreads,totals&oddsFormat=american"
        odds_data = _the_odds_api_get_json(odds_url + odds_params)
        print(f"    Got {len(odds_data)} games with odds")

        # Build odds lookup by game ID
        odds_lookup = {}
        for og in odds_data:
            event_id = og.get("id")
            bookmakers = og.get("bookmakers") or []
            if not event_id or not bookmakers:
                continue
            # Extract odds from first bookmaker
            markets = {m["key"]: m for m in bookmakers[0].get("markets", [])}
            odds_lookup[event_id] = {
                "h2h": markets.get("h2h", {}).get("outcomes", []),
                "spreads": markets.get("spreads", {}).get("outcomes", []),
                "totals": markets.get("totals", {}).get("outcomes", []),
                "source": "the-odds-api"
            }

        # Process each game from scores
        for game in scores_data:
            game_id = game.get("id")
            completed = game.get("completed", False)

            home_team = game.get("home_team", "Unknown")
            away_team = game.get("away_team", "Unknown")

            home_score = 0
            away_score = 0
            for s in game.get("scores") or []:
                if s.get("name") == home_team:
                    home_score = int(s.get("score", 0) or 0)
                elif s.get("name") == away_team:
                    away_score = int(s.get("score", 0) or 0)

            # Skip games without scores (not completed)
            if home_score == 0 and away_score == 0 and not completed:
                continue

            # Get odds for this game
            game_odds = odds_lookup.get(game_id, {})

            ml_home = None
            ml_away = None
            spread_home = None
            spread_home_odds = None
            spread_away_odds = None
            total_value = None

            for o in game_odds.get("h2h", []):
                if o.get("name") == home_team:
                    ml_home = o.get("price")
                elif o.get("name") == away_team:
                    ml_away = o.get("price")

            for o in game_odds.get("spreads", []):
                if o.get("name") == home_team:
                    spread_home = o.get("point")
                    spread_home_odds = o.get("price")
                elif o.get("name") == away_team:
                    spread_away_odds = o.get("price")

            for o in game_odds.get("totals", []):
                if o.get("name") == "Over":
                    total_value = o.get("point")

            has_real_odds = ml_home is not None or spread_home is not None

            game_date = (game.get("commence_time") or "")[:10]
            total_points = home_score + away_score
            margin = home_score - away_score

            # An NFL season runs from September into the following February, so
            # January/February dates belong to the previous year's season.
            season = None
            if len(game_date) >= 7 and game_date[:4].isdigit() and game_date[5:7].isdigit():
                year, month = int(game_date[:4]), int(game_date[5:7])
                season = year if month >= 3 else year - 1

            # Grade with explicit None checks so a pick'em (0) spread is graded too.
            if spread_home is None:
                spread_away = None
                spread_covered = None
            else:
                spread_away = -spread_home if spread_home else 0
                adjusted = margin + spread_home
                spread_covered = "home" if adjusted > 0 else ("away" if adjusted < 0 else "push")

            if total_value is None:
                total_result = None
            elif total_points > total_value:
                total_result = "over"
            elif total_points < total_value:
                total_result = "under"
            else:
                total_result = "push"

            converted = {
                "id": game_id[:8] if game_id else hashlib.md5(f"nfl-{game_date}-{home_team}".encode()).hexdigest()[:8],
                "sport": "nfl",
                "date": game_date,
                "season": season,
                "homeTeam": home_team,
                "awayTeam": away_team,
                "scores": {
                    "homeScore": home_score,
                    "awayScore": away_score,
                    "homeQ1": None, "homeQ2": None, "homeQ3": None, "homeQ4": None,
                    "awayQ1": None, "awayQ2": None, "awayQ3": None, "awayQ4": None
                },
                "odds": {
                    # Canonical keys (the names convert_game emits).
                    "moneylineHome": ml_home,
                    "moneylineAway": ml_away,
                    "spreadHome": spread_home,
                    "spreadAway": spread_away,
                    "spreadOddsHome": spread_home_odds,
                    "spreadOddsAway": spread_away_odds,
                    "totalLine": total_value,
                    # Legacy keys, kept so existing readers keep working.
                    "homeMoneyline": ml_home,
                    "awayMoneyline": ml_away,
                    "homeSpread": spread_home,
                    "homeSpreadOdds": spread_home_odds,
                    "awaySpread": spread_away,
                    "total": total_value,
                    "source": "the-odds-api" if has_real_odds else "estimated_DO_NOT_BACKTEST"
                },
                "hasRealOdds": has_real_odds,
                "result": {
                    "winner": "home" if margin > 0 else ("away" if margin < 0 else "draw"),
                    "spreadCovered": spread_covered,
                    "totalResult": total_result,
                    "margin": margin,
                    "totalPoints": total_points
                }
            }

            games.append(converted)

        print(f"    Processed {len(games)} NFL games with {len([g for g in games if g.get('hasRealOdds')])} having real odds")

    except Exception as e:
        import traceback
        # HTTP error text can embed the request URL, which carries the API key
        # as a query parameter, so redact the key before printing anything.
        print(f"    The Odds API error: {str(e).replace(api_key, '***')}")
        print(traceback.format_exc().replace(api_key, '***'), file=sys.stderr)

    return games



def _apply_odds_api_backfill(sport: str, betting_games: list) -> int:
    """Overlay The Odds API fallback odds onto games that lack real odds.

    Matching entries of ``betting_games`` are mutated in place.

    Args:
        sport: Sport code passed through to ``fetch_the_odds_api_fallback``.
        betting_games: Converted games (betting format dicts).

    Returns:
        Number of games that received backfilled odds.
    """
    fallback = fetch_the_odds_api_fallback(sport, betting_games)
    if not fallback:
        return 0

    filled = 0
    for g in betting_games:
        # Only touch games that still lack real odds and can be keyed by date.
        if g.get('hasRealOdds') or not g.get('date'):
            continue
        # The fallback is looked up by (date, normalized home, normalized away).
        key = (g.get('date'), _norm_team_name(g.get('homeTeam')), _norm_team_name(g.get('awayTeam')))
        o = fallback.get(key)
        if not o:
            continue
        # NOTE(review): these key names (moneylineHome/spreadHome/totalLine) differ
        # from the homeMoneyline/homeSpread/total keys written by the NFL converter
        # in this file — confirm which schema downstream consumers read.
        odds = g.setdefault('odds', {})
        odds['moneylineHome'] = o.get('moneylineHome')
        odds['moneylineAway'] = o.get('moneylineAway')
        odds['spreadHome'] = o.get('spreadHome')
        odds['spreadAway'] = o.get('spreadAway')
        odds['spreadOddsHome'] = o.get('spreadOddsHome', -110)
        odds['spreadOddsAway'] = o.get('spreadOddsAway', -110)
        odds['totalLine'] = o.get('totalLine')
        odds['source'] = 'the-odds-api'
        g['hasRealOdds'] = True
        filled += 1
    return filled


def _load_kept_existing_games(output_file, seasons_to_keep: list) -> list:
    """Read the saved games file and keep only valid games from wanted seasons.

    A missing, unreadable, or malformed file yields an empty list (with a
    warning for the latter two) so a fresh run can still proceed.
    """
    if not output_file.exists():
        return []

    try:
        with open(output_file, "r", encoding="utf-8") as f:
            existing_data = json.load(f)
    except (OSError, ValueError) as e:  # JSONDecodeError is a ValueError
        print(f"Warning: Could not read existing data: {e}")
        return []

    if not isinstance(existing_data, list):
        print(f"Warning: Could not read existing data: expected a JSON list, got {type(existing_data).__name__}")
        return []

    invalid = (None, "", "Unknown")
    kept = [
        game for game in existing_data
        if isinstance(game, dict)
        and game.get("season") in seasons_to_keep
        and game.get("date") not in invalid
        and game.get("homeTeam") not in invalid
        and game.get("awayTeam") not in invalid
    ]

    removed = len(existing_data) - len(kept)
    if removed > 0:
        print(f"Removed {removed} games from old seasons")
    return kept


def _dedupe_games_by_id(games: list) -> list:
    """Collapse games sharing an ID, letting later entries overwrite earlier ones.

    Games without a truthy ``id`` are dropped.
    """
    by_id = {}
    for game in games:
        gid = game.get('id')
        if not gid:
            continue
        by_id[gid] = game
    return list(by_id.values())


def process_sport(sport: str, seasons_to_keep: list = None, require_real_odds: bool = False):
    """Process a single sport - fetch, convert, backfill odds, merge, and save.

    Args:
        sport: Sport code (nba, nfl, etc.); must be a key of SPORT_CONFIG.
        seasons_to_keep: List of season years to keep from the existing
            dataset on disk (defaults to [2024]).
        require_real_odds: If True, only include freshly fetched games with
            real odds (recommended for backtesting). The filter is applied
            AFTER The Odds API backfill so backfilled games are retained.

    Returns:
        A summary dict with keys ``sport``, ``total_games``,
        ``games_with_real_odds``, ``games_with_estimated_odds`` and
        ``seasons``; or None when the sport is unknown or no games were
        fetched.
    """
    if seasons_to_keep is None:
        seasons_to_keep = [2024]

    config = SPORT_CONFIG.get(sport)
    if not config:
        print(f"Unknown sport: {sport}")
        return None

    print(f"\n{'=' * 60}")
    print(f"Processing {sport.upper()}")
    print(f"{'=' * 60}")

    # Fetch ALL games for ALL configured seasons
    seasons = config.get("seasons", [2024])
    games = []
    # BallDontLie odds endpoints are inconsistent (often 400/404) and can hang,
    # so that fetch is disabled and this mapping stays empty. Real odds come
    # from The Odds API backfill further down.
    real_odds_by_game = {}

    for season in seasons:
        print(f"\n📅 Fetching {sport.upper()} season {season}...")
        season_games = fetch_all_games(sport, season, per_page=100, max_pages=100)
        if season_games:
            games.extend(season_games)
            print(f"  Got {len(season_games)} games for {season}")

    # Build a secondary lookup by date+teams since game IDs don't match between APIs.
    # (Inert while real_odds_by_game is empty.)
    odds_by_matchup = {}
    for odds_data in real_odds_by_game.values():
        game_date = odds_data.get("game_date", "")
        home = odds_data.get("home_team", "").lower().strip()
        away = odds_data.get("away_team", "").lower().strip()
        if game_date and home and away:
            odds_by_matchup[f"{game_date}:{home}:{away}"] = odds_data

    # DEBUG: Print sample game IDs and odds IDs to check if they match
    if games and real_odds_by_game:
        sample_game_ids = [str(g.get('id')) for g in games[:5]]
        sample_odds_ids = list(real_odds_by_game.keys())[:5]
        print(f"  DEBUG: Sample game IDs from games API: {sample_game_ids}")
        print(f"  DEBUG: Sample game IDs in odds dict: {sample_odds_ids}")

    if not games:
        print(f"No {sport.upper()} games fetched. Check API access.")
        return None

    print(f"\n📊 Total: {len(games)} games, real odds for {len(real_odds_by_game)} games")

    # Convert to betting format
    print(f"\nConverting {len(games)} games to betting format...")
    betting_games = []
    skipped = 0

    for game in games:
        # Best-effort per game: one malformed record must not abort the run.
        try:
            game_odds = real_odds_by_game.get(game.get("id"))

            # If ID lookup fails, try matchup-based lookup
            if not game_odds and odds_by_matchup:
                game_date = game.get("date", "")[:10] if game.get("date") else ""
                home_team = game.get("home_team", {})
                away_team = game.get("visitor_team", game.get("away_team", {}))
                home_name = home_team.get("name", "").lower().strip() if isinstance(home_team, dict) else str(home_team).lower().strip()
                away_name = away_team.get("name", "").lower().strip() if isinstance(away_team, dict) else str(away_team).lower().strip()
                game_odds = odds_by_matchup.get(f"{game_date}:{home_name}:{away_name}")

            # Get season from game or use the most recent configured season
            game_season = game.get("season", seasons[-1] if seasons else 2024)

            converted = convert_game(game, sport, game_season, real_odds=game_odds)
            if converted:
                betting_games.append(converted)
            else:
                skipped += 1
        except Exception as e:
            print(f"  Warning: Failed to convert game {game.get('id')}: {e}")
            skipped += 1

    # Backfill real odds using The Odds API (historical snapshots)
    # NOTE: This can burn a lot of quota if enabled for every sport, so we default it to NFL.
    if BACKFILL_ALL_SPORTS or sport == 'nfl':
        filled = _apply_odds_api_backfill(sport, betting_games)
        if filled:
            print(f"  ✅ Backfilled real odds from The Odds API for {filled} games")

    # Enforce require_real_odds only now: filtering before the backfill would
    # discard exactly the games the backfill is meant to fill in.
    skipped_no_odds = 0
    if require_real_odds:
        with_real_odds = [g for g in betting_games if g.get("hasRealOdds")]
        skipped_no_odds = len(betting_games) - len(with_real_odds)
        betting_games = with_real_odds

    games_with_real_odds = sum(1 for g in betting_games if g.get("hasRealOdds"))
    print(f"Converted {len(betting_games)} games (skipped {skipped} incomplete, {skipped_no_odds} without real odds)")
    print(f"  📈 Games with REAL odds: {games_with_real_odds}")
    print(f"  ⚠️  Games with estimated odds: {len(betting_games) - games_with_real_odds}")

    # Load existing data
    output_file = DATA_DIR / config["output_file"]
    existing_games = _load_kept_existing_games(output_file, seasons_to_keep)

    # Merge games (deduplicate by ID)
    # Include ALL freshly-converted games so they can overwrite older records
    # (e.g. to apply backfilled real odds): later items overwrite earlier ones.
    all_games = _dedupe_games_by_id(existing_games + betting_games)

    # Sort by date; "or" guards against an explicit None date breaking the comparison.
    all_games.sort(key=lambda g: g.get("date") or "")

    print(f"\nTotal games in database: {len(all_games)} ")

    # Save to file (create the directory in case the caller did not)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_games, f, indent=2)

    print(f"Saved to {output_file}")

    # Show sample
    if all_games:
        print(f"\nSample {sport.upper()} games:")
        for game in all_games[:3]:
            game_date = game.get('date', game.get('game_date', 'Unknown'))
            home = game.get('homeTeam', game.get('home_team', {}).get('name', 'Unknown') if isinstance(game.get('home_team'), dict) else 'Unknown')
            away = game.get('awayTeam', game.get('visitor_team', {}).get('name', 'Unknown') if isinstance(game.get('visitor_team'), dict) else 'Unknown')
            scores = game.get('scores', {})
            print(f"  {game_date}: {away} @ {home} ({scores.get('awayScore','?')}-{scores.get('homeScore','?')})")
        if len(all_games) > 3:
            print(f"  ... and {len(all_games) - 3} more games")

    return {
        "sport": sport,
        "total_games": len(all_games),
        "games_with_real_odds": len([g for g in all_games if g.get("hasRealOdds") or g.get("odds", {}).get("source") == "live"]),
        "games_with_estimated_odds": len([g for g in all_games if g.get("odds", {}).get("source") == "estimated_DO_NOT_BACKTEST"]),
        "seasons": seasons
    }


def main():
    """Main entry point.

    Command line:
        [sport]               nba, nfl, nhl, mlb, or all (default: all). The
                              first argument that does not start with "-" is
                              taken as the sport, so flags may appear in any
                              position or without a sport.
        --require-real-odds,
        -r                    Only include games with real sportsbook odds.

    Processes each selected sport via ``process_sport`` and writes a combined
    ``metadata.json`` into DATA_DIR. Exits with status 1 on an unknown sport.
    """
    print("=" * 60)
    print("BallDontLie Sports Data Fetcher - All Major Sports")
    print("=" * 60)

    # Ensure directories exist
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # Determine which sports to process
    all_sports = ["nba", "nfl", "nhl", "mlb"]
    sports_to_process = all_sports

    # Skip option flags (e.g. --require-real-odds, --odds-backfill-all) so that
    # running with only flags still defaults to all sports instead of failing
    # with "Unknown sport".
    positional_args = [a for a in sys.argv[1:] if not a.startswith("-")]
    if positional_args:
        arg = positional_args[0].lower()
        if arg == "all":
            sports_to_process = all_sports
        elif arg in SPORT_CONFIG:
            sports_to_process = [arg]
        else:
            print(f"Unknown sport: {arg}")
            print(f"Available: {', '.join(SPORT_CONFIG.keys())}, all")
            sys.exit(1)

    # Check if we should require real odds (recommended for backtesting)
    require_real_odds = "--require-real-odds" in sys.argv or "-r" in sys.argv
    if require_real_odds:
        print("\n⚠️  REQUIRE_REAL_ODDS mode: Only games with verified sportsbook odds will be included")

    # Process each sport (keep both 2024 and 2025 seasons)
    results = []
    for sport in sports_to_process:
        result = process_sport(sport, seasons_to_keep=[2024, 2025], require_real_odds=require_real_odds)
        if result:
            results.append(result)

    # Update metadata
    total_real_odds = sum(r.get("games_with_real_odds", 0) for r in results)
    total_estimated = sum(r.get("games_with_estimated_odds", 0) for r in results)

    # Sorted so the metadata file does not churn between runs; key=str keeps
    # the sort safe even if a config mixes int and str season labels.
    all_seasons = sorted({s for r in results for s in r.get("seasons", [])}, key=str)

    metadata = {
        "importedAt": datetime.now().isoformat(),
        "source": "BallDontLie API",
        "totalGames": sum(r["total_games"] for r in results),
        "gamesWithRealOdds": total_real_odds,
        "gamesWithEstimatedOdds": total_estimated,
        "sports": [r["sport"] for r in results],
        "seasons": all_seasons,
        "dateRange": "2024-2025",
        "warnings": ["Games with 'estimated_DO_NOT_BACKTEST' odds source should NOT be used for backtesting"] if total_estimated > 0 else []
    }

    metadata_file = DATA_DIR / "metadata.json"
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print(f"\n{'=' * 60}")
    print("SUMMARY")
    print(f"{'=' * 60}")
    for r in results:
        real = r.get('games_with_real_odds', 0)
        est = r.get('games_with_estimated_odds', 0)
        print(f"  {r['sport'].upper()}: {r['total_games']} games (📈 {real} real odds, ⚠️ {est} estimated)")
    print(f"  {'─' * 30}")
    print(f"  TOTAL: {metadata['totalGames']} games across {len(results)} sports")
    print(f"  📈 Games with REAL odds (safe for backtesting): {total_real_odds}")
    if total_estimated > 0:
        print(f"  ⚠️  Games with ESTIMATED odds (DO NOT BACKTEST): {total_estimated}")
    print(f"\nMetadata saved to: {metadata_file}")
    print("\n✅ Done!")


# Run the fetcher only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
