#!/usr/bin/env python3
"""
Comprehensive Sports Data Updater for BallDontLie API.

This script fetches and caches ALL available sports data to reduce API calls.
Designed to run daily via systemd service.

Sports supported (per BallDontLie API):
- NBA, NFL, NHL, MLB
- WNBA, NCAAF, NCAAB
- EPL, La Liga, Serie A, Bundesliga, Ligue 1
- UEFA Champions League
- MMA

Usage:
    python sports_data_updater.py [--sport SPORT] [--force] [--odds-only] [--list]

    --sport: Specific sport to update (default: all)
    --force: Force update even if fresh cached data exists
    --odds-only: Refresh odds only; skip large endpoints such as players
    --list: List available sports and exit
"""

import os
import sys
import json
import time
import argparse
import logging
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any

try:
    import requests
except ImportError:
    print("Error: requests required. Install: pip install requests")
    sys.exit(1)

# Ensure script directory is in path for local imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
try:
    import fetch_odds_api
except ImportError:
    print("Warning: fetch_odds_api not found")
    fetch_odds_api = None

# Configuration
API_BASE = "https://api.balldontlie.io"
def _load_env_value(name: str) -> Optional[str]:
    v = os.environ.get(name)
    if v and isinstance(v, str) and v.strip():
        return v.strip().strip('"').strip("'")
    # Best-effort: parse local .env when running via SSH/non-login shells.
    # This avoids shell-expansion issues when sourcing .env that contains special characters.
    try:
        env_path = Path("/var/www/html/eventheodds/.env")
        if env_path.exists():
            for line in env_path.read_text().splitlines():
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, val = line.split("=", 1)
                if k.strip() != name:
                    continue
                val = val.strip().strip('"').strip("'")
                if val:
                    return val
    except Exception:
        pass
    return None

API_KEY = _load_env_value("BALLDONTLIE_API_KEY")
if not API_KEY:
    raise RuntimeError("BALLDONTLIE_API_KEY is not set. Put it in the environment or /var/www/html/eventheodds/.env")
HEADERS = {"Authorization": API_KEY}
DATA_DIR = Path("/var/www/html/eventheodds/data")
LOG_FILE = "/var/log/sports_data_updater.log"

# Setup logging: always log to the console, and also to LOG_FILE when its
# directory is writable (avoids attaching duplicate console handlers).
_handlers = [logging.StreamHandler()]
if os.access(os.path.dirname(LOG_FILE), os.W_OK):
    _handlers.append(logging.FileHandler(LOG_FILE))
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=_handlers,
)
logger = logging.getLogger(__name__)

# Sport configurations with API endpoints
SPORTS_CONFIG = {
    "nba": {
        "prefix": "/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "injuries": "/player_injuries",
            "odds": "/odds",
            "stats": "/stats",
            "season_averages": "/season_averages",
        },
        "season": 2025
    },
    "nfl": {
        "prefix": "/nfl/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "injuries": "/player_injuries",
            "odds": "/odds",
            "stats": "/stats",
            "season_stats": "/season_stats",
        },
        "season": 2025
    },
    "nhl": {
        "prefix": "/nhl/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "odds": "/odds",
            "box_scores": "/box_scores",
        },
        "season": 2025
    },
    "mlb": {
        "prefix": "/mlb/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "injuries": "/player_injuries",
            "stats": "/stats",
        },
        "season": 2024
    },
    "wnba": {
        "prefix": "/wnba/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "injuries": "/player_injuries",
        },
        "season": 2024
    },
    "ncaaf": {
        "prefix": "/ncaaf/v1",
        "endpoints": {
            "teams": "/teams",
            "conferences": "/conferences",
            "games": "/games",
            "standings": "/standings",
            "rankings": "/rankings",
            "odds": "/odds",
        },
        "season": 2024
    },
    "ncaab": {
        "prefix": "/ncaab/v1",
        "endpoints": {
            "teams": "/teams",
            "conferences": "/conferences",
            "games": "/games",
            "standings": "/standings",
            "rankings": "/rankings",
            "brackets": "/brackets",
            "odds": "/odds",
        },
        "season": 2025
    },
    "epl": {
        "prefix": "/epl/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "odds": "/odds",
        },
        "season": 2025
    },
    "bundesliga": {
        "prefix": "/bundesliga/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "odds": "/odds",
        },
        "season": 2025
    },
    "laliga": {
        "prefix": "/laliga/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "odds": "/odds",
        },
        "season": 2025
    },
    "ligue1": {
        "prefix": "/ligue1/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "odds": "/odds",
        },
        "season": 2025
    },
    "seriea": {
        "prefix": "/seriea/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "odds": "/odds",
        },
        "season": 2025
    },
    "ucl": {
        "prefix": "/ucl/v1",
        "endpoints": {
            "teams": "/teams",
            "players": "/players",
            "games": "/games",
            "standings": "/standings",
            "odds": "/odds",
        },
        "season": 2025
    },
    "mma": {
        "prefix": "/mma/v1",
        "endpoints": {
            "leagues": "/leagues",
            "events": "/events",
            "fighters": "/fighters",
            "fights": "/fights",
            "rankings": "/rankings",
            "odds": "/odds",
        },
        "season": 2024
    },
}
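
# Example: update_sport() builds each request URL as API_BASE + prefix + path,
# so the NFL "teams" entry above resolves to
# https://api.balldontlie.io/nfl/v1/teams.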


class SportsDataUpdater:
    """Handles fetching and caching sports data."""
    
    def __init__(self, data_dir: Path = DATA_DIR):
        self.data_dir = data_dir
        self.stats = {"fetched": 0, "errors": 0, "skipped": 0}
        
    def make_request(self, url: str, params: Optional[dict] = None) -> Optional[dict]:
        """Make an API request with up to three attempts and exponential backoff."""
        for attempt in range(3):
            try:
                response = requests.get(url, headers=HEADERS, params=params, timeout=30)
                
                if response.status_code == 429:
                    logger.warning(f"Rate limited, waiting 60s...")
                    time.sleep(60)
                    continue
                    
                if response.status_code == 401:
                    logger.error("Authentication failed - check API key")
                    return None
                    
                if response.status_code == 403:
                    logger.debug(f"Access denied (tier restriction): {url}")
                    return None
                    
                if response.status_code == 404:
                    logger.debug(f"Endpoint not found: {url}")
                    return None
                    
                response.raise_for_status()
                return response.json()
                
            except requests.exceptions.RequestException as e:
                logger.warning(f"Request failed (attempt {attempt + 1}): {e}")
                time.sleep(2 ** attempt)  # Exponential backoff: 1s, 2s, 4s
                
        return None
    
    def fetch_paginated(self, url: str, params: Optional[dict] = None, max_pages: int = 50) -> List[dict]:
        """Fetch all pages of a cursor-paginated endpoint (follows meta.next_cursor)."""
        all_data = []
        params = dict(params or {})  # Copy so the caller's dict is not mutated
        params["per_page"] = 100
        cursor = None
        
        for page in range(max_pages):
            if cursor:
                params["cursor"] = cursor
                
            data = self.make_request(url, params)
            if not data:
                break
                
            items = data.get("data", [])
            if not items:
                break
                
            all_data.extend(items)
            cursor = data.get("meta", {}).get("next_cursor")
            
            if not cursor:
                break
                
            time.sleep(0.3)  # Rate limiting
            
        return all_data
    
    def save_data(self, sport: str, data_type: str, data: Any):
        """Save data to JSON file."""
        sport_dir = self.data_dir / sport
        sport_dir.mkdir(parents=True, exist_ok=True)
        
        filepath = sport_dir / f"{data_type}.json"
        with open(filepath, "w") as f:
            json.dump(data, f, indent=2, default=str)
            
        logger.info(f"Saved {sport}/{data_type}: {len(data) if isinstance(data, list) else 'config'} entries")
        self.stats["fetched"] += 1

    def load_existing_list(self, sport: str, data_type: str) -> List[dict]:
        """Load existing JSON array from disk if present."""
        try:
            p = self.data_dir / sport / f"{data_type}.json"
            if not p.exists():
                return []
            with open(p, "r") as f:
                data = json.load(f)
            if isinstance(data, list):
                return data
            if isinstance(data, dict) and isinstance(data.get("data"), list):
                return data["data"]
            return []
        except Exception:
            return []

    def _dedupe_odds(self, rows: List[dict]) -> List[dict]:
        """Deduplicate odds rows while preserving the most complete/most recent entry per (game_id,type,vendor)."""
        def score(o: dict) -> tuple:
            # Rank by the number of filled numeric fields first, then by
            # updated_at recency. Tuple comparison preserves that priority;
            # a weighted sum would let the ~1e9-second timestamp swamp the
            # fill count.
            filled = 0
            for k in [
                "moneyline_home_odds",
                "moneyline_away_odds",
                "spread_home_value",
                "spread_away_value",
                "total_value",
                "odds_american_home",
                "odds_american_visitor",
                "over_under",
            ]:
                v = o.get(k)
                if v is not None and v != "":
                    filled += 1
            ts = o.get("updated_at") or o.get("updatedAt") or ""
            try:
                t = int(datetime.fromisoformat(str(ts).replace("Z", "+00:00")).timestamp())
            except Exception:
                t = 0
            return (filled, t)

        best: Dict[str, dict] = {}
        for o in rows:
            gid = o.get("game_id") or o.get("gameId") or o.get("id")
            if gid is None:
                continue
            typ = str(o.get("type") or o.get("market") or "unknown").lower()
            vendor = str(o.get("vendor") or o.get("book") or o.get("sportsbook") or "unknown").lower()
            key = f"{gid}:{typ}:{vendor}"
            cur = best.get(key)
            if cur is None or score(o) > score(cur):
                best[key] = o
        return list(best.values())

    def _season_start_date(self, sport: str, season: int) -> datetime:
        """Best-effort season start date (UTC)."""
        # NBA/NHL typically start in October of the season year.
        if sport in ["nba", "nhl"]:
            return datetime(season, 10, 1)
        # NFL is week-based in this script and never reaches this lookup;
        # NCAAF kicks off in late August.
        if sport == "ncaaf":
            return datetime(season, 8, 1)
        if sport in ["ncaab"]:
            return datetime(season, 10, 15)
        if sport in ["mlb"]:
            return datetime(season, 3, 1)
        if sport in ["wnba"]:
            return datetime(season, 5, 1)
        # Soccer seasons vary; keep conservative.
        return datetime(season, 8, 1)

    def _extract_game_dates_from_local_games(self, sport: str) -> List[str]:
        """Read sport/games.json and return distinct YYYY-MM-DD date strings."""
        try:
            p = self.data_dir / sport / "games.json"
            if not p.exists():
                return []
            with open(p, "r") as f:
                data = json.load(f)
            rows = data if isinstance(data, list) else (data.get("data") if isinstance(data, dict) else [])
            out = set()
            for g in rows or []:
                d = g.get("date") or g.get("game_date") or g.get("datetime") or g.get("start_time_utc") or g.get("start_time") or g.get("kickoff")
                if not d:
                    continue
                ds = str(d)[:10]
                if len(ds) == 10:
                    out.add(ds)
            return sorted(out)
        except Exception:
            return []
        
    def update_sport(self, sport: str, config: dict, force: bool = False, odds_only: bool = False):
        """Update all data for a specific sport."""
        logger.info(f"{'=' * 50}")
        logger.info(f"Updating {sport.upper()}")
        logger.info(f"{'=' * 50}")
        
        prefix = config["prefix"]
        season = config["season"]
        
        for endpoint_name, endpoint_path in config["endpoints"].items():
            try:
                url = f"{API_BASE}{prefix}{endpoint_path}"
                params = {}
                
                # Add the season parameter where the endpoint supports it
                if endpoint_name in ["games", "standings", "stats", "season_stats", "season_averages", "rankings", "brackets"]:
                    if sport in ["nba", "wnba", "ncaaf", "nhl"]:
                        params["seasons[]"] = season
                    elif sport in ["nfl", "ncaab"]:
                        params["season"] = season
                        
                # Special handling for odds
                if endpoint_name == "odds":
                    self._fetch_odds(sport, config)
                    continue

                if odds_only:
                    # Skip non-odds endpoints when running in odds-only mode.
                    self.stats["skipped"] += 1
                    continue
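
                # Best-effort freshness guard so --force has an effect: skip
                # endpoints whose cache file was written within the last 6
                # hours (the threshold is an assumption, not an API
                # requirement; tune it for your schedule).
                cache_file = self.data_dir / sport / f"{endpoint_name}.json"
                if not force and cache_file.exists():
                    if time.time() - cache_file.stat().st_mtime < 6 * 3600:
                        logger.info(f"  Skipping {endpoint_name} (fresh cache; use --force)")
                        self.stats["skipped"] += 1
                        continue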
                    
                logger.info(f"  Fetching {endpoint_name}...")
                
                # Paginated endpoints
                if endpoint_name in ["players", "games", "stats", "injuries", "fights", "fighters", "events"]:
                    data = self.fetch_paginated(url, params)
                else:
                    response = self.make_request(url, params)
                    data = response.get("data", []) if response else []
                    
                if data:
                    self.save_data(sport, endpoint_name, data)
                else:
                    logger.debug(f"  No data for {endpoint_name}")
                    self.stats["skipped"] += 1
                    
            except Exception as e:
                logger.error(f"  Error fetching {endpoint_name}: {e}")
                self.stats["errors"] += 1
                
    def _fetch_odds(self, sport: str, config: dict):
        """Fetch betting odds with date/week handling."""
        prefix = config["prefix"]
        season = config["season"]
        all_odds: List[dict] = []
        
        logger.info(f"  Fetching odds...")
        
        if sport == "nfl":
            # NFL uses season + week
            for week in range(1, 23):
                url = f"{API_BASE}{prefix}/odds"
                params = {"season": season, "week": week, "per_page": 100}
                data = self.make_request(url, params)
                if data and data.get("data"):
                    for item in data["data"]:
                        item["week"] = week
                    all_odds.extend(data["data"])
                time.sleep(0.3)
                
        elif sport in ["nba", "nhl"]:
            # NBA/NHL use dates.
            # Critical: backfill from season start (not just last 60 days), but only on dates that have games.
            # This prevents the common "odds start in November" gap when the season begins in October.
            today = datetime.now()
            season_start = self._season_start_date(sport, season)

            # Prefer local games.json to avoid hammering the API.
            game_dates = self._extract_game_dates_from_local_games(sport)

            # If no games file exists yet, fall back to a bounded window.
            if not game_dates:
                logger.warning(f"  No local games.json for {sport}; falling back to last 60 days odds")
                game_dates = [(today - timedelta(days=i)).strftime("%Y-%m-%d") for i in range(60)]

            # Restrict to [season_start, today]
            filtered_dates = []
            for ds in game_dates:
                try:
                    dt = datetime.fromisoformat(ds)
                    if dt < season_start or dt > today:
                        continue
                    filtered_dates.append(ds)
                except Exception:
                    continue

            # Merge with existing odds and backfill missing earlier dates when needed.
            existing = self.load_existing_list(sport, "odds")
            existing_dates = set()
            for o in existing:
                d = o.get("date") or o.get("game_date") or o.get("commence_time") or o.get("updated_at")
                if d:
                    existing_dates.add(str(d)[:10])

            # If we already have odds data, only fetch missing dates; the most
            # recent few game dates are re-fetched below to pick up updates.
            to_fetch = []
            for ds in filtered_dates:
                if ds not in existing_dates:
                    to_fetch.append(ds)
            # Always refresh the most recent few game dates
            to_fetch.extend(filtered_dates[-3:])
            to_fetch = sorted(set(to_fetch))

            logger.info(f"  Odds dates to fetch for {sport}: {len(to_fetch)} (seasonStart={season_start.strftime('%Y-%m-%d')})")

            for ds in to_fetch:
                url = f"{API_BASE}{prefix}/odds"
                if sport == "nba":
                    params = {"date": ds, "per_page": 100}
                else:
                    params = {"dates[]": ds, "per_page": 100}

                data = self.make_request(url, params)
                if data and data.get("data"):
                    for item in data["data"]:
                        item["date"] = ds
                    all_odds.extend(data["data"])
                time.sleep(0.3)

            # Include existing rows and dedupe (prefer best row per key)
            all_odds = self._dedupe_odds(existing + all_odds)
                
        elif sport in ["ncaaf", "ncaab", "epl"]:
            # Use dates
            today = datetime.now()
            for i in range(30):
                date = (today - timedelta(days=i)).strftime("%Y-%m-%d")
                url = f"{API_BASE}{prefix}/odds"
                params = {"dates[]": date, "per_page": 100}
                data = self.make_request(url, params)
                if data and data.get("data"):
                    all_odds.extend(data["data"])
                time.sleep(0.3)
                
        if all_odds:
            self.save_data(sport, "odds", all_odds)
            
    def fetch_player_props(self, sport: str, config: dict):
        """Fetch live player props for upcoming games."""
        prefix = config["prefix"]
        
        logger.info(f"  Fetching player props...")
        
        # Collect game IDs from yesterday through two days ahead
        today = datetime.now()
        game_ids = []
        
        for i in range(-1, 3):
            date = (today + timedelta(days=i)).strftime("%Y-%m-%d")
            url = f"{API_BASE}{prefix}/games"
            params = {"dates[]": date, "per_page": 50}
            data = self.make_request(url, params)
            if data and data.get("data"):
                game_ids.extend([g["id"] for g in data["data"]])
                
        all_props = []
        for game_id in game_ids[:20]:  # Cap per-run request volume
            url = f"{API_BASE}{prefix}/odds/player_props"
            params = {"game_id": game_id}
            data = self.make_request(url, params)
            if data and data.get("data"):
                all_props.extend(data["data"])
            time.sleep(0.3)
            
        if all_props:
            self.save_data(sport, "player_props", all_props)
            
    def create_master_index(self):
        """Create master index of all cached data."""
        index = {
            "updated_at": datetime.now().isoformat(),
            "sports": {},
            "totals": {"entries": 0, "files": 0}
        }
        
        for sport_dir in self.data_dir.iterdir():
            if sport_dir.is_dir() and not sport_dir.name.startswith("."):
                sport_data = {}
                for json_file in sport_dir.glob("*.json"):
                    try:
                        with open(json_file) as f:
                            data = json.load(f)
                            count = len(data) if isinstance(data, list) else 1
                            sport_data[json_file.stem] = {
                                "count": count,
                                "file": str(json_file),
                                "size_kb": json_file.stat().st_size / 1024
                            }
                            index["totals"]["entries"] += count
                            index["totals"]["files"] += 1
                    except (OSError, ValueError):
                        # Unreadable or non-JSON cache file; skip it.
                        logger.debug(f"Skipping unreadable cache file: {json_file}")
                        
                if sport_data:
                    index["sports"][sport_dir.name] = sport_data
                    
        with open(self.data_dir / "master_index.json", "w") as f:
            json.dump(index, f, indent=2)
            
        logger.info(f"Master index: {index['totals']['entries']} entries in {index['totals']['files']} files")
        
    def run(self, sports: Optional[List[str]] = None, force: bool = False):
        """Run the update process."""
        # Backwards-compatible signature; mode flags such as odds-only are set
        # on the instance by main().
        start_time = datetime.now()
        logger.info("=" * 70)
        logger.info(f"Sports Data Updater - Started at {start_time}")
        logger.info("=" * 70)
        
        sports_to_update = sports or list(SPORTS_CONFIG.keys())
        
        for sport in sports_to_update:
            if sport in SPORTS_CONFIG:
                # odds_only is plumbed from main() by setting self._odds_only on the instance
                self.update_sport(sport, SPORTS_CONFIG[sport], force, getattr(self, "_odds_only", False))
                
                # Fetch player props for supported sports
                if (not getattr(self, "_odds_only", False)) and sport in ["nba", "nfl", "nhl", "epl"]:
                    try:
                        self.fetch_player_props(sport, SPORTS_CONFIG[sport])
                    except Exception as e:
                        logger.debug(f"Player props not available for {sport}: {e}")
            else:
                logger.warning(f"Unknown sport: {sport}")
                
        # Run the optional Odds API companion fetcher
        if (not getattr(self, "_odds_only", False)) and fetch_odds_api:
            try:
                logger.info("Running The Odds API fetcher...")
                fetch_odds_api.fetch_all_mapped_sports()
            except Exception as e:
                logger.error(f"Error running Odds API fetcher: {e}")

        self.create_master_index()
        
        # Summary
        duration = (datetime.now() - start_time).total_seconds()
        logger.info("=" * 70)
        logger.info("UPDATE COMPLETE")
        logger.info(f"Duration: {duration:.1f}s")
        logger.info(f"Fetched: {self.stats['fetched']}")
        logger.info(f"Skipped: {self.stats['skipped']}")
        logger.info(f"Errors: {self.stats['errors']}")
        logger.info("=" * 70)


def main():
    parser = argparse.ArgumentParser(description="Update sports data from BallDontLie API")
    parser.add_argument("--sport", type=str, help="Specific sport to update")
    parser.add_argument("--force", action="store_true", help="Force update")
    parser.add_argument("--odds-only", action="store_true", help="Only update odds (fast, avoids refetching large endpoints like players)")
    parser.add_argument("--list", action="store_true", help="List available sports")
    args = parser.parse_args()
    
    if args.list:
        print("Available sports:")
        for sport in SPORTS_CONFIG:
            print(f"  - {sport}")
        return
        
    updater = SportsDataUpdater()
    updater._odds_only = bool(args.odds_only)
    sports = [args.sport] if args.sport else None
    updater.run(sports=sports, force=args.force)


if __name__ == "__main__":
    main()
