#!/usr/bin/env python3
"""
SportsData.io Mass Data Download Script
Downloads and caches all available sports data for backtesting
"""

import json
import os
import sys
import time
import urllib.request
import urllib.error
from datetime import datetime
from pathlib import Path

# Get API KEY from environment (required - no hardcoded fallbacks)
API_KEY = os.environ.get("SPORTSDATA_API_KEY")
if not API_KEY:
    # Fail fast at startup: every request URL below embeds this key,
    # so running without it would only produce 401 errors.
    print("ERROR: SPORTSDATA_API_KEY environment variable is not set", file=sys.stderr)
    sys.exit(1)
# Root directory under which the per-sport "<sport>_cache" folders are created.
BASE_DIR = "/var/www/html/eventheodds/data"

# Define all endpoints to download.
# Schema: each sport key maps to
#   "name"      - display name used in progress/summary output,
#   "base_url"  - SportsData.io scores-API base for that sport,
#   "endpoints" - list of {"name", "path"} where "name" becomes the cache
#                 file name ("<name>.json") and "path" is appended to
#                 base_url (with "?key=..." added) when fetching.
SPORTS_CONFIG = {
    "nba": {
        "name": "NBA",
        "base_url": "https://api.sportsdata.io/v3/nba/scores/json",
        "endpoints": [
            {"name": "teams", "path": "/Teams"},
            {"name": "players", "path": "/Players"},
            {"name": "standings_2024", "path": "/Standings/2024"},
            {"name": "standings_2025", "path": "/Standings/2025"},
            {"name": "games_2024", "path": "/Games/2024"},
            {"name": "games_2025", "path": "/Games/2025"},
            {"name": "stadiums", "path": "/Stadiums"},
        ]
    },
    "nfl": {
        "name": "NFL",
        "base_url": "https://api.sportsdata.io/v3/nfl/scores/json",
        "endpoints": [
            {"name": "teams", "path": "/Teams"},
            {"name": "players", "path": "/Players"},
            {"name": "standings_2024", "path": "/Standings/2024"},
            {"name": "schedules_2024", "path": "/Schedules/2024"},
            {"name": "schedules_2025", "path": "/Schedules/2025"},
            {"name": "stadiums", "path": "/Stadiums"},
            {"name": "bye_weeks_2024", "path": "/Byes/2024"},
        ]
    },
    "mlb": {
        "name": "MLB",
        "base_url": "https://api.sportsdata.io/v3/mlb/scores/json",
        "endpoints": [
            {"name": "teams", "path": "/Teams"},
            {"name": "players", "path": "/Players"},
            {"name": "standings_2024", "path": "/Standings/2024"},
            {"name": "games_2024", "path": "/Games/2024"},
            {"name": "stadiums", "path": "/Stadiums"},
        ]
    },
    "nhl": {
        "name": "NHL",
        "base_url": "https://api.sportsdata.io/v3/nhl/scores/json",
        "endpoints": [
            {"name": "teams", "path": "/Teams"},
            {"name": "players", "path": "/Players"},
            {"name": "standings_2024", "path": "/Standings/2024"},
            {"name": "games_2024", "path": "/Games/2024"},
            {"name": "games_2025", "path": "/Games/2025"},
            {"name": "stadiums", "path": "/Stadiums"},
        ]
    },
    "cfb": {
        "name": "College Football",
        "base_url": "https://api.sportsdata.io/v3/cfb/scores/json",
        "endpoints": [
            {"name": "teams", "path": "/Teams"},
            {"name": "games_2024", "path": "/Games/2024"},
            {"name": "standings_2024", "path": "/Standings/2024"},
            {"name": "stadiums", "path": "/Stadiums"},
        ]
    },
    "cbb": {
        "name": "College Basketball",
        "base_url": "https://api.sportsdata.io/v3/cbb/scores/json",
        "endpoints": [
            {"name": "teams", "path": "/Teams"},
            {"name": "games_2024", "path": "/Games/2024"},
            {"name": "games_2025", "path": "/Games/2025"},
            {"name": "standings_2024", "path": "/Standings/2024"},
            {"name": "stadiums", "path": "/Stadiums"},
        ]
    }
}

def ensure_dir(path):
    """Ensure *path* exists as a directory, creating parents as needed."""
    os.makedirs(path, exist_ok=True)

def fetch_data(url):
    """Fetch and parse JSON from *url*.

    Never raises: any failure (HTTP error, network error, bad JSON) is
    reported as a ``{"error": <message>}`` dict so callers can handle all
    outcomes uniformly.
    """
    request = urllib.request.Request(
        url, headers={'User-Agent': 'EvenTheOdds-DataLoader/1.0'}
    )
    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            payload = response.read()
        return json.loads(payload.decode('utf-8'))
    except urllib.error.HTTPError as http_err:
        # 401 means the endpoint is outside the current API subscription.
        if http_err.code == 401:
            return {"error": "Unauthorized - endpoint not in subscription"}
        return {"error": f"HTTP {http_err.code}: {http_err.reason}"}
    except Exception as exc:
        return {"error": str(exc)}

def save_data(sport, endpoint_name, data, cache_dir):
    """Write *data* as pretty-printed JSON into *cache_dir* and return the path.

    The file is named ``<endpoint_name>.json``; *sport* is accepted for
    call-site symmetry but does not affect the file name.
    """
    target = os.path.join(cache_dir, f"{endpoint_name}.json")
    with open(target, 'w') as fh:
        fh.write(json.dumps(data, indent=2))
    return target

def update_metadata(cache_dir, endpoint_name, data, filepath):
    """Record download stats for *endpoint_name* in the cache metadata file.

    Updates (or creates) ``cache_metadata.json`` inside *cache_dir* with a
    timestamp, record count, file size, and path for this endpoint.

    Args:
        cache_dir: Directory holding the cache files and metadata.
        endpoint_name: Key under which the entry is stored.
        data: The downloaded payload; a list counts as len(data) records,
            anything else as a single record.
        filepath: Path of the already-written cache file (used for its size).
    """
    meta_path = os.path.join(cache_dir, "cache_metadata.json")

    metadata = {}
    if os.path.exists(meta_path):
        try:
            with open(meta_path) as f:
                metadata = json.load(f)
        except (json.JSONDecodeError, OSError):
            # A corrupt or unreadable metadata file must not abort the whole
            # download run; start over with fresh metadata instead.
            metadata = {}

    record_count = len(data) if isinstance(data, list) else 1
    file_size = os.path.getsize(filepath)

    metadata[endpoint_name] = {
        "timestamp": datetime.now().isoformat(),
        "records": record_count,
        "size_bytes": file_size,
        "file": filepath
    }

    with open(meta_path, 'w') as f:
        json.dump(metadata, f, indent=2)

def download_sport(sport_key, sport_config):
    """Download every configured endpoint for one sport into its cache dir.

    Writes each payload via save_data/update_metadata and returns a list of
    per-endpoint result dicts: ``{"endpoint", "status", ...}`` with record
    and size info on success, or the error message on failure.
    """
    cache_dir = os.path.join(BASE_DIR, f"{sport_key}_cache")
    ensure_dir(cache_dir)

    print(f"\n{'='*60}")
    print(f"Downloading {sport_config['name']} data...")
    print(f"{'='*60}")

    results = []
    for endpoint in sport_config["endpoints"]:
        name = endpoint["name"]
        url = f"{sport_config['base_url']}{endpoint['path']}?key={API_KEY}"

        print(f"\n  → {name}...", end=" ", flush=True)

        payload = fetch_data(url)

        if isinstance(payload, dict) and "error" in payload:
            print(f"❌ {payload['error']}")
            results.append({"endpoint": name, "status": "error", "error": payload["error"]})
        else:
            count = len(payload) if isinstance(payload, list) else 1
            filepath = save_data(sport_key, name, payload, cache_dir)
            update_metadata(cache_dir, name, payload, filepath)
            size_kb = os.path.getsize(filepath) / 1024
            print(f"✅ {count} records ({size_kb:.1f} KB)")
            results.append({"endpoint": name, "status": "success", "records": count, "size_kb": size_kb})

        # Rate limiting - be nice to the API
        time.sleep(0.5)

    return results

def main():
    """Entry point: download all configured sports, then print and save a summary."""
    banner = "=" * 70
    print(banner)
    print("SPORTSDATA.IO MASS DATA DOWNLOAD")
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Target Directory: {BASE_DIR}")
    print(banner)

    ensure_dir(BASE_DIR)

    # Download each sport in configuration order (side effects happen here).
    all_results = {}
    for sport_key, sport_config in SPORTS_CONFIG.items():
        all_results[sport_key] = download_sport(sport_key, sport_config)

    # Grand totals over every successful endpoint.
    successes = [r for results in all_results.values()
                 for r in results if r["status"] == "success"]
    total_records = sum(r.get("records", 0) for r in successes)
    total_size = sum(r.get("size_kb", 0) for r in successes)

    # Per-sport summary.
    print("\n" + banner)
    print("DOWNLOAD SUMMARY")
    print(banner)

    for sport_key, results in all_results.items():
        ok = [r for r in results if r["status"] == "success"]
        bad = [r for r in results if r["status"] == "error"]
        print(f"\n{SPORTS_CONFIG[sport_key]['name']}:")
        print(f"  Endpoints: {len(ok)} success, {len(bad)} failed")
        print(f"  Records: {sum(r.get('records', 0) for r in ok):,}")
        print(f"  Size: {sum(r.get('size_kb', 0) for r in ok):.1f} KB")

    print("\n" + "-" * 40)
    print(f"TOTAL RECORDS: {total_records:,}")
    print(f"TOTAL SIZE: {total_size / 1024:.2f} MB")
    print(banner)
    print(f"Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(banner)

    # Persist the run summary next to the cache directories.
    summary_path = os.path.join(BASE_DIR, "download_summary.json")
    summary = {
        "timestamp": datetime.now().isoformat(),
        "total_records": total_records,
        "total_size_kb": total_size,
        "results": all_results
    }
    with open(summary_path, 'w') as f:
        json.dump(summary, f, indent=2)
    print(f"\nSummary saved to: {summary_path}")

# Run only when executed as a script (not when imported as a module).
if __name__ == "__main__":
    main()
