#!/usr/bin/env python3
"""
Import Historical Betting Odds Data

Downloads and converts the free historical odds data from GitHub
into the structured format used by EventheOdds.ai

Sources:
- https://github.com/flancast90/sportsbookreview-scraper
- Covers NBA, NFL, NHL from 2011-2021
"""

import json
import math
import os
import sys
import urllib.request
import uuid
from datetime import datetime

# Output directory: <parent of this script's directory>/data/betting
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data', 'betting')

# GitHub raw URLs for the datasets, keyed by lowercase sport code.
# The key is also used to name the output file (<sport>_historical.json).
DATASETS = {
    'nba': 'https://raw.githubusercontent.com/flancast90/sportsbookreview-scraper/main/data/nba_archive_10Y.json',
    'nfl': 'https://raw.githubusercontent.com/flancast90/sportsbookreview-scraper/main/data/nfl_archive_10Y.json',
    'nhl': 'https://raw.githubusercontent.com/flancast90/sportsbookreview-scraper/main/data/nhl_archive_10Y.json',
}


def download_dataset(sport: str, url: str) -> list:
    """Download a dataset from GitHub.

    Args:
        sport: Sport code; used only in the progress messages.
        url: Location of a JSON document holding a list of raw game records.

    Returns:
        The decoded list of raw games, or an empty list when the download
        fails, the payload is not valid JSON, or the payload is not a list.
    """
    print(f"  Downloading {sport.upper()} data from GitHub...")
    try:
        with urllib.request.urlopen(url, timeout=60) as response:
            data = json.loads(response.read().decode('utf-8'))
    except Exception as e:
        # Best-effort by design: a failure is reported and turned into an
        # empty result so the caller can carry on with the other sports.
        print(f"  ❌ Error downloading {sport}: {e}")
        return []

    # The result is iterated as a list of game records; a JSON object or
    # scalar would only fail later with a confusing error, so reject it here.
    if not isinstance(data, list):
        print(f"  ❌ Error downloading {sport}: expected a JSON list, got {type(data).__name__}")
        return []

    print(f"  ✅ Downloaded {len(data):,} {sport.upper()} games")
    return data


def convert_date(date_value) -> str:
    """Convert date from YYYYMMDD float to YYYY-MM-DD string"""
    if date_value is None:
        return None
    
    # Handle float like 20111225.0
    date_str = str(int(date_value))
    if len(date_str) == 8:
        return f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:8]}"
    return None


def get_season_from_date(date_str: str, sport: str) -> int:
    """Return the season a game belongs to, labelled by its starting year.

    Args:
        date_str: Game date as "YYYY-MM-DD"; a falsy value yields 2020.
        sport: Sport code. 'nba' and 'nhl' seasons are taken to start in
            October, 'nfl' seasons in September; any other sport uses the
            calendar year.

    Returns:
        The calendar year for games played in or after the season's start
        month, otherwise the previous year.
    """
    if not date_str:
        return 2020

    calendar_year = int(date_str[:4])
    calendar_month = int(date_str[5:7])

    # First month of a new season for each league with a known schedule.
    first_month = {'nba': 10, 'nhl': 10, 'nfl': 9}.get(sport)
    if first_month is None:
        return calendar_year

    # Games before the start month close out the season begun the year before.
    if calendar_month < first_month:
        return calendar_year - 1
    return calendar_year


def normalize_team_name(team: str, sport: str) -> str:
    """Normalize team name for consistency"""
    if not team:
        return "Unknown"
    
    # Remove common suffixes/formatting
    team = team.strip()
    
    # NBA team name mapping (short to full)
    nba_teams = {
        '76ers': 'Philadelphia 76ers',
        'Sixers': 'Philadelphia 76ers',
        'Blazers': 'Portland Trail Blazers',
        'Cavs': 'Cleveland Cavaliers',
    }
    
    if sport == 'nba' and team in nba_teams:
        return nba_teams[team]
    
    return team


def safe_int(value) -> int | None:
    """Safely convert a value to int"""
    if value is None:
        return None
    try:
        return int(float(value))
    except (ValueError, TypeError):
        return None


def safe_float(value) -> float | None:
    """Safely convert a value to float"""
    if value is None:
        return None
    try:
        return float(value)
    except (ValueError, TypeError):
        return None


def _grade_game(home_score: int, away_score: int,
                home_spread: float | None, total: float | None) -> dict:
    """Grade a scored game against its spread and total lines."""
    margin = home_score - away_score
    total_points = home_score + away_score

    # Winner
    if margin > 0:
        winner = 'home'
    elif margin < 0:
        winner = 'away'
    else:
        winner = 'draw'

    # Spread coverage: the home spread is added to the home margin, so a
    # positive adjusted margin means the home side covered.
    spread_covered = None
    if home_spread is not None:
        adjusted_margin = margin + home_spread
        if adjusted_margin > 0:
            spread_covered = 'home'
        elif adjusted_margin < 0:
            spread_covered = 'away'
        else:
            spread_covered = 'push'

    # Total result
    total_result = None
    if total is not None:
        if total_points > total:
            total_result = 'over'
        elif total_points < total:
            total_result = 'under'
        else:
            total_result = 'push'

    return {
        'winner': winner,
        'spreadCovered': spread_covered,
        'totalResult': total_result,
        'margin': margin,
        'totalPoints': total_points,
    }


def convert_game(raw_game: dict, sport: str) -> dict:
    """Convert a raw game to our structured format.

    Args:
        raw_game: One record of the downloaded dataset. Fields are read with
            .get(), so missing keys simply become None in the output.
        sport: Sport code stored on the game and used for season inference
            and team-name normalization.

    Returns:
        A dict with the keys 'id', 'sport', 'date', 'season', 'homeTeam',
        'awayTeam', 'scores', 'odds' and 'result'. 'result' is None unless
        both final scores are present.
    """
    date = convert_date(raw_game.get('date'))
    # Parse the season defensively: a malformed value (e.g. "2011-12") falls
    # back to the date-derived season instead of raising.
    season = safe_int(raw_game.get('season')) or get_season_from_date(date, sport)

    # Extract scores (safely convert to int)
    home_score = safe_int(raw_game.get('home_final'))
    away_score = safe_int(raw_game.get('away_final'))

    # Extract odds (safely convert to float/int)
    home_ml = safe_int(raw_game.get('home_close_ml'))
    away_ml = safe_int(raw_game.get('away_close_ml'))
    home_spread = safe_float(raw_game.get('home_close_spread'))
    away_spread = safe_float(raw_game.get('away_close_spread'))
    # Closing total, falling back to the opening total when the closing one
    # is missing or 0.
    total = safe_float(raw_game.get('close_over_under')) or safe_float(raw_game.get('open_over_under'))

    # Truthiness check: a value of 0 counts as "no odds", same as a missing one.
    has_odds = any([home_ml, away_ml, home_spread, away_spread, total])

    # Calculate results only if we have both final scores
    result = None
    if home_score is not None and away_score is not None:
        result = _grade_game(home_score, away_score, home_spread, total)

    # Build game object
    return {
        # Random 8-hex-character id; a new one is generated on every call.
        'id': str(uuid.uuid4())[:8],
        'sport': sport,
        'date': date,
        'season': season or 2020,
        'homeTeam': normalize_team_name(raw_game.get('home_team'), sport),
        'awayTeam': normalize_team_name(raw_game.get('away_team'), sport),
        'scores': {
            'homeScore': home_score,
            'awayScore': away_score,
            'homeQ1': safe_int(raw_game.get('home_1stQtr')),
            'homeQ2': safe_int(raw_game.get('home_2ndQtr')),
            'homeQ3': safe_int(raw_game.get('home_3rdQtr')),
            'homeQ4': safe_int(raw_game.get('home_4thQtr')),
            'awayQ1': safe_int(raw_game.get('away_1stQtr')),
            'awayQ2': safe_int(raw_game.get('away_2ndQtr')),
            'awayQ3': safe_int(raw_game.get('away_3rdQtr')),
            'awayQ4': safe_int(raw_game.get('away_4thQtr')),
        },
        'odds': {
            'moneylineHome': home_ml,
            'moneylineAway': away_ml,
            'spreadHome': home_spread,
            'spreadAway': away_spread,
            'totalLine': total,
            'source': 'historical' if has_odds else 'none',
        },
        'result': result,
    }


def import_sport(sport: str, url: str) -> tuple:
    """Download and convert every game of one sport.

    Args:
        sport: Sport code passed on to the download and conversion steps.
        url: Location of the raw dataset.

    Returns:
        A tuple (games, game_count, games_with_odds). games holds the
        converted games that have a date, sorted by date; the result is
        ([], 0, 0) when nothing was downloaded.
    """
    raw_records = download_dataset(sport, url)
    if not raw_records:
        return [], 0, 0

    converted = (convert_game(record, sport) for record in raw_records)

    # Games without dates are dropped
    dated_games = [game for game in converted if game['date']]

    odds_count = sum(
        1 for game in dated_games if game['odds']['source'] == 'historical'
    )

    # Chronological order
    dated_games.sort(key=lambda game: game['date'])

    return dated_games, len(dated_games), odds_count


def main() -> int:
    """Main import function.

    Imports every sport listed in DATASETS, writes one
    <sport>_historical.json file per sport that yielded games, then writes
    metadata.json and prints a summary.

    Returns:
        Process exit status: 0 when at least one game was imported, 1 when
        no games were imported at all.
    """
    print("=" * 60)
    print("📊 Historical Betting Odds Data Importer")
    print("=" * 60)
    print()

    # Create data directory
    os.makedirs(DATA_DIR, exist_ok=True)
    print(f"📁 Output directory: {DATA_DIR}")
    print()

    total_games = 0
    total_with_odds = 0

    for sport, url in DATASETS.items():
        print(f"\n🏀 Processing {sport.upper()}...")

        games, count, with_odds = import_sport(sport, url)

        if games:
            # Save to file
            output_path = os.path.join(DATA_DIR, f'{sport}_historical.json')
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(games, f, indent=2)

            print(f"  📄 Saved {count:,} games to {sport}_historical.json")
            # count is non-zero here because games is non-empty.
            print(f"  🎰 {with_odds:,} games have odds data ({with_odds/count*100:.1f}%)")

            # Show date range
            dates = [g['date'] for g in games if g['date']]
            if dates:
                print(f"  📅 Date range: {min(dates)} to {max(dates)}")

            total_games += count
            total_with_odds += with_odds
        else:
            print(f"  ⚠️ No data imported for {sport}")

    # Create metadata file
    metadata = {
        'importedAt': datetime.now().isoformat(),
        'source': 'https://github.com/flancast90/sportsbookreview-scraper',
        'totalGames': total_games,
        'gamesWithOdds': total_with_odds,
        'sports': list(DATASETS.keys()),
        'dateRange': '2011-2021',
    }

    metadata_path = os.path.join(DATA_DIR, 'metadata.json')
    with open(metadata_path, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)

    # Guard the percentage: every download may have failed, leaving zero games.
    odds_pct = total_with_odds / total_games * 100 if total_games else 0.0

    print()
    print("=" * 60)
    if total_games:
        print("✅ Import Complete!")
    else:
        print("❌ Import failed: no games were imported")
    print("=" * 60)
    print(f"📊 Total Games: {total_games:,}")
    print(f"🎰 Games with Odds: {total_with_odds:,} ({odds_pct:.1f}%)")
    print(f"📁 Data saved to: {DATA_DIR}")
    print()

    # Previously an empty import ended in an uncaught ZeroDivisionError, so
    # the exit status stays non-zero for that case, just without a traceback.
    return 0 if total_games else 1


# Run the importer only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
