#!/usr/bin/env python3
"""
Sync Sports Data from SportsData.io Cache to CSV
Generates sports_games.csv from cached API data for web UI consumption.
Supports: NFL, NHL, CFB (College Football)
"""

import os
import json
import csv
import glob
from datetime import datetime

# Paths
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
CACHE_DIR = os.path.join(PROJECT_ROOT, 'data', 'cache')
CSV_OUTPUT = os.path.join(PROJECT_ROOT, 'data', 'csv', 'sports_games.csv')

# NHL team abbreviations (includes alternate/legacy forms, e.g. MON/MTL, WAS/WSH)
NHL_TEAMS = {'ANA', 'ARI', 'BOS', 'BUF', 'CAR', 'CBJ', 'CGY', 'CHI', 'COL', 'DAL', 
             'DET', 'EDM', 'FLA', 'LA', 'MIN', 'MON', 'MTL', 'NAS', 'NJ', 'NYI', 
             'NYR', 'OTT', 'PHI', 'PIT', 'SEA', 'SJ', 'STL', 'TB', 'TOR', 'VAN', 
             'VEG', 'WAS', 'WSH', 'WPG'}

# NFL team abbreviations (includes alternate/legacy forms, e.g. JAC/JAX, WAS/WSH, OAK)
NFL_TEAMS = {'ARI', 'ATL', 'BAL', 'BUF', 'CAR', 'CHI', 'CIN', 'CLE', 'DAL', 'DEN',
             'DET', 'GB', 'HOU', 'IND', 'JAC', 'JAX', 'KC', 'LA', 'LAC', 'LAR', 'LV',
             'MIA', 'MIN', 'NE', 'NO', 'NYG', 'NYJ', 'OAK', 'PHI', 'PIT', 'SEA', 
             'SF', 'TB', 'TEN', 'WAS', 'WSH'}


def detect_sport(game):
    """Detect sport based on game structure and team names"""
    home = game.get('HomeTeam', '')
    
    # Check for GameKey format (NFL-specific) - looks like 202411731
    game_key = str(game.get('GameKey', ''))
    if game_key and len(game_key) >= 9:
        return 'nfl'
    
    # Check for college teams (abbreviations of 4+ uppercase characters)
    if len(home) >= 4 and home.isupper():
        return 'cfb'
    
    # Check for abbreviations used only by NHL teams (not shared with the NFL set)
    nhl_unique = {'MON', 'MTL', 'NAS', 'CBJ', 'VEG', 'VAN', 'WPG', 'EDM', 'CGY', 'TOR', 'OTT', 'NYI', 'NYR', 'NJ', 'FLA', 'ANA', 'SJ'}
    if home in nhl_unique:
        return 'nhl'
    
    # Check score pattern (NHL scores typically 0-8, NFL typically 0-50+)
    home_score = game.get('HomeScore') or game.get('HomeTeamScore') or 0
    away_score = game.get('AwayScore') or game.get('AwayTeamScore') or 0
    try:
        total_score = int(home_score) + int(away_score)
    except (ValueError, TypeError):
        total_score = 0
    
    if total_score > 0:
        if total_score <= 15:  # Typical NHL game total
            return 'nhl'
        elif total_score <= 80:  # Typical NFL/CFB game total
            # Use GameID format to distinguish - NHL uses different format
            game_id = str(game.get('GameID', ''))
            if game_id and len(game_id) >= 5:
                return 'nhl'  # NHL uses GameID
            return 'nfl'
    
    # Check for Period field (NHL uses periods, not quarters)
    if 'Periods' in game or ('Period' in game and game.get('Period') in ['1', '2', '3', 'OT', 'SO', None]):
        if len(home) <= 3 and home not in NFL_TEAMS:
            return 'nhl'
    
    # Default based on team overlap analysis
    if home in NHL_TEAMS and home not in NFL_TEAMS:
        return 'nhl'
    
    return 'nfl'  # Default to NFL


def load_cached_games():
    """Load all games from cache files"""
    all_games = []
    sport_counts = {'nfl': 0, 'nhl': 0, 'cfb': 0, 'unknown': 0}
    
    cache_files = glob.glob(os.path.join(CACHE_DIR, '*.json'))
    print(f"Found {len(cache_files)} cache files")
    
    for cache_file in cache_files:
        try:
            with open(cache_file, 'r') as f:
                raw = json.load(f)
            
            # Handle wrapped format: {cached_at, data: [...]}
            if isinstance(raw, dict) and 'data' in raw:
                data = raw['data']
            elif isinstance(raw, list):
                data = raw
            else:
                continue
            
            if not isinstance(data, list) or len(data) == 0:
                continue
                
            first_item = data[0]
            
            # Skip if no team data
            if 'HomeTeam' not in first_item:
                continue
            
            for game in data:
                status = game.get('Status', '')
                is_complete = status in ['Final', 'F', 'F/OT'] or game.get('IsOver') or game.get('IsClosed')
                
                if is_complete:
                    sport = detect_sport(game)
                    normalized = normalize_game(game, sport)
                    if normalized:
                        all_games.append(normalized)
                        sport_counts[sport] = sport_counts.get(sport, 0) + 1
                        
        except (json.JSONDecodeError, KeyError, TypeError):
            # Skip unreadable or unexpectedly shaped cache files
            continue
    
    print(f"Sport breakdown: {sport_counts}")
    return all_games


def normalize_game(game, sport):
    """Normalize game data to CSV format"""
    home_team = game.get('HomeTeam', 'Unknown')
    away_team = game.get('AwayTeam', 'Unknown')
    
    # Get scores (different field names for different sports)
    home_score = game.get('HomeScore') or game.get('HomeTeamScore') or 0
    away_score = game.get('AwayScore') or game.get('AwayTeamScore') or 0
    
    try:
        home_score = int(home_score)
        away_score = int(away_score)
    except (ValueError, TypeError):
        home_score = 0
        away_score = 0
    
    # Skip games with no scores (not completed)
    if home_score == 0 and away_score == 0:
        return None
    
    # Determine result from the home team's perspective (rare ties count as losses)
    result = 'win' if home_score > away_score else 'loss'
    
    # Get date
    date_str = game.get('Date') or game.get('DateTime') or game.get('Day', '')
    try:
        if 'T' in str(date_str):
            date_obj = datetime.fromisoformat(str(date_str).replace('Z', '+00:00'))
            date_formatted = date_obj.strftime('%Y-%m-%d')
        else:
            date_formatted = str(date_str)[:10] if len(str(date_str)) >= 10 else str(date_str)
    except (ValueError, TypeError):
        date_formatted = str(date_str)[:10] if date_str else '2024-01-01'
    
    # Derive a rough synthetic moneyline from the point spread
    point_spread = game.get('PointSpread') or game.get('HomePointSpread') or 0
    try:
        point_spread = float(point_spread)
        if point_spread < 0:
            odds = f"{int(point_spread * 20) - 100}"   # e.g. -2.5 -> "-150"
        else:
            odds = f"+{int(point_spread * 20) + 100}"  # e.g. +2.5 -> "+150"
    except (ValueError, TypeError):
        point_spread = 0
        odds = "+100"
    
    return {
        'sport': sport,  # 'nfl', 'nhl', or 'cfb'
        'date': date_formatted,
        'team': home_team,
        'opponent': away_team,
        'team_score': home_score,
        'opponent_score': away_score,
        'odds': odds,
        'result': result,
        'spread': point_spread,
    }


def write_csv(games, output_path):
    """Write games to CSV file"""
    if not games:
        print("No games to write!")
        return 0
    
    # Sort by sport then date
    games.sort(key=lambda g: (g['sport'], g['date']))
    
    # Remove duplicates (by sport + date + team combination)
    seen = set()
    unique_games = []
    for game in games:
        key = f"{game['sport']}-{game['date']}-{game['team']}"
        if key not in seen:
            seen.add(key)
            unique_games.append(game)
    
    fieldnames = ['sport', 'date', 'team', 'opponent', 'team_score', 'opponent_score', 'odds', 'result', 'spread']
    
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(unique_games)
    
    # Print breakdown
    by_sport = {}
    for g in unique_games:
        by_sport[g['sport']] = by_sport.get(g['sport'], 0) + 1
    print(f"Games per sport: {by_sport}")
    
    return len(unique_games)


def main():
    print("=" * 60)
    print("Sports Data Sync: Cache -> CSV")
    print("=" * 60)
    
    games = load_cached_games()
    print(f"Loaded {len(games)} total games from cache")
    
    if games:
        count = write_csv(games, CSV_OUTPUT)
        print(f"\n✅ Successfully wrote {count} games to {CSV_OUTPUT}")
    else:
        print("\n❌ No games available to write")
    
    print("=" * 60)


if __name__ == '__main__':
    main()
