#!/usr/bin/env python3
"""
Sync TheSportsDB Data - Enhanced Multi-Season Version
Fetches historical event data for priority leagues across multiple seasons.
Preserves existing data by appending only new events (deduplication by event_id).
"""

import os
import csv
import sys
import argparse
from datetime import datetime
from typing import List, Dict, Any, Set

# Add parent directory for imports
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from thesportsdb_fetcher import TheSportsDBFetcher

# Priority leagues to sync, one tuple per league:
#   (league_id, league_code, league_name, seasons)
# league_id is the numeric id (kept as a string here, converted with int()
# before it is passed to the fetcher), league_code is the short label stored
# in the CSV's league_code column, and seasons lists the season strings to
# request, newest first.
PRIORITY_LEAGUES = [
    # Soccer (split-year seasons, e.g. 2024-2025)
    ('4328', 'EPL', 'English Premier League', ['2024-2025', '2023-2024', '2022-2023', '2021-2022']),
    ('4335', 'LaLiga', 'Spanish La Liga', ['2024-2025', '2023-2024', '2022-2023', '2021-2022']),
    ('4331', 'Bundesliga', 'German Bundesliga', ['2024-2025', '2023-2024', '2022-2023', '2021-2022']),
    ('4332', 'SerieA', 'Italian Serie A', ['2024-2025', '2023-2024', '2022-2023', '2021-2022']),
    ('4334', 'Ligue1', 'French Ligue 1', ['2024-2025', '2023-2024', '2022-2023', '2021-2022']),
    # American sports (NFL and MLB are listed with calendar-year seasons,
    # e.g. 2024; NBA and NHL with split-year seasons, e.g. 2024-2025)
    ('4391', 'NFL', 'National Football League', ['2024', '2023', '2022', '2021']),
    ('4387', 'NBA', 'National Basketball Association', ['2024-2025', '2023-2024', '2022-2023', '2021-2022']),
    ('4380', 'NHL', 'National Hockey League', ['2024-2025', '2023-2024', '2022-2023', '2021-2022']),
    ('4424', 'MLB', 'Major League Baseball', ['2024', '2023', '2022', '2021']),
    # Combat sports (calendar-year seasons)
    ('4443', 'UFC', 'Ultimate Fighting Championship', ['2024', '2023', '2022', '2021']),
    # College (NCAAF is listed with calendar-year seasons, NCAAB with
    # split-year seasons)
    ('4479', 'NCAAF', 'NCAA Football', ['2024', '2023', '2022', '2021']),
    ('4607', 'NCAAB', 'NCAA Basketball', ['2024-2025', '2023-2024', '2022-2023', '2021-2022']),
]

# CSV file that main() loads existing events from and writes the merged
# result to. The path is relative, so it resolves against the current
# working directory of the process, not the script's location.
OUTPUT_CSV = './data/csv/thesportsdb_games.csv'


def load_existing_events(csv_path: str) -> tuple[List[Dict[str, Any]], Set[str]]:
    """Read a previously written events CSV.

    Args:
        csv_path: Location of the CSV file to read.

    Returns:
        A ``(rows, ids)`` pair: ``rows`` holds every CSV row as a dict in
        file order, and ``ids`` holds the non-empty ``event_id`` values as
        strings. Both are empty when the file does not exist.

    Reading is best-effort: any error is reported on stdout and whatever
    was collected before the failure is returned.
    """
    rows = []
    seen_ids = set()

    # Nothing on disk yet: report it and hand back the empty containers.
    if not os.path.exists(csv_path):
        print("📂 No existing CSV found, will create new file")
        return rows, seen_ids

    try:
        with open(csv_path, 'r', newline='', encoding='utf-8') as handle:
            for record in csv.DictReader(handle):
                rows.append(record)
                event_id = record.get('event_id')
                # Rows without an id are kept but cannot take part in dedup.
                if event_id:
                    seen_ids.add(str(event_id))
        print(f"📂 Loaded {len(rows)} existing events from {csv_path}")
    except Exception as exc:
        print(f"⚠ Could not load existing CSV: {exc}")

    return rows, seen_ids


def _parse_score(raw: Any):
    """Convert a raw score value to ``int``, or ``None`` when it is absent or unparseable."""
    # Test for "missing" explicitly: a plain truthiness check would also
    # discard a legitimate numeric score of 0.
    if raw is None or raw == '':
        return None
    try:
        return int(raw)
    except (ValueError, TypeError):
        return None


def _text_field(event: Dict[str, Any], key: str) -> Any:
    """Return ``event[key]``, substituting ``''`` when the key is missing or its value is ``None``."""
    value = event.get(key)
    return '' if value is None else value


def normalize_event(event: Dict[str, Any], sport: str) -> Dict[str, Any]:
    """Normalize a TheSportsDB event to a standard format.

    Args:
        event: Raw event mapping. The keys read are ``dateEvent``,
            ``strLeague``, ``strHomeTeam``, ``strAwayTeam``,
            ``intHomeScore``, ``intAwayScore``, ``idEvent``, ``strVenue``
            and ``strSeason``.
        sport: Sport category to record on the normalized row.

    Returns:
        A dict with the keys ``date``, ``sport``, ``league``, ``home_team``,
        ``away_team``, ``home_score``, ``away_score``, ``total``,
        ``winner``, ``event_id``, ``venue`` and ``season``. Scores are
        ``int`` or ``None``; ``total`` and ``winner`` (``'home'``,
        ``'away'`` or ``'draw'``) are ``None`` unless both scores parsed.
        Descriptive fields fall back to ``''`` when missing or ``None`` so
        that later string operations (such as sorting by date) never see
        ``None``.
    """
    home_score = _parse_score(event.get('intHomeScore'))
    away_score = _parse_score(event.get('intAwayScore'))

    # Winner and total only make sense when both scores are known.
    winner = None
    total = None
    if home_score is not None and away_score is not None:
        total = home_score + away_score
        if home_score > away_score:
            winner = 'home'
        elif away_score > home_score:
            winner = 'away'
        else:
            winner = 'draw'

    return {
        'date': _text_field(event, 'dateEvent'),
        'sport': sport,
        'league': _text_field(event, 'strLeague'),
        'home_team': _text_field(event, 'strHomeTeam'),
        'away_team': _text_field(event, 'strAwayTeam'),
        'home_score': home_score,
        'away_score': away_score,
        'total': total,
        'winner': winner,
        'event_id': _text_field(event, 'idEvent'),
        'venue': _text_field(event, 'strVenue'),
        'season': _text_field(event, 'strSeason'),
    }


def map_league_to_sport(league_code: str) -> str:
    """Return the sport category for a league code, or ``'other'`` if the code is unknown."""
    # Grouped by sport for readability, then flattened into a code -> sport table.
    codes_by_sport = (
        ('soccer', ('EPL', 'LaLiga', 'Bundesliga', 'SerieA', 'Ligue1')),
        ('football', ('NFL',)),
        ('basketball', ('NBA',)),
        ('hockey', ('NHL',)),
        ('baseball', ('MLB',)),
        ('mma', ('UFC',)),
        ('cfb', ('NCAAF',)),
        ('cbb', ('NCAAB',)),
    )
    lookup = {
        code: sport_name
        for sport_name, codes in codes_by_sport
        for code in codes
    }
    return lookup.get(league_code, 'other')


def sync_leagues(fetcher: TheSportsDBFetcher, existing_ids: Set[str]) -> tuple[List[Dict[str, Any]], int]:
    """Fetch every configured league/season and collect events not seen before.

    Args:
        fetcher: Client whose ``get_events_by_season(league_id, season)``
            is called once per league/season pair.
        existing_ids: Event ids that are already stored. This set is
            updated in place with the id of every event collected here, so
            the same event is not added twice within one run.

    Returns:
        ``(new_events, skipped_count)``: the normalized events that were
        added (each tagged with ``league_code``) and the number of fetched
        events skipped because their id was already known.

    A failure while processing one season is printed and does not stop the
    remaining seasons or leagues; events collected before the failure are
    kept.
    """
    collected = []
    total_skipped = 0

    for league_id, league_code, league_name, seasons in PRIORITY_LEAGUES:
        print(f"\n📊 Syncing {league_name} ({league_code})...")
        sport = map_league_to_sport(league_code)
        added_in_league = 0
        skipped_in_league = 0

        for season in seasons:
            try:
                # One request per season yields the full historical list.
                season_events = fetcher.get_events_by_season(int(league_id), season)

                if not season_events:
                    print(f"   Season {season}: no data")
                    continue

                added_in_season = 0
                for raw_event in season_events:
                    event_id = str(raw_event.get('idEvent', ''))

                    # Already stored (or already collected earlier in this run).
                    if event_id and event_id in existing_ids:
                        skipped_in_league += 1
                        continue

                    # Events lacking either score are not completed yet; ignore them.
                    if raw_event.get('intHomeScore') is None or raw_event.get('intAwayScore') is None:
                        continue

                    record = normalize_event(raw_event, sport)
                    record['league_code'] = league_code
                    collected.append(record)
                    existing_ids.add(event_id)
                    added_in_season += 1
                    added_in_league += 1

                if added_in_season > 0:
                    print(f"   Season {season}: +{added_in_season} events")

            except Exception as exc:
                print(f"   Season {season}: error - {exc}")

        total_skipped += skipped_in_league
        print(f"   Subtotal: {added_in_league} new, {skipped_in_league} existing")

    return collected, total_skipped


def write_csv(events: List[Dict[str, Any]], output_path: str):
    """Write events to a CSV file, replacing any existing file at that path.

    Args:
        events: Rows to write. Each row may only use the column names
            listed in ``fieldnames`` below; missing columns are written as
            empty cells.
        output_path: Destination file. Missing parent directories are
            created. A bare filename (no directory part) is written to the
            current working directory.

    Raises:
        ValueError: If a row contains a key that is not a known column
            (raised by ``csv.DictWriter``).
        OSError: If the directory or file cannot be created or replaced.

    The rows are first written to ``<output_path>.tmp`` and then moved over
    the destination, so a failure part-way through leaves a previously
    written file untouched. When ``events`` is empty nothing is written.
    """
    if not events:
        print("No events to write!")
        return

    # dirname() is '' for a bare filename, and os.makedirs('') would raise.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    fieldnames = ['date', 'sport', 'league', 'home_team', 'away_team',
                  'home_score', 'away_score', 'total', 'winner',
                  'event_id', 'venue', 'season', 'league_code']

    tmp_path = f"{output_path}.tmp"
    try:
        with open(tmp_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(events)
        # Only swap the file in once every row has been written successfully.
        os.replace(tmp_path, output_path)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up after a failed write.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    print(f"\n✅ Wrote {len(events)} events to {output_path}")


def main():
    """Run the sync from the command line.

    Loads the events already stored in ``OUTPUT_CSV`` (skipped with
    ``-f/--full``), fetches the configured leagues, merges the new events
    with the stored ones, rewrites the CSV sorted by date (newest first)
    and prints a summary including a per-sport tally.
    """
    parser = argparse.ArgumentParser(description='Sync TheSportsDB historical data')
    parser.add_argument('-f', '--full', action='store_true',
                        help='Force full sync (ignore existing data)')
    args = parser.parse_args()

    print("=" * 60)
    print("TheSportsDB Multi-Season Sync")
    print("=" * 60)

    fetcher = TheSportsDBFetcher()

    if args.full:
        print("🔄 Full sync mode - will replace all data\n")
        existing_events = []
        existing_ids = set()
    else:
        existing_events, existing_ids = load_existing_events(OUTPUT_CSV)

    # Sync all leagues
    new_events, skipped_count = sync_leagues(fetcher, existing_ids)

    # Combine existing + new events
    all_events = existing_events + new_events

    # Rows read back from the CSV carry their numeric columns as text;
    # bring them back to int / None so old and new rows have the same types.
    for event in all_events:
        for field in ('home_score', 'away_score', 'total'):
            value = event.get(field)
            if value in ('', 'None', None):
                event[field] = None
            else:
                try:
                    event[field] = int(value)
                except (ValueError, TypeError):
                    # Leave values that are not integers as they are.
                    pass

    # Sort by date (newest first). A date can be None (e.g. a CSV row with
    # fewer cells than the header); treat it as '' so the string comparison
    # does not raise TypeError.
    all_events.sort(key=lambda x: x.get('date') or '', reverse=True)

    write_csv(all_events, OUTPUT_CSV)

    # Summary
    print("\n" + "=" * 60)
    print("Sync Summary")
    print("=" * 60)
    print(f"  📥 New events added: {len(new_events)}")
    print(f"  ⏭ Existing events skipped: {skipped_count}")
    print(f"  📊 Total events in database: {len(all_events)}")

    print("\n  By Sport:")
    sport_counts = {}
    for event in all_events:
        # Missing or empty sport values are grouped under 'unknown'.
        sport = event.get('sport') or 'unknown'
        sport_counts[sport] = sport_counts.get(sport, 0) + 1

    for sport, count in sorted(sport_counts.items(), key=lambda x: -x[1]):
        print(f"    {sport}: {count:,} events")

    print("=" * 60)


# Run the sync only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
