#!/usr/bin/env python3
"""
Scrape injury data from ESPN for multiple sports leagues.
Supplements BallDontLie data (NBA, NFL, MLB) with NHL, CFB, CBB and WNBA,
and also re-fetches NBA, NFL and MLB from ESPN.
"""
import requests
import psycopg2
import json
from datetime import datetime, timezone

def load_db_url():
    """Return the sports database URL stored in the project's .env file.

    The first line starting with ``SPORTS_DATABASE_URL=`` wins. Its value is
    stripped of surrounding whitespace and truncated at the first ``?`` so
    that any query-string suffix is dropped. Returns ``''`` when the file
    has no such line.
    """
    prefix = 'SPORTS_DATABASE_URL='
    with open('/var/www/html/eventheodds/.env', 'r') as env_file:
        # Stop reading at the first matching line.
        match = next((ln for ln in env_file if ln.startswith(prefix)), None)
    if match is None:
        return ''
    value = match[len(prefix):].strip()
    return value.partition('?')[0]

# ESPN injury endpoints, keyed by the league code stored in the database.
# Every endpoint has the form "<base>/<sport>/<league>/injuries".
_ESPN_SPORTS_BASE = 'https://site.api.espn.com/apis/site/v2/sports'

_ESPN_LEAGUE_PATHS = (
    ('nhl', 'hockey/nhl'),
    ('ncaaf', 'football/college-football'),
    ('ncaab', 'basketball/mens-college-basketball'),
    ('wnba', 'basketball/wnba'),
    # Also refresh existing (BDL may miss some)
    ('nba', 'basketball/nba'),
    ('nfl', 'football/nfl'),
    ('mlb', 'baseball/mlb'),
)

ESPN_INJURIES = {
    league: f'{_ESPN_SPORTS_BASE}/{path}/injuries'
    for league, path in _ESPN_LEAGUE_PATHS
}

def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_list(value):
    """Return *value* if it is a list, otherwise an empty list."""
    return value if isinstance(value, list) else []


def _parse_espn_injuries(data):
    """Flatten a decoded ESPN injuries payload into a list of injury dicts.

    The payload is read as ``{"injuries": [{"team": {...}, "injuries":
    [...]}]}``. Any level that is missing, ``null`` or of an unexpected type
    is treated as empty, so one malformed entry cannot discard the rest of
    the league. Entries without an athlete display name are skipped.
    """
    injuries = []
    for team_data in _as_list(_as_dict(data).get('injuries')):
        team_data = _as_dict(team_data)
        team_name = _as_dict(team_data.get('team')).get('displayName') or ''

        for injury in _as_list(team_data.get('injuries')):
            injury = _as_dict(injury)
            athlete = _as_dict(injury.get('athlete'))
            player_name = athlete.get('displayName') or ''
            if not player_name:
                continue

            # Use ESPN ID if available, otherwise derive a stable ID from
            # the player's name.
            player_id = athlete.get('id')
            if not player_id:
                player_id = f"espn_{player_name.replace(' ', '_').lower()}"

            # 'type' may be a dict (its 'text' is used), a scalar, or absent.
            # A null value must become '' rather than the string 'None'.
            injury_type = injury.get('type')
            if isinstance(injury_type, dict):
                injury_type = injury_type.get('text') or ''
            elif injury_type is None:
                injury_type = ''
            else:
                injury_type = str(injury_type)

            injuries.append({
                'playerExternalId': str(player_id),
                'playerName': player_name,
                'team': team_name,
                'position': _as_dict(athlete.get('position')).get('abbreviation') or '',
                'status': injury.get('status') or 'Unknown',
                'injuryType': injury_type,
                'description': injury.get('longComment') or injury.get('shortComment') or '',
                'source': 'espn',
            })
    return injuries


def fetch_espn_injuries(league, url):
    """Fetch injuries from the ESPN API for one league.

    Args:
        league: League key, used only in log messages.
        url: ESPN injuries endpoint to request.

    Returns:
        A list of dicts with the keys ``playerExternalId``, ``playerName``,
        ``team``, ``position``, ``status``, ``injuryType``, ``description``
        and ``source``. An empty list is returned (and a message printed)
        when the request fails, the response status is not 200, or the body
        is not valid JSON.
    """
    try:
        resp = requests.get(url, timeout=30)
    except requests.RequestException as e:
        print(f'  Error fetching {league}: {e}')
        return []

    if resp.status_code != 200:
        print(f'  ESPN {league} returned {resp.status_code}')
        return []

    try:
        data = resp.json()
    except ValueError as e:
        # Catching ValueError covers JSON decoding failures across requests
        # versions.
        print(f'  Error fetching {league}: {e}')
        return []

    return _parse_espn_injuries(data)

def _upsert_injury(cur, league, inj):
    """Insert one injury row, or update the existing row on conflict.

    Rows are keyed on (league, playerExternalId, source). On conflict only
    status, injuryType, description, sourceUpdatedAt, raw and updatedAt are
    refreshed.
    NOTE(review): playerName/team/position are not updated on conflict, so a
    player who changes team keeps the old value — confirm this is intended.
    """
    cur.execute('''
                    INSERT INTO "PlayerInjury" 
                    (league, "playerExternalId", "playerName", team, position, status, 
                     "injuryType", description, source, "sourceUpdatedAt", raw, "updatedAt")
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
                    ON CONFLICT (league, "playerExternalId", source)
                    DO UPDATE SET
                        status = EXCLUDED.status,
                        "injuryType" = EXCLUDED."injuryType",
                        description = EXCLUDED.description,
                        "sourceUpdatedAt" = EXCLUDED."sourceUpdatedAt",
                        raw = EXCLUDED.raw,
                        "updatedAt" = NOW()
                ''', (
        league,
        inj['playerExternalId'],
        inj['playerName'],
        inj['team'],
        inj['position'],
        inj['status'],
        inj['injuryType'],
        inj['description'],
        inj['source'],
        datetime.now(timezone.utc),
        json.dumps(inj),
    ))


def ingest_injuries():
    """Ingest injuries from ESPN for all leagues in ``ESPN_INJURIES``.

    For each league the injuries are fetched, upserted into the
    ``"PlayerInjury"`` table and committed as one transaction. Each row is
    wrapped in a SAVEPOINT so that a failing row is skipped without undoing
    the rows already written for that league. After all leagues are
    processed, a per-league/per-status count summary is printed.

    Returns ``None``. Prints an error and returns early when no database URL
    is configured. The database connection is always closed, including when
    an unexpected error propagates.
    """
    db_url = load_db_url()
    if not db_url:
        print('Error: SPORTS_DATABASE_URL not found')
        return

    conn = psycopg2.connect(db_url)
    try:
        with conn.cursor() as cur:
            total_added = 0

            for league, url in ESPN_INJURIES.items():
                print(f'Fetching {league.upper()} injuries from ESPN...')
                injuries = fetch_espn_injuries(league, url)
                print(f'  Found {len(injuries)} injuries')

                added = 0
                for inj in injuries:
                    if not inj['playerExternalId'] or not inj['playerName']:
                        continue

                    # A plain conn.rollback() here would discard every row
                    # already upserted for this league; a savepoint limits
                    # the rollback to the single failing row.
                    cur.execute('SAVEPOINT injury_row')
                    try:
                        _upsert_injury(cur, league, inj)
                    except (psycopg2.Error, TypeError, ValueError) as e:
                        # TypeError/ValueError cover json.dumps failures.
                        cur.execute('ROLLBACK TO SAVEPOINT injury_row')
                        print(f'  Error inserting {inj["playerName"]}: {e}')
                    else:
                        cur.execute('RELEASE SAVEPOINT injury_row')
                        added += 1

                conn.commit()
                print(f'  Added/updated {added} injuries')
                total_added += added

            # Summary
            print(f'\n✅ Total injuries ingested: {total_added}')

            cur.execute('''
        SELECT league, status, COUNT(*) 
        FROM "PlayerInjury"
        GROUP BY league, status
        ORDER BY league, status
    ''')
            print('\nInjury Summary:')
            for row in cur.fetchall():
                print(f'  {row[0]}: {row[1]} = {row[2]}')
    finally:
        conn.close()

# Run the ESPN injury ingestion only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    ingest_injuries()
