#!/usr/bin/env python3
"""
Enhanced Injury Data Scraper
Aggregates injury data from multiple free sources:
- ESPN Injuries (already exists)
- Rotowire Injury Reports
- CBS Sports Injuries
- Official Team Injury Reports
"""
import requests
from bs4 import BeautifulSoup
import psycopg2
import json
import os
import time
from datetime import datetime, timezone

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (compatible; EvenTheOdds/1.0)',
    'Accept': 'text/html,application/json',
}

REQUEST_DELAY = 2.0
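

# Shared fetch pattern (sketch, not wired into the scrapers below): each
# fetcher repeats the same sleep -> GET -> status-check sequence, so one way
# to consolidate it is a small wrapper with a basic retry. The retry count
# and backoff factor are assumptions, not values any of the sources specify.
def polite_get(url, retries=2):
    """Rate-limited GET with simple retries; returns the Response or None."""
    for attempt in range(retries + 1):
        time.sleep(REQUEST_DELAY * (attempt + 1))  # back off a bit more each try
        try:
            resp = requests.get(url, headers=HEADERS, timeout=30)
            if resp.status_code == 200:
                return resp
        except requests.RequestException:
            pass  # transient network error; retry
    return None
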

def load_db_url():
    """Load database URL from environment"""
    env_paths = [
        '/var/www/html/eventheodds/.env',
        os.path.join(os.path.dirname(__file__), '..', '.env'),
    ]
    for env_path in env_paths:
        try:
            with open(env_path, 'r') as f:
                for line in f:
                    if line.startswith('SPORTS_DATABASE_URL='):
                        return line.split('=', 1)[1].strip().split('?')[0]
        except FileNotFoundError:
            continue
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def fetch_rotowire_injuries(sport):
    """Scrape injuries from Rotowire"""
    print(f"\n[Rotowire] Fetching {sport.upper()} injuries...")

    urls = {
        'nba': 'https://www.rotowire.com/basketball/injury-report.php',
        'nfl': 'https://www.rotowire.com/football/injury-report.php',
        'mlb': 'https://www.rotowire.com/baseball/injury-report.php',
        'nhl': 'https://www.rotowire.com/hockey/injury-report.php',
    }

    url = urls.get(sport)
    if not url:
        return []

    try:
        time.sleep(REQUEST_DELAY)
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code != 200:
            print(f"  Status: {resp.status_code}")
            return []

        soup = BeautifulSoup(resp.text, 'html.parser')
        injuries = []

        # Find injury tables
        tables = soup.find_all('table', {'class': lambda x: x and 'injury' in str(x).lower()})
        if not tables:
            tables = soup.find_all('table')

        for table in tables[:5]:  # Limit tables
            rows = table.find_all('tr')
            current_team = None

            for row in rows:
                # Check for team header
                team_header = row.find('th', {'colspan': True}) or row.find('td', {'class': 'team'})
                if team_header:
                    current_team = team_header.text.strip()
                    continue

                cells = row.find_all('td')
                if len(cells) < 3:
                    continue

                try:
                    player_cell = cells[0]
                    player_link = player_cell.find('a')
                    player_name = player_link.text.strip() if player_link else player_cell.text.strip()

                    if not player_name or player_name.lower() in ['player', 'name']:
                        continue

                    injury = {
                        'playerName': player_name,
                        # Assumed column layout: under a team-header row the
                        # second cell is the position; otherwise it holds the
                        # team name
                        'team': current_team or cells[1].text.strip(),
                        'position': cells[1].text.strip() if current_team else None,
                        'status': cells[2].text.strip(),
                        'injury': cells[3].text.strip() if len(cells) > 3 else None,
                        'returnDate': cells[4].text.strip() if len(cells) > 4 else None,
                        'league': sport,
                        'source': 'rotowire',
                    }

                    injuries.append(injury)

                except Exception:
                    # Skip rows that do not match the expected column layout
                    continue

        print(f"  Found {len(injuries)} injuries")
        return injuries

    except Exception as e:
        print(f"  Error: {e}")
        return []


def fetch_cbs_injuries(sport):
    """Scrape injuries from CBS Sports"""
    print(f"\n[CBS Sports] Fetching {sport.upper()} injuries...")

    urls = {
        'nba': 'https://www.cbssports.com/nba/injuries/',
        'nfl': 'https://www.cbssports.com/nfl/injuries/',
        'mlb': 'https://www.cbssports.com/mlb/injuries/',
        'nhl': 'https://www.cbssports.com/nhl/injuries/',
    }

    url = urls.get(sport)
    if not url:
        return []

    try:
        time.sleep(REQUEST_DELAY)
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code != 200:
            print(f"  Status: {resp.status_code}")
            return []

        soup = BeautifulSoup(resp.text, 'html.parser')
        injuries = []

        # Find injury sections
        sections = soup.find_all('div', {'class': lambda x: x and 'injury' in str(x).lower()})

        for section in sections:
            team_name = None
            team_header = section.find(['h2', 'h3', 'h4'])
            if team_header:
                team_name = team_header.text.strip()

            rows = section.find_all('tr')
            for row in rows:
                cells = row.find_all(['td', 'th'])
                if len(cells) < 3:
                    continue

                try:
                    player_name = cells[0].text.strip()
                    if not player_name or player_name.lower() in ['player', 'name']:
                        continue

                    injury = {
                        'playerName': player_name,
                        'team': team_name,
                        'position': cells[1].text.strip(),
                        'status': cells[2].text.strip(),
                        'injury': cells[3].text.strip() if len(cells) > 3 else None,
                        'league': sport,
                        'source': 'cbssports',
                    }

                    injuries.append(injury)

                except Exception:
                    continue

        print(f"  Found {len(injuries)} injuries")
        return injuries

    except Exception as e:
        print(f"  Error: {e}")
        return []


def fetch_nfl_official_injuries():
    """Fetch NFL official injury report from NFL.com"""
    print("\n[NFL.com] Fetching official injury report...")

    url = "https://www.nfl.com/injuries/"

    try:
        time.sleep(REQUEST_DELAY)
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code != 200:
            print(f"  Status: {resp.status_code}")
            return []

        soup = BeautifulSoup(resp.text, 'html.parser')
        injuries = []

        # Find injury cards/sections
        injury_sections = soup.find_all('div', {'class': lambda x: x and 'injury' in str(x).lower()})

        for section in injury_sections:
            team = None
            team_elem = section.find(['h2', 'h3', 'div'], {'class': 'team'})
            if team_elem:
                team = team_elem.text.strip()

            players = section.find_all(['tr', 'div'], {'class': lambda x: x and 'player' in str(x).lower()})
            for player_elem in players:
                try:
                    name_elem = player_elem.find(['a', 'span'], {'class': 'name'})
                    status_elem = player_elem.find(['span', 'div'], {'class': 'status'})
                    injury_elem = player_elem.find(['span', 'div'], {'class': 'injury'})

                    player_name = name_elem.text.strip() if name_elem else player_elem.text.strip()

                    if player_name:
                        injuries.append({
                            'playerName': player_name,
                            'team': team,
                            'status': status_elem.text.strip() if status_elem else None,
                            'injury': injury_elem.text.strip() if injury_elem else None,
                            'league': 'nfl',
                            'source': 'nfl.com',
                        })
                except Exception:
                    # Skip entries missing the expected markup; a bare except
                    # here would also swallow KeyboardInterrupt
                    continue

        print(f"  Found {len(injuries)} injuries")
        return injuries

    except Exception as e:
        print(f"  Error: {e}")
        return []


def save_injuries_to_db(conn, injuries):
    """Save injuries to PlayerInjury table"""
    if not injuries:
        return 0

    cur = conn.cursor()
    saved = 0

    for injury in injuries:
        player_name = injury.get('playerName', '')
        league = injury.get('league', '')

        if not player_name or not league:
            continue

        # Derive a stable player ID from the name; note that two different
        # players sharing a name in the same league would collide here
        player_id = player_name.lower().replace(' ', '_').replace('.', '').replace("'", '')

        try:
            # Guard each insert with a savepoint so a bad row can be rolled
            # back on its own; a full conn.rollback() here would also discard
            # every row already inserted in this uncommitted batch
            cur.execute('SAVEPOINT injury_row')
            cur.execute('''
                INSERT INTO "PlayerInjury" (
                    "playerId", "playerName", "team", "position",
                    "status", "injuryType", "league", "source",
                    "createdAt", "updatedAt"
                )
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW())
                ON CONFLICT ("playerId", "league")
                DO UPDATE SET
                    "status" = EXCLUDED."status",
                    "injuryType" = EXCLUDED."injuryType",
                    "team" = EXCLUDED."team",
                    "source" = EXCLUDED."source",
                    "updatedAt" = NOW()
            ''', (
                player_id,
                player_name,
                injury.get('team'),
                injury.get('position'),
                injury.get('status'),
                injury.get('injury'),
                league,
                injury.get('source'),
            ))
            cur.execute('RELEASE SAVEPOINT injury_row')
            saved += 1

        except Exception:
            cur.execute('ROLLBACK TO SAVEPOINT injury_row')
            continue

    conn.commit()
    cur.close()
    return saved
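

# Note on the upsert above: ON CONFLICT ("playerId", "league") assumes the
# "PlayerInjury" table has a matching unique constraint; without one,
# PostgreSQL raises "there is no unique or exclusion constraint matching the
# ON CONFLICT specification". A sketch of the assumed index (the name is
# illustrative, not taken from the real schema):
#
#   CREATE UNIQUE INDEX IF NOT EXISTS "PlayerInjury_playerId_league_key"
#       ON "PlayerInjury" ("playerId", "league");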


def main():
    print("=" * 60)
    print("ENHANCED INJURY DATA SCRAPER")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    db_url = load_db_url()
    all_injuries = []

    # Scrape from multiple sources for each sport
    sports = ['nba', 'nfl', 'mlb', 'nhl']

    for sport in sports:
        print(f"\n{'='*40}")
        print(f"Processing {sport.upper()}...")

        # Rotowire
        rotowire_injuries = fetch_rotowire_injuries(sport)
        all_injuries.extend(rotowire_injuries)

        # CBS Sports
        cbs_injuries = fetch_cbs_injuries(sport)
        all_injuries.extend(cbs_injuries)

    # NFL Official
    nfl_official = fetch_nfl_official_injuries()
    all_injuries.extend(nfl_official)
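
    # Cross-source dedupe (optional sketch): the DB upsert collapses
    # duplicates on ("playerId", "league"), but the JSON dump below keeps
    # every raw record. Uncomment to keep only the first record seen per
    # player and league:
    # seen = set()
    # deduped = []
    # for inj in all_injuries:
    #     key = (inj.get('playerName', '').lower(), inj.get('league'))
    #     if key not in seen:
    #         seen.add(key)
    #         deduped.append(inj)
    # all_injuries = deduped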

    # Summary
    print(f"\n{'='*60}")
    print(f"TOTAL INJURIES SCRAPED: {len(all_injuries)}")

    # Count by source
    sources = {}
    for injury in all_injuries:
        src = injury.get('source', 'unknown')
        sources[src] = sources.get(src, 0) + 1

    for src, count in sorted(sources.items(), key=lambda x: -x[1]):
        print(f"  {src}: {count}")

    # Save to JSON
    output_dir = '/var/www/html/eventheodds/data'
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, 'injuries_enhanced.json')

    with open(output_path, 'w') as f:
        json.dump({
            'injuries': all_injuries,
            'count': len(all_injuries),
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'sources': sources,
        }, f, indent=2, default=str)

    print(f"Saved to {output_path}")

    # Save to database if available
    if db_url and all_injuries:
        try:
            conn = psycopg2.connect(db_url)
            saved = save_injuries_to_db(conn, all_injuries)
            conn.close()
            print(f"Saved {saved} injuries to database")
        except Exception as e:
            print(f"Database error: {e}")

    print("=" * 60)


if __name__ == '__main__':
    main()
