#!/usr/bin/env python3
"""
Transfermarkt Soccer Injuries Scraper
Fetches injury data for major European soccer leagues
Writes to SportsDB PlayerInjury table
"""
import requests
import json
import re
import os
import psycopg2
from datetime import datetime, timezone


def _clean_db_url(raw):
    """Normalize a raw ``SPORTS_DATABASE_URL`` value.

    Strips surrounding whitespace, removes one pair of matching quotes (dotenv
    files commonly write ``KEY="value"``), and drops everything from the first
    ``?`` onward.
    """
    value = raw.strip()
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
        value = value[1:-1].strip()
    return value.split('?')[0]


def load_db_url():
    """Return the SportsDB connection URL with any query string removed.

    The known ``.env`` locations are scanned in order for a line that starts
    with ``SPORTS_DATABASE_URL=``; the first such line wins. When no readable
    file provides the key, the ``SPORTS_DATABASE_URL`` environment variable is
    used instead.

    Returns:
        The URL up to (not including) the first ``?``, with optional
        surrounding quotes removed, or ``''`` when the setting is not found.
    """
    key = 'SPORTS_DATABASE_URL'
    env_paths = [
        '/var/www/html/eventheodds/.env',
        os.path.join(os.path.dirname(__file__), '..', '.env'),
    ]
    for env_path in env_paths:
        try:
            # errors='replace' keeps a stray non-UTF-8 byte elsewhere in the
            # file from aborting the lookup.
            with open(env_path, 'r', encoding='utf-8', errors='replace') as f:
                for line in f:
                    if line.startswith(key + '='):
                        return _clean_db_url(line.split('=', 1)[1])
        except OSError:
            # Missing, unreadable, or not a regular file: try the next source.
            continue
    return _clean_db_url(os.environ.get(key, ''))

# Transfermarkt injury-list pages, keyed by the league key used in this script.
# Each source row is (league key, Transfermarkt competition id, display name,
# URL slug); every URL follows the same template, so it is derived here.
LEAGUES = {
    league_key: {
        'id': competition_id,
        'name': display_name,
        'url': f'https://www.transfermarkt.com/{slug}/verletztespieler/wettbewerb/{competition_id}',
    }
    for league_key, competition_id, display_name, slug in (
        ('epl', 'GB1', 'Premier League', 'premier-league'),
        ('laliga', 'ES1', 'La Liga', 'laliga'),
        ('bundesliga', 'L1', 'Bundesliga', 'bundesliga'),
        ('seriea', 'IT1', 'Serie A', 'serie-a'),
        ('ligue1', 'FR1', 'Ligue 1', 'ligue-1'),
    )
}

# Browser-like request headers passed to every requests.get() call in this file.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    ),
    'Accept': (
        'text/html,application/xhtml+xml,'
        'application/xml;q=0.9,*/*;q=0.8'
    ),
    'Accept-Language': 'en-US,en;q=0.5',
}


# Patterns for the Transfermarkt injury table, compiled once at import time.
# NOTE(review): the row pattern is non-greedy, so a row ends at the first
# "</tr>" after its opening tag; if the page nests a table inside a row, cells
# after the nested table are not part of the captured block -- confirm against
# the live markup.
_TM_ROW_RE = re.compile(r'<tr class="(?:odd|even)"[^>]*>(.*?)</tr>', re.DOTALL)
_TM_PLAYER_RE = re.compile(r'hauptlink.*?<a[^>]*title="([^"]+)"')
_TM_TEAM_RE = re.compile(r'<img[^>]*title="([^"]+)"[^>]*class="tiny_wappen"')
_TM_INJURY_RE = re.compile(r'verletzung.*?>([^<]+)<', re.IGNORECASE)
_TM_RETURN_RE = re.compile(r'datum.*?>([^<]+)<')


def _tm_cell_text(pattern, fragment):
    """Return the stripped first capture of *pattern* in *fragment*, or None if missing or blank."""
    match = pattern.search(fragment)
    if not match:
        return None
    # A capture made only of whitespace/newlines carries no information.
    return match.group(1).strip() or None


def fetch_league_injuries(league_id, league_info):
    """Fetch the injury list for one league from Transfermarkt.

    Args:
        league_id: League key stored in each record's ``league`` field.
        league_info: Dict with at least ``url`` (page to download) and
            ``name`` (used in log output).

    Returns:
        A list of dicts with keys ``playerName``, ``team``, ``league``,
        ``injuryType``, ``expectedReturn``, ``status`` and ``source``. The
        list is empty when the request fails, the response is not HTTP 200,
        or no player rows are recognised. ``injuryType`` falls back to
        ``'Unknown'`` and ``expectedReturn`` to ``None`` when the
        corresponding text is absent or blank.
    """
    url = league_info['url']

    # Only the network call can fail here; the regex parsing below cannot raise.
    try:
        resp = requests.get(url, headers=HEADERS, timeout=30)
    except requests.RequestException as exc:
        print(f"  {league_info['name']}: Error - {exc}")
        return []

    if resp.status_code != 200:
        print(f"  {league_info['name']}: HTTP {resp.status_code}")
        return []

    injuries = []
    for row in _TM_ROW_RE.findall(resp.text):
        player_match = _TM_PLAYER_RE.search(row)
        if not player_match:
            # Rows without a titled player link are not injury entries.
            continue
        team_match = _TM_TEAM_RE.search(row)

        injuries.append({
            'playerName': player_match.group(1),
            'team': team_match.group(1) if team_match else '',
            'league': league_id,
            'injuryType': _tm_cell_text(_TM_INJURY_RE, row) or 'Unknown',
            'expectedReturn': _tm_cell_text(_TM_RETURN_RE, row),
            'status': 'Out',
            'source': 'transfermarkt',
        })

    return injuries


def ingest_to_sportsdb(all_injuries):
    """Upsert scraped injuries into the SportsDB ``PlayerInjury`` table.

    Each league is written in its own transaction. Every row is wrapped in a
    savepoint so that one failing row is undone on its own instead of rolling
    back the rows already written for that league.

    Args:
        all_injuries: Mapping of league key -> list of injury dicts.
            ``playerName`` is required (rows without it are skipped);
            ``team``, ``status``, ``injuryType``, ``expectedReturn`` and
            ``source`` are optional.

    Returns:
        The number of rows inserted or updated in leagues whose transaction
        was committed. ``0`` when no database URL is configured or the
        connection cannot be opened.
    """
    db_url = load_db_url()
    if not db_url:
        print('Warning: SPORTS_DATABASE_URL not found, skipping DB write')
        return 0

    upsert_sql = '''
        INSERT INTO "PlayerInjury"
        (league, "playerExternalId", "playerName", team, position, status,
         "injuryType", description, source, "sourceUpdatedAt", raw, "updatedAt")
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
        ON CONFLICT (league, "playerExternalId", source)
        DO UPDATE SET
            status = EXCLUDED.status,
            "injuryType" = EXCLUDED."injuryType",
            description = EXCLUDED.description,
            "sourceUpdatedAt" = EXCLUDED."sourceUpdatedAt",
            raw = EXCLUDED.raw,
            "updatedAt" = NOW()
    '''

    total_added = 0
    conn = None
    try:
        conn = psycopg2.connect(db_url)
        cur = conn.cursor()

        for league_id, injuries in all_injuries.items():
            added = 0
            for inj in injuries:
                player_name = inj.get('playerName', '')
                if not player_name:
                    continue

                # Create stable ID from name
                player_id = f"tm_{player_name.replace(' ', '_').lower()}"

                try:
                    cur.execute('SAVEPOINT injury_row')
                    cur.execute(upsert_sql, (
                        league_id,
                        player_id,
                        player_name,
                        inj.get('team', ''),
                        '',  # position not available
                        inj.get('status', 'Out'),
                        inj.get('injuryType', ''),
                        # The key may be present with value None, so a .get()
                        # default alone would render "None".
                        f"Expected return: {inj.get('expectedReturn') or 'Unknown'}",
                        # Record the real origin of the row (e.g. 'rotowire').
                        inj.get('source') or 'transfermarkt',
                        datetime.now(timezone.utc),
                        json.dumps(inj),
                    ))
                    cur.execute('RELEASE SAVEPOINT injury_row')
                    added += 1
                except Exception as exc:
                    # Undo only this row; earlier rows of the league survive.
                    cur.execute('ROLLBACK TO SAVEPOINT injury_row')
                    cur.execute('RELEASE SAVEPOINT injury_row')
                    print(f'  Error inserting {player_name}: {exc}')

            conn.commit()
            print(f'  {league_id.upper()}: Added/updated {added} injuries to SportsDB')
            total_added += added

        cur.close()
    except Exception as exc:
        # Best-effort boundary: report and fall through to cleanup. Leagues
        # committed before the failure remain counted in total_added.
        print(f'  DB connection error: {exc}')
    finally:
        if conn is not None:
            conn.close()

    return total_added


def fetch_all_soccer_injuries():
    """Scrape every league in ``LEAGUES``, persist the results, and return them.

    Prints a banner and per-league progress, writes the injuries to SportsDB
    when at least one was found, and always writes a JSON backup to
    ``soccer_injuries.json`` one directory above this file.

    Returns:
        Dict mapping each league key to the list of injury dicts scraped for it.
    """
    banner = (
        "╔════════════════════════════════════════════════════════════╗",
        "║           TRANSFERMARKT SOCCER INJURIES                    ║",
        "╚════════════════════════════════════════════════════════════╝\n",
    )
    for banner_line in banner:
        print(banner_line)

    all_injuries = {}
    for league_id, league_info in LEAGUES.items():
        print(f"  Fetching {league_info['name']}...")
        found = fetch_league_injuries(league_id, league_info)
        all_injuries[league_id] = found
        print(f"    Found {len(found)} injuries")

    total = sum(len(entries) for entries in all_injuries.values())
    print(f"\n  TOTAL: {total} soccer injuries across {len(LEAGUES)} leagues")

    # The database write is skipped entirely when nothing was scraped.
    if total > 0:
        db_added = ingest_to_sportsdb(all_injuries)
        print(f"\n  ✅ Wrote {db_added} injuries to SportsDB")

    # The JSON backup is written regardless of whether anything was found.
    backup = {
        'injuries': all_injuries,
        'total': total,
        'timestamp': datetime.now(timezone.utc).isoformat(),
    }
    script_dir = os.path.dirname(__file__)
    output_path = os.path.join(script_dir, '..', 'soccer_injuries.json')
    with open(output_path, 'w') as backup_file:
        json.dump(backup, backup_file, indent=2)
    print("  Results saved to soccer_injuries.json")

    return all_injuries


# Alternative: Use Rotowire for injuries (more reliable)
def fetch_rotowire_injuries():
    """Fetch soccer injuries from Rotowire (backup source).

    Player table rows are parsed first. If none are recognised, the page text
    is scanned for "Firstname Lastname ... Out/Questionable/Doubtful" mentions
    (at most 50), and each mention keeps the status word that was matched.

    Returns:
        ``{'soccer': [injury dicts]}`` when at least one injury was found,
        otherwise ``{}`` (also on request failure or a non-200 response).
        Structured entries carry ``playerName``, ``team``, ``status``,
        ``injuryType`` and ``source``; fallback entries carry only
        ``playerName``, ``status`` and ``source``.
    """
    print("\n=== ROTOWIRE SOCCER INJURIES ===\n")

    url = 'https://www.rotowire.com/soccer/injury-report.php'

    # Only the network call can fail here; the regex parsing below cannot raise.
    try:
        resp = requests.get(url, headers=HEADERS, timeout=30)
    except requests.RequestException as exc:
        print(f"  Rotowire error: {exc}")
        return {}

    print(f"  Rotowire: HTTP {resp.status_code}")
    if resp.status_code != 200:
        return {}

    page = resp.text

    def cell_text(pattern, fragment, default):
        """Return the stripped first capture of *pattern*, or *default* if absent."""
        match = re.search(pattern, fragment, re.IGNORECASE)
        return match.group(1).strip() if match else default

    # Rotowire combines all leagues, so everything is filed under one key.
    entries = []

    rows = re.findall(r'<tr[^>]*class="[^"]*player[^"]*"[^>]*>(.*?)</tr>', page, re.DOTALL | re.IGNORECASE)
    for row in rows:
        name_match = re.search(r'<a[^>]*>([^<]+)</a>', row)
        if not name_match:
            continue

        entries.append({
            'playerName': name_match.group(1).strip(),
            'team': cell_text(r'<td[^>]*class="[^"]*team[^"]*"[^>]*>([^<]+)</td>', row, ''),
            'status': cell_text(r'<span[^>]*class="[^"]*status[^"]*"[^>]*>([^<]+)</span>', row, 'Out'),
            'injuryType': cell_text(r'<td[^>]*class="[^"]*injury[^"]*"[^>]*>([^<]+)</td>', row, 'Unknown'),
            'source': 'rotowire',
        })

    # If no structured data found, try simpler extraction
    if not entries:
        # The status word is captured so that "Questionable" and "Doubtful"
        # mentions are not all recorded as "Out".
        mentions = re.findall(
            r'([A-Z][a-z]+\s+[A-Z][a-z]+)\s*[^<]*(Out|Questionable|Doubtful)',
            page,
        )
        for name, status in mentions[:50]:  # Limit to 50
            entries.append({
                'playerName': name,
                'status': status,
                'source': 'rotowire',
            })

    print(f"  Found {len(entries)} injuries from Rotowire")
    return {'soccer': entries} if entries else {}


def _run_cli():
    """Script entry point: scrape Transfermarkt, falling back to Rotowire.

    Rotowire is only queried when Transfermarkt produced no injuries at all.
    Rotowire results, when present, are written to SportsDB and saved to
    ``soccer_injuries.json`` one directory above this file.
    """
    # Try Transfermarkt first
    injuries = fetch_all_soccer_injuries()
    if any(len(entries) for entries in injuries.values()):
        return

    # Transfermarkt produced nothing, so try Rotowire
    print("\nTransfermarkt returned no data, trying Rotowire...")
    rotowire_injuries = fetch_rotowire_injuries()
    if not rotowire_injuries:
        return

    total = sum(len(entries) for entries in rotowire_injuries.values())

    # Write to SportsDB
    if total > 0:
        db_added = ingest_to_sportsdb(rotowire_injuries)
        print(f"\n  ✅ Wrote {db_added} Rotowire injuries to SportsDB")

    # JSON backup
    backup = {
        'injuries': rotowire_injuries,
        'total': total,
        'source': 'rotowire',
        'timestamp': datetime.now(timezone.utc).isoformat(),
    }
    output_path = os.path.join(os.path.dirname(__file__), '..', 'soccer_injuries.json')
    with open(output_path, 'w') as backup_file:
        json.dump(backup, backup_file, indent=2)
    print("  Results saved to soccer_injuries.json")


if __name__ == '__main__':
    _run_cli()
