#!/usr/bin/env python3
"""
Extract Referee Assignments from SportsGame raw JSON
Creates structured referee data for totals analysis.

Run: Weekly or after game data syncs
"""
import psycopg2
import json
import os
import re
from datetime import datetime, timezone
from collections import defaultdict


def load_db_url():
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().split('?')[0]
    except FileNotFoundError:
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def ensure_referee_table(cur):
    """Create GameReferee table if it doesn't exist"""
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "GameReferee" (
            id BIGSERIAL PRIMARY KEY,
            league VARCHAR(50) NOT NULL,
            "gameId" BIGINT NOT NULL,
            "gameDate" TIMESTAMP,
            "refereeName" VARCHAR(200) NOT NULL,
            "refereeRole" VARCHAR(50),
            "refereeId" VARCHAR(100),
            "createdAt" TIMESTAMP DEFAULT NOW(),
            UNIQUE(league, "gameId", "refereeName")
        )
    ''')
    cur.execute('CREATE INDEX IF NOT EXISTS "GameReferee_league_ref_idx" ON "GameReferee" (league, "refereeName")')
    cur.execute('CREATE INDEX IF NOT EXISTS "GameReferee_gameId_idx" ON "GameReferee" ("gameId")')


def extract_refs_from_json(raw_json, league):
    """Extract referee names from various JSON structures"""
    refs = []

    if not raw_json:
        return refs

    if isinstance(raw_json, str):
        try:
            raw_json = json.loads(raw_json)
        except:
            return refs

    # Try different paths based on data source

    # ESPN format: officials array
    officials = raw_json.get('officials', [])
    if not officials:
        # Try nested in gameInfo
        game_info = raw_json.get('gameInfo', {})
        officials = game_info.get('officials', [])

    if not officials:
        # Try in header/competitions
        header = raw_json.get('header', {})
        competitions = header.get('competitions', [])
        if competitions:
            officials = competitions[0].get('officials', [])

    for official in officials:
        if isinstance(official, dict):
            name = official.get('displayName') or official.get('fullName') or official.get('name')
            role = official.get('position', {})
            if isinstance(role, dict):
                role = role.get('displayName') or role.get('name')
            ref_id = official.get('id')

            if name:
                refs.append({
                    'name': name.strip(),
                    'role': role if isinstance(role, str) else None,
                    'id': str(ref_id) if ref_id else None
                })
        elif isinstance(official, str):
            refs.append({'name': official.strip(), 'role': None, 'id': None})

    # Try referees key (some sources)
    referees = raw_json.get('referees', [])
    for ref in referees:
        if isinstance(ref, dict):
            name = ref.get('name') or ref.get('fullName')
            if name:
                refs.append({'name': name.strip(), 'role': ref.get('role'), 'id': ref.get('id')})
        elif isinstance(ref, str):
            refs.append({'name': ref.strip(), 'role': None, 'id': None})

    # Try searching for referee patterns in string representation
    if not refs:
        raw_str = json.dumps(raw_json)
        # Look for common patterns
        ref_patterns = [
            r'"referee":\s*"([^"]+)"',
            r'"official":\s*"([^"]+)"',
            r'"ref":\s*"([^"]+)"',
        ]
        for pattern in ref_patterns:
            matches = re.findall(pattern, raw_str, re.IGNORECASE)
            for match in matches:
                if match and len(match) > 2:
                    refs.append({'name': match.strip(), 'role': None, 'id': None})

    return refs


def extract_referees():
    """Main extraction function"""
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()

    print("=" * 60)
    print("EXTRACT REFEREE ASSIGNMENTS")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    # Create table
    ensure_referee_table(cur)
    conn.commit()

    # Get games with raw JSON that might have referee data
    cur.execute('''
        SELECT id, league, "gameDate", raw
        FROM "SportsGame"
        WHERE raw IS NOT NULL
          AND raw::text ILIKE '%official%' OR raw::text ILIKE '%referee%'
        ORDER BY "gameDate" DESC
    ''')

    games = cur.fetchall()
    print(f"Found {len(games)} games with potential referee data")

    total_inserted = 0
    total_games_with_refs = 0
    refs_by_league = defaultdict(int)

    for game_id, league, game_date, raw_json in games:
        refs = extract_refs_from_json(raw_json, league)

        if refs:
            total_games_with_refs += 1

            for ref in refs:
                try:
                    cur.execute('''
                        INSERT INTO "GameReferee" (league, "gameId", "gameDate", "refereeName", "refereeRole", "refereeId")
                        VALUES (%s, %s, %s, %s, %s, %s)
                        ON CONFLICT (league, "gameId", "refereeName") DO NOTHING
                    ''', (league, game_id, game_date, ref['name'], ref['role'], ref['id']))

                    if cur.rowcount > 0:
                        total_inserted += 1
                        refs_by_league[league] += 1
                except Exception as e:
                    pass

        if total_games_with_refs % 500 == 0 and total_games_with_refs > 0:
            print(f"  Processed {total_games_with_refs} games with refs...")
            conn.commit()

    conn.commit()

    # Get summary stats
    cur.execute('''
        SELECT league, COUNT(DISTINCT "gameId") as games, COUNT(DISTINCT "refereeName") as unique_refs
        FROM "GameReferee"
        GROUP BY league
        ORDER BY games DESC
    ''')

    print(f"\nReferee Data Summary:")
    print(f"{'League':12} {'Games':>8} {'Unique Refs':>12}")
    print("-" * 35)
    for row in cur.fetchall():
        print(f"{row[0]:12} {row[1]:>8} {row[2]:>12}")

    cur.close()
    conn.close()

    print(f"\n{'='*60}")
    print(f"TOTAL: {total_inserted} referee assignments extracted from {total_games_with_refs} games")
    print("=" * 60)

    return {'inserted': total_inserted, 'games': total_games_with_refs}


def main():
    try:
        result = extract_referees()
        print(f"\nExtraction complete: {result['inserted']} refs from {result['games']} games")
    except Exception as e:
        print(f"ERROR: {e}")
        import traceback
        traceback.print_exc()


if __name__ == '__main__':
    main()
