#!/usr/bin/env python3
"""
OddsPortal Half-Time Odds Scraper - Threaded Version
Uses multiple browser instances with proxy rotation for faster scraping.

Targets: NBA, NFL, NHL
Output: GameHalfLine table

Run: Every 4-6 hours via cron
"""
import json
import os
import sys
import time
import re
import random
import psycopg2
from datetime import datetime, timezone, timedelta
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading

# Inject the scraper's dedicated virtualenv site-packages onto sys.path —
# presumably so cron-launched runs (no activated venv) can still import
# playwright below. TODO confirm against the cron entry.
VENV_PATH = '/var/www/html/eventheodds/.venv-scraper/lib/python3.12/site-packages'
if VENV_PATH not in sys.path:
    sys.path.insert(0, VENV_PATH)

from playwright.sync_api import sync_playwright

# Proxies rotated across worker threads (one picked per browser context).
# Playwright's proxy setting here carries no credentials, so only no-auth
# endpoints are listed; the authenticated mobile proxy cannot be used.
PROXY_CONFIGS = [
    {'server': 'socks5://54.38.19.233:3028'},  # residential (no auth)
]

# Per-league config: the listing-page URL plus a regex that matches the
# relative URLs of individual game pages (slug ending in an 8-char id).
LEAGUES = {
    'nba': {
        'url': 'https://www.oddsportal.com/basketball/usa/nba/',
        'pattern': r'/basketball/usa/nba/[a-z0-9-]+-[A-Za-z0-9]{8}/',
    },
    'nfl': {
        'url': 'https://www.oddsportal.com/american-football/usa/nfl/',
        'pattern': r'/american-football/usa/nfl/[a-z0-9-]+-[A-Za-z0-9]{8}/',
    },
    'nhl': {
        'url': 'https://www.oddsportal.com/hockey/usa/nhl/',
        'pattern': r'/hockey/usa/nhl/[a-z0-9-]+-[A-Za-z0-9]{8}/',
    },
}

# Desktop user agents; one is chosen at random per browser context.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/120.0.0.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0',
]

# results_lock guards total_stored, which worker threads increment after
# storing each game's odds.
results_lock = threading.Lock()
total_stored = 0


def load_db_url():
    """Load SPORTS_DATABASE_URL from the project .env file.

    Returns the URL with any '?query' suffix stripped, or '' when the
    variable is unset.
    """
    load_dotenv('/var/www/html/eventheodds/.env')
    raw = os.environ.get('SPORTS_DATABASE_URL', '')
    # partition() leaves the string untouched when no '?' is present.
    return raw.partition('?')[0]

# Matches American odds (optionally signed, 3-4 digits) inside elements
# whose class contains "odds". Compiled once; it runs on every page.
_ODDS_RE = re.compile(r'class="[^"]*odds[^"]*"[^>]*>([+-]?\d{3,4})<')


def _parse_teams(title):
    """Extract (away_team, home_team) from a page title shaped like
    'Away - Home | ...', stripping trailing 'Odds ...' boilerplate.

    Returns None when the title doesn't match the expected shape.
    """
    match = re.match(r'^([^-]+)\s*-\s*([^|]+)', title)
    if not match:
        return None
    away = re.sub(r'\s+Odds.*$', '', match.group(1).strip(), flags=re.I)
    home = re.sub(r'\s+Odds.*$', '', match.group(2).strip(), flags=re.I)
    return away, home


def _store_odds(results, league, db_url):
    """Insert scraped odds rows into GameHalfLine.

    Returns the number of rows actually inserted (rows skipped by
    ON CONFLICT count as zero).
    """
    conn = psycopg2.connect(db_url)
    stored = 0
    try:
        cur = conn.cursor()
        for od in results:
            # NOTE(review): the game date is approximated as "tomorrow";
            # the page's real start time is not parsed. TODO confirm this
            # matches what downstream consumers of "gameDate" expect.
            game_date = datetime.now(timezone.utc) + timedelta(days=1)
            try:
                cur.execute('''
                    INSERT INTO "GameHalfLine"
                    (league, "gameDate", "homeTeam", "awayTeam",
                     period, market, "lineValue", "bookOdds", bookmaker,
                     "createdAt", "updatedAt")
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 'oddsportal', NOW(), NOW())
                    ON CONFLICT DO NOTHING
                ''', (
                    league,
                    game_date,
                    od.get('home_team'),
                    od.get('away_team'),
                    od.get('period', '1h'),
                    od.get('market', 'moneyline'),
                    None,
                    od.get('home_odds'),
                ))
                # Commit per row: after a statement error psycopg2 keeps
                # the transaction aborted until rollback, which would
                # silently discard every later insert in this loop.
                conn.commit()
                # rowcount is 0 when ON CONFLICT skipped the insert, so
                # the printed count reflects rows really written.
                stored += max(cur.rowcount, 0)
            except Exception:
                # Best-effort insert; clear the aborted transaction so
                # the remaining rows can still be attempted.
                conn.rollback()
        cur.close()
    finally:
        conn.close()
    return stored


def scrape_game(game_url, league, proxy_config, db_url):
    """Scrape moneyline odds (full game, plus 1st half when the page has
    a '1st Half' tab) for one game page and store them in GameHalfLine.

    Args:
        game_url: absolute URL of an OddsPortal game page.
        league: league key ('nba' / 'nfl' / 'nhl').
        proxy_config: Playwright proxy dict for this browser context.
        db_url: psycopg2 connection string.

    Returns:
        The list of odds dicts scraped (empty on any failure).
    """
    global total_stored
    results = []

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    viewport={'width': 1920, 'height': 1080},
                    user_agent=random.choice(USER_AGENTS),
                    proxy=proxy_config,
                )
                # Hide the navigator.webdriver automation flag.
                context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
                page = context.new_page()

                page.goto(game_url, timeout=45000)
                time.sleep(random.uniform(1.5, 3))  # let dynamic odds render

                content = page.content()

                teams = _parse_teams(page.title())
                if teams is None:
                    return []
                away_team, home_team = teams

                odds_matches = _ODDS_RE.findall(content)

                if len(odds_matches) >= 2:
                    results.append({
                        'period': 'full',
                        'market': 'moneyline',
                        'home_team': home_team,
                        'away_team': away_team,
                        'home_odds': odds_matches[0],
                        'away_odds': odds_matches[1],
                    })

                    # The 1st-half section is optional; scrape it
                    # best-effort when the page mentions it.
                    if '1st half' in content.lower():
                        try:
                            page.locator('text=1st Half').first.click()
                            time.sleep(1.5)  # wait for half odds to load
                            half_odds = _ODDS_RE.findall(page.content())
                            if len(half_odds) >= 2:
                                results.append({
                                    'period': '1h',
                                    'market': 'moneyline',
                                    'home_team': home_team,
                                    'away_team': away_team,
                                    'home_odds': half_odds[0],
                                    'away_odds': half_odds[1],
                                })
                        except Exception:
                            pass  # keep the full-game odds regardless
            finally:
                # Guarantee the browser dies even if scraping raised.
                browser.close()

        if results:
            stored = _store_odds(results, league, db_url)

            with results_lock:
                total_stored += stored

            game_name = game_url.split('/')[-2][:30]
            last = results[-1]
            print(f"    ✓ {game_name}: {stored} odds ({last.get('home_team', '')[:15]} vs {last.get('away_team', '')[:15]})")
        return results

    except Exception as e:
        game_name = game_url.split('/')[-2][:30]
        print(f"    ✗ {game_name}: {str(e)[:40]}")

    return []


def get_game_urls(league, config):
    """Collect game-page URLs from a league's listing page.

    Args:
        league: league key (not used for fetching; kept for the caller's
            signature).
        config: dict with the listing 'url' and a 'pattern' regex that
            matches relative game links in the page HTML.

    Returns:
        A deduplicated list of absolute game URLs (order unspecified).
    """
    proxy = random.choice(PROXY_CONFIGS)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent=random.choice(USER_AGENTS),
            proxy=proxy,
        )
        # Mask the automation flag commonly checked by anti-bot scripts.
        context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        page = context.new_page()

        page.goto(config['url'], timeout=60000)
        time.sleep(random.uniform(3, 5))  # allow the listing to render

        html = page.content()
        unique_paths = set(re.findall(config['pattern'], html))

        browser.close()

    return ['https://www.oddsportal.com' + path for path in unique_paths]


def main():
    """Entry point: for each configured league, gather game URLs and
    scrape them concurrently, then print a summary of odds stored."""
    banner = "=" * 60
    print(banner)
    print("ODDSPORTAL HALF-TIME ODDS SCRAPER (THREADED)")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print(f"Proxies: {len(PROXY_CONFIGS)} available")
    print(banner)

    db_url = load_db_url()
    if not db_url:
        print("ERROR: No database URL")
        return

    global total_stored
    total_stored = 0
    max_workers = 3  # parallel browser instances per league

    for league, config in LEAGUES.items():
        print(f"\n{league.upper()}: Fetching game URLs...")

        try:
            urls = get_game_urls(league, config)
            print(f"  Found {len(urls)} games")

            if not urls:
                continue

            # Cap per-league volume to stay under rate limits.
            urls = urls[:10]

            print(f"  Processing with {max_workers} threads...")

            with ThreadPoolExecutor(max_workers=max_workers) as pool:
                # Rotate proxies round-robin across the submitted jobs.
                pending = [
                    pool.submit(
                        scrape_game,
                        url,
                        league,
                        PROXY_CONFIGS[idx % len(PROXY_CONFIGS)],
                        db_url,
                    )
                    for idx, url in enumerate(urls)
                ]

                for done in as_completed(pending):
                    try:
                        done.result()
                    except Exception as e:
                        print(f"    Thread error: {e}")

        except Exception as e:
            print(f"  Error: {e}")

        time.sleep(random.uniform(2, 4))

    print("\n" + banner)
    print(f"TOTAL: {total_stored} odds scraped from OddsPortal")
    print(banner)


# Script entry guard: run the scraper only when executed directly.
if __name__ == '__main__':
    main()
