#!/usr/bin/env python3
"""
OddsPortal Half-Time Odds Scraper v2
Uses Playwright with residential proxy to extract half-time spread/total.

Targets: NBA, NFL, NHL
Output: GameHalfLine table

Run: Every 4-6 hours via cron
"""
import json
import os
import sys
import time
import re
import random
import psycopg2
from datetime import datetime, timezone, timedelta
from dotenv import load_dotenv

# Prepend the scraper's dedicated virtualenv site-packages to sys.path before
# the Playwright import below. Presumably this lets the script run under an
# interpreter that does not have the venv activated (e.g. cron) - confirm.
VENV_PATH = '/var/www/html/eventheodds/.venv-scraper/lib/python3.12/site-packages'
if VENV_PATH not in sys.path:
    sys.path.insert(0, VENV_PATH)

from playwright.sync_api import sync_playwright

# Residential proxy (better for OddsPortal).
# Passed as the `proxy` option when main() creates the browser context.
PROXY_CONFIG = {'server': 'socks5://54.38.19.233:3028'}

# League config, keyed by the league code stored in the database.
#   url     - league listing page that main() loads to discover games.
#   pattern - regex run over that page's HTML to collect relative game-page
#             links: a lowercase slug followed by an 8-character
#             alphanumeric id and a trailing slash.
LEAGUES = {
    'nba': {
        'url': 'https://www.oddsportal.com/basketball/usa/nba/',
        'pattern': r'/basketball/usa/nba/[a-z0-9-]+-[A-Za-z0-9]{8}/',
    },
    'nfl': {
        'url': 'https://www.oddsportal.com/american-football/usa/nfl/',
        'pattern': r'/american-football/usa/nfl/[a-z0-9-]+-[A-Za-z0-9]{8}/',
    },
    'nhl': {
        'url': 'https://www.oddsportal.com/hockey/usa/nhl/',
        'pattern': r'/hockey/usa/nhl/[a-z0-9-]+-[A-Za-z0-9]{8}/',
    },
}

# Desktop user-agent strings; main() picks one at random for the browser
# context each run.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/120.0.0.0',
]


def load_db_url():
    """Return the sports database URL with any query string stripped.

    Loads the project's .env file, then reads SPORTS_DATABASE_URL from the
    environment. Returns an empty string when the variable is not set.
    """
    load_dotenv('/var/www/html/eventheodds/.env')
    raw_url = os.environ.get('SPORTS_DATABASE_URL', '')
    # Everything from the first '?' onwards is dropped before the URL is
    # handed to the database driver.
    base_url, _sep, _query = raw_url.partition('?')
    return base_url


# American odds rendered inside any element whose class attribute contains
# "odds", e.g. class="...odds...">-110<
_ODDS_RE = re.compile(r'class="[^"]*odds[^"]*"[^>]*>([+-]?\d{3,4})<')

# Inclusive (low, high) bounds for a plausible full-game total per league.
# Used to discard unrelated numbers picked up by the loose O/U regex.
_TOTAL_RANGES = {
    'nba': (150, 300),
    'nfl': (25, 75),
    'nhl': (3, 12),
}


def _filter_totals(candidates, league):
    """Return the candidate total strings that fall in the league's plausible range."""
    bounds = _TOTAL_RANGES.get(league)
    if bounds is None:
        # Unknown league: no range to validate against, so keep nothing.
        return []
    low, high = bounds
    # Candidates come from a digits-only regex group, so float() cannot fail.
    return [t for t in candidates if low <= float(t) <= high]


def extract_odds_from_page(page, game_url, league):
    """Scrape one game page and return the markets that could be extracted.

    Args:
        page: Playwright page object; it is navigated to ``game_url``.
        game_url: Absolute URL of the game page.
        league: League key ('nba', 'nfl' or 'nhl'); selects the range used
            to validate totals. Any other value yields no total.

    Returns:
        A list of dicts, each with 'period', 'market', 'home_team' and
        'away_team'. Moneyline entries add 'home_odds' / 'away_odds'
        (strings as scraped); spread and total entries add 'line_value'.
        The list is empty when the page title cannot be parsed, and holds
        whatever was collected so far when an error occurs (the error is
        printed, not raised).
    """
    results = []

    try:
        page.goto(game_url, timeout=45000)
        time.sleep(random.uniform(2, 4))

        content = page.content()

        # Get teams from title
        # Format: "Team A - Team B Odds, Predictions & H2H | OddsPortal"
        title = page.title()
        match = re.match(r'^([^-]+)\s*-\s*([^|]+)', title)
        if not match:
            return results

        # NOTE(review): this treats the first team in the title as the away
        # team and the second as the home team - confirm against the site.
        away_team = match.group(1).strip()
        home_team = match.group(2).strip()

        # Clean up team names - remove "Odds, Predictions & H2H" suffix
        home_team = re.sub(r'\s+Odds.*$', '', home_team, flags=re.I).strip()
        away_team = re.sub(r'\s+Odds.*$', '', away_team, flags=re.I).strip()

        odds_matches = _ODDS_RE.findall(content)

        if len(odds_matches) >= 2:
            # The first two odds on the page are taken as the full-game
            # moneyline pair.
            results.append({
                'period': 'full',
                'market': 'moneyline',
                'home_team': home_team,
                'away_team': away_team,
                'home_odds': odds_matches[0],
                'away_odds': odds_matches[1],
            })

            # Only try the half-time tab when the page mentions it at all.
            if '1st half' in content.lower():
                try:
                    page.locator('text=1st Half').first.click()
                    time.sleep(2)
                    half_odds = _ODDS_RE.findall(page.content())
                except Exception as e:
                    # Best effort: a missing or unclickable tab must not
                    # discard the full-game data already collected.
                    print(f"      1st Half tab unavailable: {str(e)[:50]}")
                    half_odds = []

                if half_odds:
                    results.append({
                        'period': '1h',
                        'market': 'moneyline',
                        'home_team': home_team,
                        'away_team': away_team,
                        'home_odds': half_odds[0],
                        'away_odds': half_odds[1] if len(half_odds) > 1 else None,
                    })

        # Look for spread/handicap values: a signed number following a tag
        # that mentions "handicap" or "spread".
        spread_matches = re.findall(r'(?:handicap|spread)[^>]*>.*?([+-]\d+\.?\d?)', content, re.I | re.S)
        if not spread_matches:
            # Fallback: any standalone signed number in the 0-20 range.
            spread_matches = re.findall(r'>([+-](?:1?\d(?:\.\d)?|20))<', content)

        # Look for total/over-under values, then keep only realistic ones.
        total_matches = re.findall(r'(?:O|U|Over|Under)\s*(\d+\.?\d*)', content, re.I)
        valid_totals = _filter_totals(total_matches, league)

        if spread_matches:
            results.append({
                'period': 'full',
                'market': 'spread',
                'home_team': home_team,
                'away_team': away_team,
                'line_value': spread_matches[0],
            })

        if valid_totals:
            results.append({
                'period': 'full',
                'market': 'total',
                'home_team': home_team,
                'away_team': away_team,
                'line_value': valid_totals[0],
            })

        return results

    except Exception as e:
        print(f"      Error: {str(e)[:50]}")
        return results


def store_odds(conn, league, odds_list):
    """Store extracted odds in the GameHalfLine table.

    Each row is inserted and committed on its own. A failing row is rolled
    back and reported, so it cannot leave the transaction in an aborted
    state that would silently discard the other rows of the batch.

    Args:
        conn: Open DB-API connection (psycopg2 in this script).
        league: League key written to the ``league`` column.
        odds_list: Dicts as produced by ``extract_odds_from_page``.

    Returns:
        Number of rows actually inserted. Rows skipped by
        ``ON CONFLICT DO NOTHING`` and rows that failed are not counted.
    """
    cur = conn.cursor()
    stored = 0

    try:
        for od in odds_list:
            # NOTE(review): the real event date is not scraped; "now + 1 day"
            # is used as a placeholder. Because it changes on every call it
            # may also defeat ON CONFLICT de-duplication - confirm against
            # the table's unique constraint.
            game_date = datetime.now(timezone.utc) + timedelta(days=1)

            try:
                # NOTE(review): only 'home_odds' is persisted (as bookOdds);
                # 'away_odds' is currently dropped.
                cur.execute('''
                    INSERT INTO "GameHalfLine"
                    (league, "gameDate", "homeTeam", "awayTeam",
                     period, market, "lineValue", "bookOdds", bookmaker,
                     "createdAt", "updatedAt")
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 'oddsportal', NOW(), NOW())
                    ON CONFLICT DO NOTHING
                ''', (
                    league,
                    game_date,
                    od.get('home_team'),
                    od.get('away_team'),
                    od.get('period', '1h'),
                    od.get('market', 'moneyline'),
                    od.get('line_value'),
                    od.get('home_odds'),
                ))
                # rowcount is 0 when ON CONFLICT DO NOTHING skipped the row.
                inserted = cur.rowcount
                conn.commit()
            except Exception as e:
                # Clear the failed transaction so the next row can proceed.
                conn.rollback()
                print(f"      DB error ({od.get('market')}): {str(e)[:80]}")
                continue

            if inserted != 0:
                stored += 1

        # Kept for callers that rely on this function ending with a commit.
        conn.commit()
    finally:
        cur.close()

    return stored


def main():
    """Scrape every configured league and store the extracted odds.

    For each league the listing page is loaded, game links are collected in
    page order (duplicates removed) and at most the first 8 games are
    scraped and stored. A failure inside one league is printed and does not
    stop the remaining leagues. The browser and the database connection are
    closed even if an unexpected error escapes.
    """
    print("=" * 60)
    print("ODDSPORTAL HALF-TIME ODDS SCRAPER v2")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    db_url = load_db_url()
    if not db_url:
        print("ERROR: No database URL")
        return

    conn = psycopg2.connect(db_url)
    total = 0

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    viewport={'width': 1920, 'height': 1080},
                    user_agent=random.choice(USER_AGENTS),
                    proxy=PROXY_CONFIG,
                )
                # Hide the automation flag that sites commonly probe for.
                context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
                page = context.new_page()

                for league, config in LEAGUES.items():
                    print(f"\n{league.upper()}: Scraping OddsPortal...")

                    try:
                        # Get league page
                        page.goto(config['url'], timeout=60000)
                        time.sleep(random.uniform(3, 5))

                        content = page.content()
                        # De-duplicate while keeping page order so the
                        # 8-game cap picks the same games on every run,
                        # not an arbitrary set-ordered subset.
                        game_urls = list(dict.fromkeys(re.findall(config['pattern'], content)))
                        print(f"  Found {len(game_urls)} games")

                        # Process games (limit to avoid rate limiting)
                        for url in game_urls[:8]:
                            full_url = 'https://www.oddsportal.com' + url
                            print(f"    {url.split('/')[-2][:40]}...")

                            odds = extract_odds_from_page(page, full_url, league)
                            if odds:
                                stored = store_odds(conn, league, odds)
                                total += stored
                                print(f"      Stored {stored} odds")

                            time.sleep(random.uniform(2, 4))

                    except Exception as e:
                        print(f"  Error: {e}")

                    time.sleep(random.uniform(3, 5))
            finally:
                browser.close()
    finally:
        conn.close()

    print("\n" + "=" * 60)
    print(f"TOTAL: {total} odds scraped from OddsPortal")
    print("=" * 60)

if __name__ == '__main__':
    main()
