#!/usr/bin/env python3
"""
Public Betting Percentage Scraper
Sources: Action Network, ESPN, etc.
Gets public money % on spreads, totals, and moneylines
"""
import requests
import psycopg2
import json
import re
from datetime import datetime, timezone
from bs4 import BeautifulSoup
import time

# Browser-like request headers: the scraped sites (Action Network, ESPN)
# tend to reject or serve degraded pages to non-browser User-Agents.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'application/json, text/html,application/xhtml+xml',
    'Accept-Language': 'en-US,en;q=0.5',
}

def load_db_url(env_path='/var/www/html/eventheodds/.env'):
    """Read SPORTS_DATABASE_URL from a dotenv-style file.

    Args:
        env_path: Path to the .env file. Defaults to the production
            location so existing callers are unaffected.

    Returns:
        The connection URL with any '?query' suffix stripped (e.g. a
        ``?sslmode=...`` tail), or '' when the variable is absent or the
        file cannot be opened.
    """
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    # split('=', 1) keeps any '=' inside the value intact;
                    # split('?')[0] drops driver query parameters.
                    return line.split('=', 1)[1].strip().split('?')[0]
    except OSError:
        # Missing/unreadable env file: degrade to '' so the caller's
        # "not db_url" guard handles it instead of a traceback.
        return ''
    return ''

def fetch_action_network_data(sport='nba'):
    """Fetch public betting data from Action Network.

    Tries the JSON embedded in the page's Next.js ``__NEXT_DATA__`` script
    tag first, then falls back to scraping game cards from the rendered
    HTML.

    Args:
        sport: Action Network sport slug (e.g. 'nba', 'nfl', 'ncaab').

    Returns:
        List of game dicts (shape depends on which parse path succeeded);
        empty list on any failure.
    """
    url = f'https://www.actionnetwork.com/{sport}/public-betting'

    print(f'  Fetching Action Network {sport.upper()}...')
    try:
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code != 200:
            print(f'    Status: {resp.status_code}')
            return []

        html = resp.text

        games = _parse_next_data(html)
        if games:
            return games

        # Fallback: scrape the rendered HTML directly.
        return _parse_game_cards(html)

    except Exception as e:
        # Best-effort scraper: report and return empty rather than crash
        # the whole multi-league run on one network/parse failure.
        print(f'    Error: {e}')
        return []


def _parse_next_data(html):
    """Extract the games list from the __NEXT_DATA__ JSON blob, if present.

    Returns [] when the blob is missing, malformed, or holds no games.
    """
    # Non-greedy .*? with re.S: the JSON payload may legally contain '<'
    # inside string values, which a [^<]+ character class would truncate.
    m = re.search(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.S)
    if not m:
        return []
    try:
        data = json.loads(m.group(1))
    except json.JSONDecodeError as e:
        print(f'    JSON decode error: {e}')
        return []

    page_props = data.get('props', {}).get('pageProps', {})
    # The payload key has varied over time; try the known variants.
    games = (
        page_props.get('games') or
        page_props.get('events') or
        page_props.get('contestsWithBets') or
        []
    )
    if games:
        print(f'    Found {len(games)} games in __NEXT_DATA__')
    return games


def _parse_game_cards(html):
    """Fallback HTML parse: pull team names and betting pcts from game cards.

    Heuristic, class-name based — assumes percentages appear in card order
    spread, moneyline, total (TODO confirm against live markup).
    """
    soup = BeautifulSoup(html, 'html.parser')
    games = []
    cards = soup.find_all('div', {'data-testid': re.compile(r'game|event|matchup', re.I)})

    for card in cards:
        game = {}

        # Team names: first two "team"-classed elements, away listed first.
        teams = card.find_all(['span', 'div'], class_=re.compile(r'team', re.I))
        if len(teams) >= 2:
            game['away_team'] = teams[0].get_text().strip()
            game['home_team'] = teams[1].get_text().strip()

        # Collect every numeric value from percentage-looking elements.
        pcts = []
        for el in card.find_all(['span', 'div'], class_=re.compile(r'percent|pct|betting', re.I)):
            m = re.search(r'(\d+(?:\.\d+)?)\s*%?', el.get_text())
            if m:
                pcts.append(float(m.group(1)))

        if pcts:
            game['spread_pct'] = pcts[0]
            game['ml_pct'] = pcts[1] if len(pcts) > 1 else None
            game['total_pct'] = pcts[2] if len(pcts) > 2 else None
            games.append(game)

    print(f'    Parsed {len(games)} games from HTML')
    return games

def fetch_espn_betting_data(sport='basketball', league='nba'):
    """Fetch betting splits from ESPN (if available).

    Scans the ESPN odds page for "NN% of/public" mentions to gauge whether
    betting-split data is present. Currently only logs the count and always
    returns an empty list of games.
    """
    url = f'https://www.espn.com/{sport}/{league}/odds'

    print(f'  Fetching ESPN {league.upper()} odds...')
    try:
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code != 200:
            return []

        # ESPN embeds data in script tags as well; for now just count
        # percentage references in the raw HTML.
        hits = re.findall(r'(\d+)\s*%\s*(of|public)', resp.text, re.I)
        if hits:
            print(f'    Found {len(hits)} percentage references')

        return []
    except Exception as e:
        print(f'    ESPN error: {e}')
        return []

def _pct(val):
    """Coerce a public-betting percentage to float.

    Treats 0 as a valid percentage (a bare truthiness test would drop it);
    returns None for None, '' or unparsable values.
    """
    if val is None or val == '':
        return None
    try:
        return float(val)
    except (TypeError, ValueError):
        return None


def _first_key(d, *keys):
    """Return the first non-None value among *keys* in dict *d*, else None."""
    for k in keys:
        v = d.get(k)
        if v is not None:
            return v
    return None


def _nested_name(game, key):
    """Safely read game[key]['name'] when game[key] is a dict, else ''."""
    val = game.get(key)
    return val.get('name', '') if isinstance(val, dict) else ''


def scrape_public_betting(leagues=None):
    """Main scraper: fetch public betting splits and upsert into "ConsensusOdds".

    Args:
        leagues: Optional list of league keys (e.g. ['nba', 'nfl']).
            Defaults to every supported league.

    Returns:
        {'success': False} when the DB URL is missing, otherwise
        {'success': True, 'stats_added': <upserted row count>}.
    """
    db_url = load_db_url()
    if not db_url:
        print('Error: SPORTS_DATABASE_URL not found')
        return {'success': False}

    conn = psycopg2.connect(db_url)
    try:
        cur = conn.cursor()

        # Ensure the ConsensusOdds table exists before inserting.
        cur.execute('''
            CREATE TABLE IF NOT EXISTS "ConsensusOdds" (
                id BIGSERIAL PRIMARY KEY,
                league TEXT NOT NULL,
                "gameId" TEXT NOT NULL,
                "gameDate" TIMESTAMPTZ,
                "homeTeam" TEXT,
                "awayTeam" TEXT,
                "spreadHomePct" FLOAT,
                "spreadAwayPct" FLOAT,
                "mlHomePct" FLOAT,
                "mlAwayPct" FLOAT,
                "totalOverPct" FLOAT,
                "totalUnderPct" FLOAT,
                source TEXT DEFAULT 'actionnetwork',
                "fetchedAt" TIMESTAMPTZ DEFAULT NOW(),
                raw JSONB,
                UNIQUE(league, "gameId")
            )
        ''')
        conn.commit()

        # League key -> Action Network sport slug (currently 1:1; kept
        # explicit so a diverging slug is a one-line change).
        league_map = {
            'nba': 'nba',
            'nfl': 'nfl',
            'nhl': 'nhl',
            'mlb': 'mlb',
            'ncaab': 'ncaab',
            'ncaaf': 'ncaaf',
        }

        target_leagues = leagues or list(league_map.keys())
        stats_added = 0

        for league in target_leagues:
            sport = league_map.get(league, league)
            print(f'\n=== Scraping {league.upper()} Public Betting ===')

            games = fetch_action_network_data(sport)

            if not games:
                print(f'  No data found for {league}')
                continue

            for game in games:
                # Team names may arrive flat ('home_team') or nested
                # ({'homeTeam': {'name': ...}}) depending on parse path.
                home = (
                    game.get('home_team', '')
                    or _nested_name(game, 'homeTeam')
                    or str(game.get('id', ''))[:10]
                )
                away = game.get('away_team', '') or _nested_name(game, 'awayTeam')

                # Synthetic per-day id; sanitized + truncated to fit the
                # UNIQUE(league, "gameId") constraint.
                # NOTE(review): uses local datetime.now() for the date while
                # "gameDate" is stored UTC — confirm server tz before changing.
                game_id = f"pb_{league}_{home}_{away}_{datetime.now().strftime('%Y%m%d')}"
                game_id = re.sub(r'[^a-zA-Z0-9_]', '', game_id)[:100]

                # Percentages, accepting both snake_case and camelCase keys;
                # _first_key/_pct keep a legitimate 0% instead of dropping it.
                spread_pct = _pct(_first_key(game, 'spread_pct', 'spreadPercentage'))
                ml_pct = _pct(_first_key(game, 'ml_pct', 'moneylinePercentage'))
                total_pct = _pct(_first_key(game, 'total_pct', 'totalPercentage'))

                try:
                    cur.execute('''
                        INSERT INTO "ConsensusOdds"
                        (league, "gameId", "gameDate", "homeTeam", "awayTeam",
                         "spreadHomePct", "spreadAwayPct", "mlHomePct", "mlAwayPct",
                         "totalOverPct", "totalUnderPct", source, raw)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        ON CONFLICT (league, "gameId")
                        DO UPDATE SET
                            "spreadHomePct" = EXCLUDED."spreadHomePct",
                            "spreadAwayPct" = EXCLUDED."spreadAwayPct",
                            "mlHomePct" = EXCLUDED."mlHomePct",
                            "mlAwayPct" = EXCLUDED."mlAwayPct",
                            "totalOverPct" = EXCLUDED."totalOverPct",
                            "totalUnderPct" = EXCLUDED."totalUnderPct",
                            "fetchedAt" = NOW(),
                            raw = EXCLUDED.raw
                    ''', (
                        league, game_id, datetime.now(timezone.utc),
                        home, away,
                        spread_pct,
                        100 - spread_pct if spread_pct is not None else None,
                        ml_pct,
                        100 - ml_pct if ml_pct is not None else None,
                        total_pct,
                        100 - total_pct if total_pct is not None else None,
                        'actionnetwork',
                        json.dumps(game) if isinstance(game, dict) else '{}'
                    ))
                    stats_added += 1
                except Exception as e:
                    # Roll back the failed statement so later inserts in
                    # this transaction can still proceed.
                    conn.rollback()
                    print(f'    Error inserting: {e}')

            conn.commit()
            # Be polite to the source site between leagues.
            time.sleep(2)

        cur.close()
    finally:
        # Release the connection even if a scrape/DB error escapes the loop.
        conn.close()

    print(f'\n✅ Public Betting data ingested: {stats_added} records')
    return {'success': True, 'stats_added': stats_added}

if __name__ == '__main__':
    import sys

    # Optional CLI argument: comma-separated league list, e.g. "nba,nfl".
    cli_leagues = None
    if len(sys.argv) > 1:
        cli_leagues = sys.argv[1].split(',')
    scrape_public_betting(cli_leagues)
