#!/usr/bin/env python3
"""
VegasInsider Scraper
Fetches:
1. Consensus Betting Percentages (public money %)
2. Futures Odds (Championship, MVP, Season Wins)
"""
import requests
import psycopg2
import json
import re
from datetime import datetime, timezone
from bs4 import BeautifulSoup
import time

# All scrape paths below are joined onto this host.
BASE_URL = 'https://www.vegasinsider.com'

# Per-league URL paths for the two scrape types (consensus percentages and
# futures odds). Note the college leagues use different slugs than their keys.
LEAGUES = {
    'nba': {'consensus': '/nba/consensus/', 'futures': '/nba/odds/futures/'},
    'nfl': {'consensus': '/nfl/consensus/', 'futures': '/nfl/odds/futures/'},
    'nhl': {'consensus': '/nhl/consensus/', 'futures': '/nhl/odds/futures/'},
    'mlb': {'consensus': '/mlb/consensus/', 'futures': '/mlb/odds/futures/'},
    'ncaab': {'consensus': '/college-basketball/consensus/', 'futures': '/college-basketball/odds/futures/'},
    'ncaaf': {'consensus': '/college-football/consensus/', 'futures': '/college-football/odds/futures/'},
}

# Browser-like request headers: present as a desktop Chrome session rather
# than the default python-requests user agent, which sites commonly block.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': 'https://www.vegasinsider.com/',
}

def load_db_url():
    """Read SPORTS_DATABASE_URL from the app's .env file.

    Returns:
        The connection URL with any query string (e.g. ``?sslmode=...``)
        stripped, or '' when the file or the variable is missing.
    """
    try:
        with open('/var/www/html/eventheodds/.env', 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    # Keep only the value; drop query params psycopg2 may not want.
                    value = line.split('=', 1)[1].strip().split('?')[0]
                    # Tolerate quoted .env values: KEY="value" / KEY='value'.
                    return value.strip('\'"')
    except OSError:
        # A missing/unreadable .env previously crashed the scraper with an
        # uncaught FileNotFoundError; treat it the same as "not configured".
        pass
    return ''

def fetch_page(url):
    """Fetch *url* and return its HTML body, or None after 3 failed attempts.

    Retries on network errors and non-200 responses, sleeping 2 seconds
    between attempts (but not after the final one).
    """
    attempts = 3
    for attempt in range(attempts):
        try:
            resp = requests.get(url, headers=HEADERS, timeout=30)
            if resp.status_code == 200:
                return resp.text
            print(f'  Status {resp.status_code} for {url}')
        except requests.RequestException as e:
            # Narrowed from bare Exception: only network/HTTP failures are
            # retryable; programming errors should surface immediately.
            print(f'  Attempt {attempt+1} failed: {e}')
        if attempt < attempts - 1:
            time.sleep(2)
    return None

def parse_consensus_page(html, league):
    """Parse consensus betting percentages out of a VegasInsider page.

    The site's markup varies, so three extraction strategies are tried and
    all hits are accumulated:
      1. JSON objects embedded in <script> tags,
      2. <table> rows containing percentage values,
      3. div-based game cards.

    Args:
        html: raw page HTML.
        league: league key; currently unused but kept for interface stability.

    Returns:
        List of dicts; the shape depends on which pattern matched
        (pattern 3 yields home_team/away_team/spread_pct/ml_pct/total_pct).
    """
    soup = BeautifulSoup(html, 'html.parser')
    games = []

    # Pattern 1: JSON embedded in <script> tags.
    for script in soup.find_all('script'):
        text = script.string
        if not text:
            continue
        lowered = text.lower()
        if 'consensus' not in lowered and 'percentage' not in lowered:
            continue
        # Grab a flat {"games": ...} object; nested braces are not handled.
        json_match = re.search(r'\{[^{}]*"games"[^{}]*\}', text)
        if not json_match:
            continue
        try:
            data = json.loads(json_match.group())
        except ValueError:
            # Narrowed from a bare except: only malformed JSON is expected
            # here (json.JSONDecodeError is a ValueError subclass).
            continue
        games.extend(data.get('games', []))

    # Pattern 2: table rows whose combined text contains percentages.
    for table in soup.find_all('table'):
        for row in table.find_all('tr'):
            cells = row.find_all(['td', 'th'])
            if len(cells) < 4:
                continue
            text = ' '.join(c.get_text() for c in cells)
            pct_matches = re.findall(r'(\d+(?:\.\d+)?)\s*%', text)
            if pct_matches:
                games.append({
                    'teams': cells[0].get_text().strip(),
                    'percentages': pct_matches,
                    'raw': text,
                })

    # Pattern 3: div-based game cards; needs at least two percentages and
    # at least one recognizable team-name element.
    game_divs = soup.find_all('div', class_=re.compile(r'game|matchup|event', re.I))
    for div in game_divs:
        pct_matches = re.findall(r'(\d+(?:\.\d+)?)\s*%', div.get_text())
        if len(pct_matches) < 2:
            continue
        team_spans = div.find_all(['span', 'div'], class_=re.compile(r'team|name', re.I))
        teams = [s.get_text().strip() for s in team_spans[:2]]
        if teams:
            # Percentages are assumed to appear in spread/ML/total order —
            # TODO confirm against the live markup.
            games.append({
                'home_team': teams[0] if len(teams) > 0 else '',
                'away_team': teams[1] if len(teams) > 1 else '',
                'spread_pct': pct_matches[0] if len(pct_matches) > 0 else None,
                'ml_pct': pct_matches[1] if len(pct_matches) > 1 else None,
                'total_pct': pct_matches[2] if len(pct_matches) > 2 else None,
            })

    return games

def parse_futures_page(html, league):
    """Parse futures odds (championship / MVP / season wins) from HTML.

    Args:
        html: raw page HTML.
        league: league key; currently unused but kept for interface stability.

    Returns:
        List of dicts: {'type': str, 'name': str, 'odds': int (American)}.
    """
    soup = BeautifulSoup(html, 'html.parser')
    futures = []

    # Table-based layouts: classify each table by its header text.
    for table in soup.find_all('table'):
        header = table.find('thead') or table.find('tr')
        header_text = header.get_text().lower() if header else ''

        futures_type = 'unknown'
        if 'champion' in header_text or 'winner' in header_text:
            futures_type = 'championship'
        elif 'mvp' in header_text:
            futures_type = 'mvp'
        elif 'win' in header_text and 'total' in header_text:
            futures_type = 'season_wins'

        rows = table.find_all('tr')
        for row in rows[1:]:  # Skip the header row
            cells = row.find_all('td')
            if len(cells) < 2:
                continue
            name = cells[0].get_text().strip()
            odds_text = cells[1].get_text().strip()

            # Parse American odds (e.g. "+500", "-150"). The sign is now
            # required: the old optional-sign pattern ([+-]?\d+) happily
            # matched unrelated numbers (win totals, ranks) as prices.
            odds_match = re.search(r'([+-]\d+)', odds_text)
            odds = int(odds_match.group(1)) if odds_match else None

            if name and odds is not None:
                futures.append({
                    'type': futures_type,
                    'name': name,
                    'odds': odds,
                })

    # List-based layouts: "Team Name +500" / "Player Name -150" items.
    lists = soup.find_all(['ul', 'ol'], class_=re.compile(r'odds|futures', re.I))
    for lst in lists:
        for item in lst.find_all('li'):
            match = re.match(r'(.+?)\s+([+-]\d+)', item.get_text().strip())
            if match:
                futures.append({
                    'type': 'unknown',
                    'name': match.group(1).strip(),
                    'odds': int(match.group(2)),
                })

    return futures

def _pct_pair(value):
    """Return (pct, 100 - pct) as floats, or (None, None) when value is falsy."""
    if not value:
        return (None, None)
    pct = float(value)
    return (pct, 100.0 - pct)

def scrape_consensus(leagues=None):
    """Scrape consensus betting percentages and upsert into "ConsensusOdds".

    Args:
        leagues: optional list of league keys (subset of LEAGUES); defaults
            to every configured league. Unknown keys are skipped.

    Returns:
        {'success': False} when the DB URL is missing, otherwise
        {'success': True, 'stats_added': <number of rows upserted>}.
    """
    db_url = load_db_url()
    if not db_url:
        print('Error: SPORTS_DATABASE_URL not found')
        return {'success': False}

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    try:
        # Create the ConsensusOdds table on first run.
        cur.execute('''
            CREATE TABLE IF NOT EXISTS "ConsensusOdds" (
                id BIGSERIAL PRIMARY KEY,
                league TEXT NOT NULL,
                "gameId" TEXT NOT NULL,
                "gameDate" TIMESTAMPTZ,
                "homeTeam" TEXT,
                "awayTeam" TEXT,
                "spreadHomePct" FLOAT,
                "spreadAwayPct" FLOAT,
                "mlHomePct" FLOAT,
                "mlAwayPct" FLOAT,
                "totalOverPct" FLOAT,
                "totalUnderPct" FLOAT,
                "fetchedAt" TIMESTAMPTZ DEFAULT NOW(),
                raw JSONB,
                UNIQUE(league, "gameId")
            )
        ''')
        conn.commit()

        target_leagues = leagues or list(LEAGUES.keys())
        stats_added = 0

        for league in target_leagues:
            if league not in LEAGUES:
                continue

            url = BASE_URL + LEAGUES[league]['consensus']
            print(f'\n=== Scraping {league.upper()} Consensus from {url} ===')

            html = fetch_page(url)
            if not html:
                print(f'  Failed to fetch {league} consensus')
                continue

            games = parse_consensus_page(html, league)
            print(f'  Found {len(games)} games with consensus data')

            for game in games:
                # Synthesize a per-day game ID from whatever team text we got;
                # the page does not expose a stable game identifier.
                home = game.get('home_team', '') or game.get('teams', '')[:20]
                away = game.get('away_team', '') or ''
                game_id = f"vi_{league}_{home}_{away}_{datetime.now().strftime('%Y%m%d')}"

                spread_home, spread_away = _pct_pair(game.get('spread_pct'))
                ml_home, ml_away = _pct_pair(game.get('ml_pct'))
                total_over, total_under = _pct_pair(game.get('total_pct'))

                try:
                    # The upsert now refreshes ALL mutable columns; the old
                    # version only updated the home/over sides, leaving the
                    # away/under percentages, team names and raw JSON stale.
                    cur.execute('''
                        INSERT INTO "ConsensusOdds"
                        (league, "gameId", "gameDate", "homeTeam", "awayTeam",
                         "spreadHomePct", "spreadAwayPct", "mlHomePct", "mlAwayPct",
                         "totalOverPct", "totalUnderPct", raw)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        ON CONFLICT (league, "gameId")
                        DO UPDATE SET
                            "homeTeam" = EXCLUDED."homeTeam",
                            "awayTeam" = EXCLUDED."awayTeam",
                            "spreadHomePct" = EXCLUDED."spreadHomePct",
                            "spreadAwayPct" = EXCLUDED."spreadAwayPct",
                            "mlHomePct" = EXCLUDED."mlHomePct",
                            "mlAwayPct" = EXCLUDED."mlAwayPct",
                            "totalOverPct" = EXCLUDED."totalOverPct",
                            "totalUnderPct" = EXCLUDED."totalUnderPct",
                            raw = EXCLUDED.raw,
                            "fetchedAt" = NOW()
                    ''', (
                        league, game_id, datetime.now(timezone.utc),
                        game.get('home_team', ''), game.get('away_team', ''),
                        spread_home, spread_away,
                        ml_home, ml_away,
                        total_over, total_under,
                        json.dumps(game)
                    ))
                    stats_added += 1
                except psycopg2.Error as e:
                    # Roll back so the connection is usable for the next row.
                    conn.rollback()
                    print(f'  Error inserting: {e}')

            conn.commit()
            time.sleep(2)  # Rate limit

        print(f'\n✅ Consensus data ingested: {stats_added} records')
        return {'success': True, 'stats_added': stats_added}
    finally:
        # Always release DB resources, even if a scrape/parse step raises.
        cur.close()
        conn.close()

def scrape_futures(leagues=None):
    """Scrape futures odds and upsert into "FuturesOdds".

    Args:
        leagues: optional list of league keys (subset of LEAGUES); defaults
            to every configured league. Unknown keys are skipped.

    Returns:
        {'success': False} when the DB URL is missing, otherwise
        {'success': True, 'stats_added': <number of rows upserted>}.
    """
    db_url = load_db_url()
    if not db_url:
        print('Error: SPORTS_DATABASE_URL not found')
        return {'success': False}

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()
    try:
        # Create the FuturesOdds table on first run.
        cur.execute('''
            CREATE TABLE IF NOT EXISTS "FuturesOdds" (
                id BIGSERIAL PRIMARY KEY,
                league TEXT NOT NULL,
                season INT,
                "futuresType" TEXT NOT NULL,
                "teamOrPlayer" TEXT NOT NULL,
                odds INT,
                "impliedProb" FLOAT,
                "fetchedAt" TIMESTAMPTZ DEFAULT NOW(),
                raw JSONB,
                UNIQUE(league, season, "futuresType", "teamOrPlayer")
            )
        ''')
        conn.commit()

        target_leagues = leagues or list(LEAGUES.keys())
        stats_added = 0
        # Seasons roll over in September; before that, credit the prior year.
        current_season = datetime.now().year if datetime.now().month >= 9 else datetime.now().year - 1

        for league in target_leagues:
            if league not in LEAGUES:
                continue

            url = BASE_URL + LEAGUES[league]['futures']
            print(f'\n=== Scraping {league.upper()} Futures from {url} ===')

            html = fetch_page(url)
            if not html:
                print(f'  Failed to fetch {league} futures')
                continue

            futures = parse_futures_page(html, league)
            print(f'  Found {len(futures)} futures entries')

            for future in futures:
                # Guard against a None odds value: the old `get('odds', 0)`
                # only covered a missing key and would TypeError on None.
                odds = future.get('odds') or 0
                # Implied probability from American odds.
                if odds > 0:
                    implied_prob = 100 / (odds + 100)
                elif odds < 0:
                    implied_prob = abs(odds) / (abs(odds) + 100)
                else:
                    implied_prob = 0.5

                try:
                    cur.execute('''
                        INSERT INTO "FuturesOdds"
                        (league, season, "futuresType", "teamOrPlayer", odds, "impliedProb", raw)
                        VALUES (%s, %s, %s, %s, %s, %s, %s)
                        ON CONFLICT (league, season, "futuresType", "teamOrPlayer")
                        DO UPDATE SET
                            odds = EXCLUDED.odds,
                            "impliedProb" = EXCLUDED."impliedProb",
                            raw = EXCLUDED.raw,
                            "fetchedAt" = NOW()
                    ''', (
                        league, current_season, future.get('type', 'unknown'),
                        future.get('name', ''), odds, implied_prob,
                        json.dumps(future)
                    ))
                    stats_added += 1
                except psycopg2.Error as e:
                    # Roll back so the connection is usable for the next row.
                    conn.rollback()
                    print(f'  Error inserting: {e}')

            conn.commit()
            time.sleep(2)

        print(f'\n✅ Futures data ingested: {stats_added} records')
        return {'success': True, 'stats_added': stats_added}
    finally:
        # Always release DB resources, even if a scrape/parse step raises.
        cur.close()
        conn.close()

def main():
    """CLI entry point: ``script.py [consensus|futures|all] [league1,league2,...]``."""
    import sys

    args = sys.argv[1:]
    mode = args[0] if args else 'all'
    leagues = args[1].split(',') if len(args) > 1 else None

    if mode in ('consensus', 'all'):
        scrape_consensus(leagues)
    if mode in ('futures', 'all'):
        scrape_futures(leagues)

# Allow direct execution: `python3 <script> [mode] [leagues]`.
if __name__ == '__main__':
    main()
