#!/usr/bin/env python3
"""
OddsPortal Odds Scraper
Aggregates odds from 50+ bookmakers for multiple sports.
Covers: NBA, NFL, NHL, MLB, NCAAB, NCAAF, Soccer, MMA
"""
import requests
from bs4 import BeautifulSoup
import psycopg2
import json
import os
import time
import random
import re
from datetime import datetime, timezone

REQUEST_DELAY = 3.0  # Respect rate limits

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Referer': 'https://www.google.com/',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'cross-site',
}

# OddsPortal league URLs
ODDSPORTAL_LEAGUES = {
    'nba': {
        'url': 'https://www.oddsportal.com/basketball/usa/nba/',
        'league': 'nba',
    },
    'ncaab': {
        'url': 'https://www.oddsportal.com/basketball/usa/ncaa/',
        'league': 'ncaab',
    },
    'wnba': {
        'url': 'https://www.oddsportal.com/basketball/usa/wnba/',
        'league': 'wnba',
    },
    'nfl': {
        'url': 'https://www.oddsportal.com/american-football/usa/nfl/',
        'league': 'nfl',
    },
    'ncaaf': {
        'url': 'https://www.oddsportal.com/american-football/usa/ncaa/',
        'league': 'ncaaf',
    },
    'nhl': {
        'url': 'https://www.oddsportal.com/hockey/usa/nhl/',
        'league': 'nhl',
    },
    'mlb': {
        'url': 'https://www.oddsportal.com/baseball/usa/mlb/',
        'league': 'mlb',
    },
    'epl': {
        'url': 'https://www.oddsportal.com/soccer/england/premier-league/',
        'league': 'epl',
    },
    'laliga': {
        'url': 'https://www.oddsportal.com/soccer/spain/laliga/',
        'league': 'laliga',
    },
    'seriea': {
        'url': 'https://www.oddsportal.com/soccer/italy/serie-a/',
        'league': 'seriea',
    },
    'bundesliga': {
        'url': 'https://www.oddsportal.com/soccer/germany/bundesliga/',
        'league': 'bundesliga',
    },
    'ligue1': {
        'url': 'https://www.oddsportal.com/soccer/france/ligue-1/',
        'league': 'ligue1',
    },
    'mma': {
        'url': 'https://www.oddsportal.com/mma/ufc/',
        'league': 'mma',
    },
}
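
# To cover another competition, add an entry keyed by a short code. The URL
# below is illustrative only -- confirm the exact path on oddsportal.com:
#
#   'mls': {
#       'url': 'https://www.oddsportal.com/soccer/usa/mls/',
#       'league': 'mls',
#   },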


def load_db_url():
    """Load the PostgreSQL URL from a .env file or the environment"""
    env_paths = [
        '/var/www/html/eventheodds/.env',
        os.path.join(os.path.dirname(__file__), '..', '.env'),
    ]
    for env_path in env_paths:
        try:
            with open(env_path, 'r') as f:
                for line in f:
                    if line.startswith('SPORTS_DATABASE_URL='):
                        # Unquote the value and drop any query string
                        # (ORM-style params that psycopg2/libpq won't accept)
                        value = line.split('=', 1)[1].strip().strip('\'"')
                        return value.split('?')[0]
        except FileNotFoundError:
            continue
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]
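
# The .env line this looks for is shaped like the following (values made up):
#
#   SPORTS_DATABASE_URL=postgresql://user:secret@localhost:5432/sportsdb?schema=public
#
# The split('?') above discards the "?schema=public" tail, since libpq
# rejects ORM-only query parameters like "schema".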


def fetch_page(url, session=None, retries=3):
    """Fetch a page with rate limiting, jittered delays, and bounded 429 retries"""
    time.sleep(REQUEST_DELAY + random.uniform(0.5, 2.0))

    if session is None:
        session = requests.Session()

    try:
        resp = session.get(url, headers=HEADERS, timeout=30)
        if resp.status_code == 200:
            return resp.text
        elif resp.status_code == 403:
            print("  Blocked (403) - may need a proxy or a different approach")
            return None
        elif resp.status_code == 429:
            if retries <= 0:
                print("  Rate limited - retries exhausted, giving up")
                return None
            print("  Rate limited - waiting 60s")
            time.sleep(60)
            return fetch_page(url, session, retries - 1)
        else:
            print(f"  Status {resp.status_code}")
            return None
    except requests.RequestException as e:
        print(f"  Error: {e}")
        return None
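
# Typical usage -- share one Session so cookies persist across requests:
#
#   session = requests.Session()
#   html = fetch_page(ODDSPORTAL_LEAGUES['nba']['url'], session)
#   if html:
#       soup = BeautifulSoup(html, 'html.parser')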


def parse_american_odds(odds_str):
    """Convert an odds string (decimal or American) to an American-format integer"""
    if not odds_str:
        return None
    try:
        odds_str = odds_str.strip()
        # Decimal odds contain a dot; convert to American
        if '.' in odds_str:
            decimal = float(odds_str)
            if decimal <= 1.0:
                return None  # Not a valid price (1.0 would divide by zero)
            if decimal >= 2.0:
                return int((decimal - 1) * 100)
            return int(-100 / (decimal - 1))
        # Already American ("+150", "-110")
        return int(odds_str.replace('+', ''))
    except (ValueError, ZeroDivisionError):
        return None
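
# Sanity checks for the conversion above (run manually if needed):
#
#   parse_american_odds('2.50')  -> 150    # (2.50 - 1) * 100
#   parse_american_odds('1.50')  -> -200   # -100 / (1.50 - 1)
#   parse_american_odds('+135')  -> 135
#   parse_american_odds('-110')  -> -110
#   parse_american_odds('1.00')  -> None   # guarded; no valid price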


def scrape_oddsportal_league(league_key, session=None):
    """Scrape odds from OddsPortal for a specific league"""
    config = ODDSPORTAL_LEAGUES.get(league_key)
    if not config:
        return []

    print(f"\n[OddsPortal] Fetching {league_key.upper()} odds...")

    if session is None:
        session = requests.Session()

    html = fetch_page(config['url'], session)
    if not html:
        return []

    soup = BeautifulSoup(html, 'html.parser')
    games = []

    # OddsPortal renders its main odds grid client-side, so the "eventRow"
    # divs in the static HTML are usually empty shells. Parse whatever legacy
    # table markup survives in the server-rendered response instead.
    tables = soup.find_all('table', {'class': re.compile(r'table|odds', re.I)})
    for table in tables:
        for row in table.find_all('tr'):
            cells = row.find_all('td')
            if len(cells) < 3:
                continue
            try:
                # Team names live in the first cell, separated by markup
                teams_text = cells[0].get_text(separator='|').strip()

                home_team = away_team = None
                for sep in ('|', ' - ', ' vs '):
                    if sep in teams_text:
                        parts = teams_text.split(sep)
                        if len(parts) >= 2:
                            home_team = parts[0].strip()
                            away_team = parts[1].strip()
                        break
                if not home_team or not away_team:
                    continue

                # Remaining cells hold the prices
                odds_values = []
                for cell in cells[1:]:
                    odds_val = parse_american_odds(cell.get_text().strip())
                    if odds_val:
                        odds_values.append(odds_val)

                if len(odds_values) >= 2:
                    games.append({
                        'homeTeam': home_team,
                        'awayTeam': away_team,
                        'league': config['league'],
                        'moneylineHome': odds_values[0],
                        'moneylineAway': odds_values[-1],
                        'source': 'oddsportal',
                        'fetchedAt': datetime.now(timezone.utc).isoformat(),
                    })
            except Exception:
                continue

    # OddsPortal also embeds odds data in inline <script> JSON. This is a
    # best-effort extraction stub: the regex only matches flat objects, and
    # mapping the payload into `games` is left until its structure is known.
    for script in soup.find_all('script'):
        script_text = script.string or ''
        if 'odds' in script_text.lower() and 'home' in script_text.lower():
            try:
                json_match = re.search(r'\{[^{}]*"odds"[^{}]*\}', script_text)
                if json_match:
                    # TODO: translate `data` into game dicts once the
                    # embedded payload format is confirmed
                    data = json.loads(json_match.group())
            except ValueError:
                pass

    print(f"  Found {len(games)} games with odds")
    return games
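
# Each scraped game is a plain dict; illustrative values:
#
#   {
#       'homeTeam': 'Boston Celtics',
#       'awayTeam': 'Miami Heat',
#       'league': 'nba',
#       'moneylineHome': -180,
#       'moneylineAway': 155,
#       'source': 'oddsportal',
#       'fetchedAt': '2024-01-15T18:30:00+00:00',
#   }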


def scrape_action_network_odds(sport='ncaab'):
    """Scrape odds from Action Network (good for college sports)"""
    print(f"\n[Action Network] Fetching {sport.upper()} odds...")

    sport_urls = {
        'ncaab': 'https://www.actionnetwork.com/ncaab/odds',
        'ncaaf': 'https://www.actionnetwork.com/ncaaf/odds',
        'nba': 'https://www.actionnetwork.com/nba/odds',
        'nfl': 'https://www.actionnetwork.com/nfl/odds',
        'mlb': 'https://www.actionnetwork.com/mlb/odds',
        'nhl': 'https://www.actionnetwork.com/nhl/odds',
    }

    url = sport_urls.get(sport)
    if not url:
        return []

    try:
        time.sleep(REQUEST_DELAY)
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code != 200:
            print(f"  Status: {resp.status_code}")
            return []

        soup = BeautifulSoup(resp.text, 'html.parser')
        games = []

        # Action Network is a Next.js app: server-rendered state is embedded
        # in a __NEXT_DATA__ script tag rather than in the visible markup
        next_data = soup.find('script', {'id': '__NEXT_DATA__'})
        if next_data:
            try:
                data = json.loads(next_data.string)
                # Navigate to odds data (structure varies)
                props = data.get('props', {})
                page_props = props.get('pageProps', {})

                # Look for games/events in various locations
                events = page_props.get('events', []) or page_props.get('games', [])

                for event in events:
                    teams = event.get('teams', [])
                    odds = event.get('odds', {})

                    if len(teams) >= 2:
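                        # Payload team order is an assumption here: US feeds
                        # often list the away side first, so verify home/away
                        # against live data before trusting it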
                        game = {
                            'homeTeam': teams[0].get('name', teams[0].get('displayName', '')),
                            'awayTeam': teams[1].get('name', teams[1].get('displayName', '')),
                            'league': sport,
                            'gameDate': event.get('startTime', event.get('date')),
                            'moneylineHome': odds.get('homeMoneyline'),
                            'moneylineAway': odds.get('awayMoneyline'),
                            'spreadHome': odds.get('homeSpread'),
                            'total': odds.get('total'),
                            'source': 'actionnetwork',
                        }
                        games.append(game)
            except Exception as e:
                print(f"  Error parsing JSON: {e}")

        # Fallback: parse HTML tables
        if not games:
            tables = soup.find_all('table')
            for table in tables:
                rows = table.find_all('tr')
                for row in rows:
                    cells = row.find_all(['td', 'th'])
                    if len(cells) >= 4:
                        try:
                            team_text = cells[0].get_text().strip()
                            if team_text and team_text.lower() not in ('team', 'matchup'):
                                # These fallback rows carry no away team or
                                # odds, so save_odds_to_db() will skip them
                                games.append({
                                    'homeTeam': team_text,
                                    'league': sport,
                                    'source': 'actionnetwork',
                                })
                        except:
                            continue

        print(f"  Found {len(games)} games")
        return games

    except Exception as e:
        print(f"  Error: {e}")
        return []


def scrape_bestfightodds():
    """Scrape MMA odds from BestFightOdds"""
    print(f"\n[BestFightOdds] Fetching MMA odds...")

    url = "https://www.bestfightodds.com/"

    try:
        time.sleep(REQUEST_DELAY)
        resp = requests.get(url, headers=HEADERS, timeout=30)
        if resp.status_code != 200:
            print(f"  Status: {resp.status_code}")
            return []

        soup = BeautifulSoup(resp.text, 'html.parser')
        fights = []

        # BestFightOdds has a table structure
        tables = soup.find_all('table', {'class': re.compile(r'odds-table|event-table', re.I)})

        for table in tables:
            rows = table.find_all('tr')
            current_event = None

            for row in rows:
                # Check for event header
                header = row.find('th', {'colspan': True})
                if header:
                    current_event = header.get_text().strip()
                    continue

                cells = row.find_all('td')
                if len(cells) >= 2:
                    try:
                        fighter_cell = cells[0]
                        fighter_name = fighter_cell.get_text().strip()

                        # Get odds from sportsbooks
                        odds_cells = cells[1:]
                        odds_values = []
                        for oc in odds_cells:
                            odds_text = oc.get_text().strip()
                            odds_val = parse_american_odds(odds_text)
                            if odds_val:
                                odds_values.append(odds_val)

                        if fighter_name and odds_values:
                            fights.append({
                                'fighter': fighter_name,
                                'event': current_event,
                                'odds': odds_values[0] if odds_values else None,
                                'league': 'mma',
                                'source': 'bestfightodds',
                            })
                    except:
                        continue

        # Group into matchups. Rows are assumed to alternate fighter/opponent
        # down the card; stray prop rows would throw the pairing off.
        games = []
        for i in range(0, len(fights) - 1, 2):
            f1, f2 = fights[i], fights[i + 1]
            games.append({
                'homeTeam': f1['fighter'],
                'awayTeam': f2['fighter'],
                'moneylineHome': f1.get('odds'),
                'moneylineAway': f2.get('odds'),
                'event': f1.get('event'),
                'league': 'mma',
                'source': 'bestfightodds',
            })

        print(f"  Found {len(games)} MMA matchups")
        return games

    except Exception as e:
        print(f"  Error: {e}")
        return []


def save_odds_to_db(conn, games):
    """Save scraped odds to GameOdds table"""
    if not games:
        return 0

    cur = conn.cursor()
    saved = 0

    for game in games:
        try:
            # Generate game ID
            home = game.get('homeTeam', '')
            away = game.get('awayTeam', '')
            league = game.get('league', '')

            if not home or not away or not league:
                continue

            game_id = f"{league}_{home}_{away}_{datetime.now().strftime('%Y%m%d')}".lower().replace(' ', '_')

            cur.execute('''
                INSERT INTO "GameOdds" (
                    league, "gameId", "gameDate", "homeTeam", "awayTeam",
                    bookmaker, market, "homeOdds", "awayOdds", "fetchedAt", source
                )
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), %s)
                ON CONFLICT (league, "gameId", bookmaker, market)
                DO UPDATE SET
                    "homeOdds" = EXCLUDED."homeOdds",
                    "awayOdds" = EXCLUDED."awayOdds",
                    "fetchedAt" = NOW()
            ''', (
                league,
                game_id,
                game.get('gameDate', datetime.now(timezone.utc)),
                home,
                away,
                game.get('source', 'scraped'),
                'h2h',
                game.get('moneylineHome'),
                game.get('moneylineAway'),
                game.get('source', 'scraped'),
            ))
            saved += 1

        except Exception as e:
            conn.rollback()
            continue

    conn.commit()
    cur.close()
    return saved
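
# Assumed shape of the "GameOdds" table, inferred from the INSERT above. The
# real migration may differ, but ON CONFLICT requires this unique constraint:
#
#   CREATE TABLE IF NOT EXISTS "GameOdds" (
#       league      TEXT NOT NULL,
#       "gameId"    TEXT NOT NULL,
#       "gameDate"  TIMESTAMPTZ,
#       "homeTeam"  TEXT NOT NULL,
#       "awayTeam"  TEXT NOT NULL,
#       bookmaker   TEXT NOT NULL,
#       market      TEXT NOT NULL,
#       "homeOdds"  INTEGER,
#       "awayOdds"  INTEGER,
#       "fetchedAt" TIMESTAMPTZ NOT NULL,
#       source      TEXT,
#       UNIQUE (league, "gameId", bookmaker, market)
#   );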


def main():
    print("=" * 60)
    print("MULTI-SOURCE ODDS SCRAPER")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    db_url = load_db_url()
    all_games = []
    session = requests.Session()

    # 1. OddsPortal - multiple leagues
    for league_key in ['ncaab', 'ncaaf', 'nba', 'nfl', 'nhl', 'mlb', 'mma', 'epl']:
        games = scrape_oddsportal_league(league_key, session)
        all_games.extend(games)
        time.sleep(2)  # Extra delay between leagues

    # 2. Action Network - college sports focus
    for sport in ['ncaab', 'ncaaf']:
        games = scrape_action_network_odds(sport)
        all_games.extend(games)
        time.sleep(2)

    # 3. BestFightOdds - MMA
    mma_games = scrape_bestfightodds()
    all_games.extend(mma_games)

    # Summary
    print(f"\n{'='*60}")
    print(f"TOTAL GAMES SCRAPED: {len(all_games)}")

    # Count by source
    by_source = {}
    by_league = {}
    for g in all_games:
        src = g.get('source', 'unknown')
        lg = g.get('league', 'unknown')
        by_source[src] = by_source.get(src, 0) + 1
        by_league[lg] = by_league.get(lg, 0) + 1

    print("\nBy source:")
    for src, cnt in sorted(by_source.items(), key=lambda x: -x[1]):
        print(f"  {src}: {cnt}")

    print("\nBy league:")
    for lg, cnt in sorted(by_league.items(), key=lambda x: -x[1]):
        print(f"  {lg}: {cnt}")

    # Save to JSON
    output_dir = '/var/www/html/eventheodds/data'
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, 'scraped_odds.json')

    with open(output_path, 'w') as f:
        json.dump({
            'games': all_games,
            'count': len(all_games),
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'sources': list(by_source.keys()),
        }, f, indent=2, default=str)

    print(f"\nSaved to {output_path}")

    # Save to database
    if db_url and all_games:
        try:
            conn = psycopg2.connect(db_url)
            saved = save_odds_to_db(conn, all_games)
            conn.close()
            print(f"Saved {saved} odds to database")
        except Exception as e:
            print(f"Database error: {e}")

    print("=" * 60)


if __name__ == '__main__':
    main()
