#!/usr/bin/env python3
"""
Scrape DraftKings via Network Interception
Captures API responses from the real site to extract half-line data.
"""
import json
import os
import sys
import time
import re
from datetime import datetime, timezone

# Make the scraper's dedicated virtualenv packages importable. This must run
# BEFORE the third-party imports below, since the script is executed with the
# system interpreter rather than the venv's python.
VENV_PATH = '/var/www/html/eventheodds/.venv-scraper/lib/python3.12/site-packages'
if VENV_PATH not in sys.path:
    sys.path.insert(0, VENV_PATH)

import psycopg2
from playwright.sync_api import sync_playwright

# League slug -> DraftKings lobby URL to load and capture traffic from.
SPORT_URLS = {
    'nba': 'https://sportsbook.draftkings.com/leagues/basketball/nba',
    'nfl': 'https://sportsbook.draftkings.com/leagues/football/nfl',
    'nhl': 'https://sportsbook.draftkings.com/leagues/hockey/nhl',
    'ncaab': 'https://sportsbook.draftkings.com/leagues/basketball/ncaab',
}


def load_db_url():
    """Return the Postgres connection URL with any '?query' suffix stripped.

    Prefers SPORTS_DATABASE_URL from the project .env file; falls back to the
    process environment. The query string (e.g. Prisma's '?schema=public') is
    removed because psycopg2 does not accept those parameters.
    """
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().split('?')[0]
    except OSError:
        # .env missing or unreadable -- fall through to the environment.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def ensure_table(conn):
    """Create the "GameHalfLine" table if it does not exist yet (idempotent)."""
    ddl = '''
        CREATE TABLE IF NOT EXISTS "GameHalfLine" (
            id BIGSERIAL PRIMARY KEY,
            league VARCHAR(50) NOT NULL,
            "gameId" BIGINT,
            "gameDate" TIMESTAMP,
            "homeTeam" VARCHAR(100),
            "awayTeam" VARCHAR(100),
            period VARCHAR(10) NOT NULL,
            market VARCHAR(50) NOT NULL,
            side VARCHAR(50),
            "lineValue" FLOAT,
            "bookOdds" INT,
            bookmaker VARCHAR(50) DEFAULT 'draftkings',
            "createdAt" TIMESTAMP DEFAULT NOW(),
            "updatedAt" TIMESTAMP DEFAULT NOW()
        )
    '''
    cursor = conn.cursor()
    cursor.execute(ddl)
    conn.commit()
    cursor.close()

def parse_dk_api_response(data, league, conn):
    """Walk a DraftKings API payload and store half/quarter/period lines.

    The payload shape varies between endpoints, so this performs a generic
    recursive walk over nested dicts/lists: event names ("Away @ Home" or
    "Home vs Away") establish the teams, category names ("1st Half", ...)
    establish the period, and any 'outcomes' list found while both are known
    is inserted into "GameHalfLine".

    Returns the number of rows inserted. Commits once at the end.
    """
    lines_stored = 0
    cur = conn.cursor()

    # Period labels as they appear in DK category names -> our period codes.
    period_map = {
        '1st half': '1h', 'first half': '1h',
        '2nd half': '2h', 'second half': '2h',
        '1st quarter': '1q', '1st qtr': '1q',
        '2nd quarter': '2q', '2nd qtr': '2q',
        '3rd quarter': '3q', '3rd qtr': '3q',
        '4th quarter': '4q', '4th qtr': '4q',
        '1st period': 'p1', '2nd period': 'p2', '3rd period': 'p3',
    }

    def process_dict(d, home=None, away=None, period=None):
        nonlocal lines_stored

        if not isinstance(d, dict):
            return

        # Event names set the matchup for everything nested below.
        if 'name' in d and isinstance(d['name'], str):
            name = d['name']
            if ' @ ' in name:
                parts = name.split(' @ ')
                away, home = parts[0].strip(), parts[1].strip()
            elif ' vs ' in name.lower():
                # Split case-insensitively but keep the teams' original
                # casing (previously the names were stored lowercased,
                # inconsistent with the '@' branch above).
                idx = name.lower().index(' vs ')
                home = name[:idx].strip()
                away = name[idx + len(' vs '):].strip()

        # Category names carry the period ("1st Half", "2nd Qtr", ...).
        cat_name = d.get('name', '').lower()
        for pattern, period_code in period_map.items():
            if pattern in cat_name:
                period = period_code
                break

        # Store outcomes only once a period and at least one team are known.
        if 'outcomes' in d and period and (home or away):
            for outcome in d.get('outcomes', []):
                try:
                    odds_am = outcome.get('oddsAmerican', outcome.get('odds'))
                    if odds_am:
                        # Normalize '+110' / Unicode-minus '−110' to an int.
                        odds_int = int(str(odds_am).replace('+', '').replace('−', '-'))
                        line_value = outcome.get('line', outcome.get('handicap', outcome.get('points')))

                        # Classify the market from the offer's label/name.
                        market_type = 'unknown'
                        label = (d.get('label', '') + d.get('name', '')).lower()
                        if 'spread' in label or 'handicap' in label:
                            market_type = 'spread'
                        elif 'total' in label or 'over' in label:
                            market_type = 'total'
                        elif 'moneyline' in label or 'winner' in label or 'money line' in label:
                            market_type = 'moneyline'

                        if market_type != 'unknown':
                            cur.execute('''
                                INSERT INTO "GameHalfLine"
                                (league, "homeTeam", "awayTeam", period, market, side, "lineValue", "bookOdds", bookmaker)
                                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 'draftkings')
                                ON CONFLICT DO NOTHING
                            ''', (
                                league,
                                home[:100] if home else None,
                                away[:100] if away else None,
                                period,
                                market_type,
                                outcome.get('label', 'unknown'),
                                # `is not None`: a 0/0.0 line (pick'em spread)
                                # is valid and must not collapse to NULL.
                                float(line_value) if line_value is not None else None,
                                odds_int,
                            ))
                            lines_stored += 1
                except Exception:
                    # Best-effort: one malformed outcome (bad odds string,
                    # non-numeric line, ...) must not abort the whole walk.
                    pass

        # Recurse into every nested dict and every dict inside a list,
        # carrying the matchup/period context downward.
        for key, value in d.items():
            if isinstance(value, dict):
                process_dict(value, home, away, period)
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        process_dict(item, home, away, period)

    process_dict(data)
    conn.commit()
    cur.close()
    return lines_stored


def scrape_with_network(league, conn):
    """Scrape DraftKings for one league by capturing its API traffic.

    Launches headless Chromium, loads the league lobby page, records the JSON
    body of every event/offer API response (plus any server-rendered
    __NEXT_DATA__ blob), then feeds each captured payload through
    parse_dk_api_response(). Returns the total number of rows stored.
    """
    url = SPORT_URLS.get(league)
    if not url:
        # Unknown league slug -- nothing to do.
        return 0

    print(f"\n{league.upper()}: Loading {url}")
    captured_data = []

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        # Desktop-looking context to avoid the mobile layout / bot heuristics.
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            locale='en-US',
            timezone_id='America/New_York',
        )

        page = context.new_page()

        def handle_response(response):
            """Stash the JSON body of any odds-related API response."""
            # NOTE: `except Exception` (not bare `except:`) so Ctrl-C and
            # interpreter shutdown are never swallowed by the handler.
            try:
                resp_url = response.url
                if 'api' in resp_url and ('eventgroup' in resp_url.lower() or 'event' in resp_url.lower() or 'offer' in resp_url.lower()):
                    if response.status == 200:
                        try:
                            data = response.json()
                            captured_data.append(data)
                            print(f"  Captured API response: {resp_url[:80]}...")
                        except Exception:
                            pass  # non-JSON body; ignore
            except Exception:
                pass  # a handler error must never kill the page

        page.on('response', handle_response)

        try:
            page.goto(url, wait_until='networkidle', timeout=45000)
            time.sleep(5)  # let late async odds requests finish

            # Click half/quarter/period tabs to trigger extra API calls.
            tabs = page.query_selector_all('[data-testid*="tab"], [class*="tab"], button')
            for tab in tabs[:10]:
                try:
                    text = tab.inner_text().lower()
                    if any(x in text for x in ['half', 'quarter', 'period', '1h', '1q', '2h']):
                        tab.click()
                        time.sleep(2)
                except Exception:
                    pass  # stale/hidden element; try the next tab

            # Some data is server-rendered into the page as __NEXT_DATA__.
            content = page.content()
            next_data_match = re.search(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', content, re.DOTALL)
            if next_data_match:
                try:
                    next_data = json.loads(next_data_match.group(1))
                    captured_data.append(next_data)
                    print(f"  Captured __NEXT_DATA__")
                except Exception:
                    pass  # malformed/truncated blob; skip it
        except Exception as e:
            print(f"  Navigation error: {e}")
        finally:
            # Close even on KeyboardInterrupt (previously leaked until the
            # sync_playwright context exited).
            browser.close()

    # Process everything we captured.
    total_lines = 0
    for data in captured_data:
        total_lines += parse_dk_api_response(data, league, conn)

    print(f"  Stored {total_lines} lines from {len(captured_data)} API responses")
    return total_lines


def main():
    """Entry point: connect to Postgres and scrape each supported league."""
    print("=" * 60)
    print("DRAFTKINGS HALF-LINE SCRAPER (Network Capture)")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    db_url = load_db_url()
    if not db_url:
        print("ERROR: No database URL")
        return

    conn = psycopg2.connect(db_url)
    try:
        ensure_table(conn)

        total = 0
        for league in ['nba', 'nhl', 'nfl', 'ncaab']:
            total += scrape_with_network(league, conn)
            time.sleep(2)  # brief pause between leagues
    finally:
        # Close the connection even if a scrape raises mid-run
        # (previously it leaked on any exception).
        conn.close()

    print("\n" + "=" * 60)
    print(f"TOTAL: {total} half/quarter lines captured")
    print("=" * 60)


# Run the scraper only when executed as a script (not on import).
if __name__ == '__main__':
    main()
