#!/usr/bin/env python3
"""
Prop Line Snapshot Worker — Captures genuine closing line data for CLV analysis.

PROBLEM: PPL rows are created via createMany({skipDuplicates:true}) with a unique key
         that includes oddsAmerican. Each odds change creates a NEW row, but snapshotAt
         is rarely set and hoursToGame is never set. PropLineHistory had fake timestamps.

SOLUTION: Every 2 hours, query SGO API for upcoming events with current player prop
          odds, and write genuine snapshots to PropLineHistory with actual hoursToGame.
          This gives us a real timeline of odds movement for CLV calculation.

RUN: PM2 process (auto-restart, never goes offline)
"""
import os
import sys
import time
import psycopg2
import requests
import traceback
from datetime import datetime, timezone, timedelta
from collections import defaultdict

# Add scripts directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from dotenv import load_dotenv

load_dotenv('/var/www/html/eventheodds/.env')

# SGO API config
# NOTE(review): a real-looking API key is committed as the getenv() fallback —
# prefer failing fast when SPORTSGAMEODDS_API_KEY is unset, and rotate this key.
API_KEY = os.getenv('SPORTSGAMEODDS_API_KEY', '47d6ce020d896ece307a284e8c78ff7f')
BASE_URL = os.getenv('SPORTSGAMEODDS_BASE_URL', 'https://api.sportsgameodds.com/v2')
HEADER_NAME = os.getenv('SPORTSGAMEODDS_HEADER', 'x-api-key')
# .split('?')[0] strips any query-string options from the DSN before it is
# handed to psycopg2 (e.g. Prisma-style '?schema=...' suffixes).
# NOTE(review): the fallback DSN embeds dev credentials — acceptable only for
# local development.
DB_URL = os.getenv('SPORTS_DATABASE_URL', 'postgresql://eventheodds:eventheodds_dev_password@127.0.0.1:5433/eventheodds_sports').split('?')[0]

# Leagues to snapshot: canonical_name → SGO API leagueID
# Verified against SGO API — LALIGA/SERIEA/LIGUE1/UCL return 400
LEAGUES = {
    'nba': 'NBA',
    'nhl': 'NHL',
    'mlb': 'MLB',
    'ncaab': 'NCAAB',
    'epl': 'EPL',
    'bundesliga': 'BUNDESLIGA',
    'la_liga': 'LA_LIGA',
    'serie_a': 'IT_SERIE_A',
    'ligue_1': 'FR_LIGUE_1',
    'champions_league': 'UEFA_CHAMPIONS_LEAGUE',
    'mls': 'MLS',
}

# Prop stat types we care about (maps SGO statID → our propType).
# Both 'shots_onGoal' and 'shots_on_goal' map to the same propType on purpose,
# absorbing an upstream naming inconsistency.
PROP_STATS = {
    'points': 'points',
    'rebounds': 'rebounds',
    'assists': 'assists',
    'threePointersMade': 'threePointersMade',
    'steals': 'steals',
    'blocks': 'blocks',
    'turnovers': 'turnovers',
    'points+rebounds+assists': 'points+rebounds+assists',
    'fantasyScore': 'fantasyScore',
    'goals': 'goals',
    'shots_onGoal': 'shots_onGoal',
    'shots_on_goal': 'shots_onGoal',
    'shots': 'shots',
    'saves': 'saves',
}

# How often to snapshot (seconds)
SNAPSHOT_INTERVAL = 2 * 3600  # 2 hours


class SGOClient:
    """Thin wrapper around the SGO REST API used for prop snapshots."""

    def __init__(self):
        # One persistent session so the API-key header is sent on every call.
        self.session = requests.Session()
        self.session.headers.update({HEADER_NAME: API_KEY})

    def get_upcoming_events(self, league_id, limit=50):
        """Return upcoming (not-yet-started) events with odds for *league_id*.

        Best-effort: any network/HTTP failure is logged and an empty list is
        returned so the caller can simply move on to the next league.
        """
        query = {
            'leagueID': league_id,
            'limit': limit,
            'oddsAvailable': 'true',
            'started': 'false',
        }
        try:
            response = self.session.get(f"{BASE_URL}/events", params=query, timeout=30)
        except Exception as e:
            print(f"  SGO API error for {league_id}: {e}", flush=True)
            return []

        # A 400 here usually means SGO does not recognise the leagueID.
        if response.status_code == 400:
            print(f"  SGO API 400 for {league_id} — invalid leagueID?", flush=True)
            return []

        try:
            response.raise_for_status()
            payload = response.json()
        except Exception as e:
            print(f"  SGO API error for {league_id}: {e}", flush=True)
            return []
        return payload.get('data', [])


def extract_player_props(event):
    """Pull over-side, full-game player prop odds out of an SGO event.

    Returns a list of dicts with keys:
    playerExternalId, propType, lineValue, oddsAmerican, oddId.
    Team-level entries (home/away/all/blank) and untracked stat types are
    ignored, as is any prop missing parseable odds or a line value.
    """
    all_odds = event.get('odds', {})
    if not all_odds:
        return []

    def _parse_american(raw):
        # Odds arrive as strings ("+110", "-120"); tolerate floats and junk.
        try:
            return int(float(raw))
        except (ValueError, TypeError):
            return None

    extracted = []
    for odd_id, payload in all_odds.items():
        # Keep only: full-game, over/under market, over side.
        if payload.get('periodID', '') != 'game':
            continue
        if payload.get('betTypeID', '') != 'ou':
            continue
        if payload.get('sideID', '') != 'over':
            continue

        # Team-level rows use home/away/all (or blank) as the stat entity.
        entity = payload.get('statEntityID', '')
        if entity in ('home', 'away', 'all', ''):
            continue

        # Only stats we track map to a propType.
        prop_type = PROP_STATS.get(payload.get('statID', ''))
        if prop_type is None:
            continue

        # Prefer book odds; fall back to fair odds when book odds are absent
        # or unparseable.
        book_raw = payload.get('bookOdds')
        odds_american = _parse_american(book_raw) if book_raw else None
        if odds_american is None:
            fair_raw = payload.get('fairOdds')
            odds_american = _parse_american(fair_raw) if fair_raw else None

        line_raw = payload.get('bookOverUnder') or payload.get('fairOverUnder')
        line_value = None
        if line_raw:
            try:
                line_value = float(line_raw)
            except (ValueError, TypeError):
                line_value = None

        # Both pieces are required for a usable snapshot.
        if odds_american is None or line_value is None:
            continue

        extracted.append({
            'playerExternalId': entity,
            'propType': prop_type,
            'lineValue': line_value,
            'oddsAmerican': odds_american,
            'oddId': odd_id,
        })

    return extracted


def build_game_map(cur, events, canon_league):
    """Map SGO eventIDs to SportsGame rows.

    Strategy 1: direct lookup via externalGameId = 'sgo:{eventID}' (one query
                for all events).
    Strategy 2: fallback — match by league + homeTeam (case-insensitive) and
                gameDate within ±1 day of the event's start time.

    Args:
        cur: DB-API cursor (psycopg2) used for all lookups.
        events: list of SGO event dicts; each must carry an 'eventID' key.
        canon_league: canonical league name stored in SportsGame.league.

    Returns:
        dict mapping sgo_event_id -> (sportsgame_id, game_date).
    """
    sg_map = {}

    # Strategy 1: direct sgo: prefix lookup
    sgo_event_ids = [evt['eventID'] for evt in events]
    if sgo_event_ids:
        placeholders = ','.join(['%s'] * len(sgo_event_ids))
        cur.execute(f'''
            SELECT "externalGameId", id, "gameDate"
            FROM "SportsGame"
            WHERE "externalGameId" IN ({placeholders})
        ''', [f'sgo:{eid}' for eid in sgo_event_ids])
        prefix = 'sgo:'
        for ext_id, sg_id, game_date in cur.fetchall():
            # BUGFIX: str.replace('sgo:', '') removed EVERY occurrence of the
            # substring, mangling any event ID that contains 'sgo:' internally.
            # Strip only the leading prefix.
            sgo_eid = ext_id[len(prefix):] if ext_id.startswith(prefix) else ext_id
            sg_map[sgo_eid] = (sg_id, game_date)

    # Strategy 2: for events strategy 1 missed, try homeTeam + date matching.
    for event in events:
        eid = event['eventID']
        if eid in sg_map:
            continue

        starts_at = event.get('status', {}).get('startsAt', '')
        teams = event.get('teams', {})
        home_names = teams.get('home', {}).get('names', {})
        home_team = home_names.get('long', '') or home_names.get('medium', '') or home_names.get('short', '')

        # Without both a home team and a start time we cannot match.
        if not home_team or not starts_at:
            continue

        # Parse the event's start timestamp into a calendar date string.
        try:
            game_dt = datetime.fromisoformat(starts_at.replace('Z', '+00:00'))
            game_date_str = game_dt.strftime('%Y-%m-%d')
        except (ValueError, TypeError):
            continue

        # Match by league + homeTeam (case-insensitive) + date (±1 day) to
        # absorb timezone-driven date shifts between SGO and our DB.
        cur.execute('''
            SELECT id, "gameDate"
            FROM "SportsGame"
            WHERE league = %s
              AND LOWER("homeTeam") = LOWER(%s)
              AND "gameDate"::date BETWEEN %s::date - 1 AND %s::date + 1
            LIMIT 1
        ''', (canon_league, home_team, game_date_str, game_date_str))
        row = cur.fetchone()
        if row:
            sg_map[eid] = (row[0], row[1])

    return sg_map


def snapshot_cycle(client):
    """Run one full snapshot cycle across all leagues.

    For each league in LEAGUES: fetch upcoming events with odds from SGO,
    map them to SportsGame rows, and insert one PropLineHistory row per
    player prop whose odds changed since the previous snapshot (unchanged
    odds are skipped). Afterwards, mark closing lines on PlayerPropLine
    for games that started within the last 3 hours.

    Args:
        client: SGOClient used for all API calls.

    Returns:
        int: number of PropLineHistory rows written in this cycle.
    """
    # One timestamp for the whole cycle so every row in this pass shares
    # the same snapshotAt.
    now = datetime.now(timezone.utc)
    print(f"\n{'='*65}", flush=True)
    print(f"PROP SNAPSHOT CYCLE — {now.strftime('%Y-%m-%d %H:%M:%S UTC')}", flush=True)
    print(f"{'='*65}", flush=True)

    conn = psycopg2.connect(DB_URL)
    total_snapshots = 0
    total_new = 0
    total_changed = 0
    total_skipped = 0

    try:
        cur = conn.cursor()

        for canon_league, sgo_league_id in LEAGUES.items():
            events = client.get_upcoming_events(sgo_league_id)
            if not events:
                print(f"  [{canon_league}] No upcoming events", flush=True)
                continue

            # Build SGO eventID → SportsGame mapping
            sg_map = build_game_map(cur, events, canon_league)
            matched = len(sg_map)
            unmatched = len(events) - matched

            league_new = 0
            league_changed = 0
            league_skipped = 0
            league_total = 0

            for event in events:
                eid = event['eventID']
                mapping = sg_map.get(eid)
                # Events we cannot tie to a SportsGame row are dropped.
                if not mapping:
                    continue

                sg_id, game_date = mapping

                # Calculate actual hoursToGame (clamped at 0 for games that
                # already started). Naive timestamps are assumed to be UTC.
                hours_to_game = None
                if game_date:
                    game_dt = game_date.replace(tzinfo=timezone.utc) if game_date.tzinfo is None else game_date
                    hours_to_game = max(0, round((game_dt - now).total_seconds() / 3600, 2))

                props = extract_player_props(event)
                if not props:
                    continue

                for prop in props:
                    pid = prop['playerExternalId']
                    pt = prop['propType']
                    lv = prop['lineValue']
                    odds = prop['oddsAmerican']

                    # Dedup: fetch the most recent snapshot for this exact
                    # (player, game, propType, lineValue) tuple.
                    cur.execute('''
                        SELECT "oddsAmerican"
                        FROM "PropLineHistory"
                        WHERE "playerExternalId" = %s
                          AND "gameId" = %s
                          AND "propType" = %s
                          AND "lineValue" = %s
                        ORDER BY "snapshotAt" DESC
                        LIMIT 1
                    ''', (pid, str(sg_id), pt, lv))
                    last = cur.fetchone()

                    # Odds unchanged since last snapshot → nothing to record.
                    if last and last[0] == odds:
                        league_skipped += 1
                        continue

                    is_new = last is None
                    is_changed = last is not None and last[0] != odds
                    previous_odds = last[0] if last else None

                    # Write new snapshot.
                    # NOTE(review): the "previousLine"/"lineChange" columns are
                    # filled with ODDS values (previous odds, odds delta), not
                    # line values — confirm this matches the schema's intent.
                    # NOTE(review): `if previous_odds` is a truthiness check, so
                    # the delta would be dropped if previous odds were 0;
                    # `is not None` would be more precise, though American odds
                    # of exactly 0 presumably never occur — verify.
                    cur.execute('''
                        INSERT INTO "PropLineHistory"
                        (league, "gameId", "playerExternalId", "propType",
                         "snapshotAt", "hoursToGame", "lineValue", "oddsAmerican",
                         "previousLine", "lineChange", "isOpening", "createdAt")
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
                    ''', (
                        canon_league, str(sg_id), pid, pt,
                        now, hours_to_game, lv, odds,
                        previous_odds,
                        (odds - previous_odds) if previous_odds else None,
                        is_new,
                    ))

                    league_total += 1
                    if is_new:
                        league_new += 1
                    if is_changed:
                        league_changed += 1

                # Commit per event to avoid huge transactions
                conn.commit()

            total_snapshots += league_total
            total_new += league_new
            total_changed += league_changed
            total_skipped += league_skipped
            print(f"  [{canon_league}] {len(events)} events, {matched} mapped "
                  f"({unmatched} unmatched) → {league_total} snapshots "
                  f"({league_new} new, {league_changed} changed, {league_skipped} unchanged)",
                  flush=True)

        # Mark isClosingLine on PPL for recently started games. DISTINCT ON +
        # ORDER BY hoursToGame ASC picks, per prop tuple, the snapshot taken
        # closest to tip-off — i.e. the genuine closing line.
        cur.execute('''
            UPDATE "PlayerPropLine" ppl
            SET "isClosingLine" = TRUE
            FROM (
                SELECT DISTINCT ON (plh."playerExternalId", plh."gameId", plh."propType", plh."lineValue")
                    plh."playerExternalId", plh."gameId"::bigint AS game_id,
                    plh."propType", plh."lineValue"
                FROM "PropLineHistory" plh
                JOIN "SportsGame" sg ON plh."gameId" = sg.id::text
                WHERE sg."gameDate" BETWEEN NOW() - INTERVAL '3 hours' AND NOW()
                ORDER BY plh."playerExternalId", plh."gameId", plh."propType", plh."lineValue",
                         plh."hoursToGame" ASC NULLS LAST
            ) closing
            WHERE ppl."playerExternalId" = closing."playerExternalId"
              AND ppl."gameId" = closing.game_id
              AND ppl."propType" = closing."propType"
              AND ppl."lineValue" = closing."lineValue"
              AND ppl."isClosingLine" IS NOT TRUE
        ''')
        closing_marked = cur.rowcount
        conn.commit()
        if closing_marked > 0:
            print(f"  Marked {closing_marked} PPL rows as isClosingLine", flush=True)

        cur.close()

    except Exception as e:
        # Roll back the partial transaction but keep the worker alive —
        # the next cycle retries from scratch.
        conn.rollback()
        print(f"  ERROR in snapshot cycle: {e}", flush=True)
        traceback.print_exc()
    finally:
        conn.close()

    print(f"\n  TOTAL: {total_snapshots} snapshots ({total_new} new, "
          f"{total_changed} changed, {total_skipped} unchanged/skipped)", flush=True)
    return total_snapshots


def main():
    """Main loop — runs forever as a PM2 worker.

    Startup sequence:
      1. Print a config banner.
      2. Best-effort SGO API connectivity/credits check (warning only).
      3. Verify DB connectivity and that PropLineHistory exists — exit(1)
         on failure — and ensure the supporting indexes are present.
      4. Loop forever: run a snapshot cycle, sleep SNAPSHOT_INTERVAL seconds.
    """
    print("=" * 65, flush=True)
    print("PROP SNAPSHOT WORKER STARTING", flush=True)
    print(f"Interval: {SNAPSHOT_INTERVAL}s ({SNAPSHOT_INTERVAL/3600:.1f}h)", flush=True)
    print(f"Leagues: {', '.join(LEAGUES.keys())}", flush=True)
    print(f"DB: {DB_URL[:50]}...", flush=True)
    print("=" * 65, flush=True)

    client = SGOClient()

    # Check API connectivity — a failure here is only a warning, since each
    # cycle also handles API errors per-league.
    try:
        resp = client.session.get(f"{BASE_URL}/account/usage", timeout=10)
        usage = resp.json()
        print(f"SGO API: credits remaining = {usage.get('creditsRemaining', '?')}", flush=True)
    except Exception as e:
        print(f"WARNING: SGO API check failed: {e}", flush=True)

    # Verify DB connectivity and PropLineHistory exists
    conn = None
    try:
        conn = psycopg2.connect(DB_URL)
        cur = conn.cursor()
        cur.execute('SELECT COUNT(*) FROM "PropLineHistory"')
        count = cur.fetchone()[0]
        print(f"PropLineHistory: {count:,} existing rows", flush=True)

        # Ensure indexes exist for efficient dedup + CLV queries
        cur.execute('''CREATE INDEX IF NOT EXISTS "PropLineHistory_player_game"
                       ON "PropLineHistory" ("playerExternalId", "gameId")''')
        cur.execute('''CREATE INDEX IF NOT EXISTS "PropLineHistory_snapshot"
                       ON "PropLineHistory" ("snapshotAt")''')
        cur.execute('''CREATE INDEX IF NOT EXISTS "PropLineHistory_league_game"
                       ON "PropLineHistory" (league, "gameId")''')
        cur.execute('''CREATE INDEX IF NOT EXISTS "PropLineHistory_clv_lookup"
                       ON "PropLineHistory" ("playerExternalId", "gameId", "propType", "lineValue", "snapshotAt" DESC)''')
        conn.commit()
        cur.close()
        print("DB: OK, indexes verified", flush=True)
    except Exception as e:
        print(f"DB connection failed: {e}", flush=True)
        sys.exit(1)
    finally:
        # BUGFIX: the connection was previously leaked when the index setup
        # failed after connecting; always close it.
        if conn is not None:
            conn.close()

    cycle = 0
    while True:
        cycle += 1
        try:
            # Per-cycle totals are logged inside snapshot_cycle; the return
            # value was previously bound to an unused local.
            snapshot_cycle(client)
            print(f"\nCycle {cycle} complete. Next in {SNAPSHOT_INTERVAL/3600:.1f}h. Sleeping...", flush=True)
        except Exception as e:
            # snapshot_cycle guards itself; this is a last-resort net so the
            # PM2 worker never dies mid-loop.
            print(f"\nCycle {cycle} FAILED: {e}", flush=True)
            traceback.print_exc()
            print(f"Will retry in {SNAPSHOT_INTERVAL/3600:.1f}h...", flush=True)

        time.sleep(SNAPSHOT_INTERVAL)


# Entry point when executed directly (normally supervised by PM2).
if __name__ == '__main__':
    main()
