#!/usr/bin/env python3
"""
Compute Book Divergence Metrics
Identifies when sportsbooks disagree on lines.
Fixes: Q26 "Are books disagreeing on this matchup?"
Works across all sports (NBA, NFL, NHL, MLB, NCAAB, NCAAF, soccer leagues)
"""
import psycopg2
from datetime import datetime

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def compute_book_divergence():
    conn = psycopg2.connect(load_db_url())
    cur = conn.cursor()

    # Create BookDivergence table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "BookDivergence" (
            id SERIAL PRIMARY KEY,
            league VARCHAR(20) NOT NULL,
            "gameId" TEXT,
            "gameDate" DATE,
            "homeTeam" VARCHAR(100),
            "awayTeam" VARCHAR(100),
            "marketType" VARCHAR(30),
            "bookCount" INTEGER,
            "consensusLine" NUMERIC,
            "minLine" NUMERIC,
            "maxLine" NUMERIC,
            "lineRange" NUMERIC,
            "stdDev" NUMERIC,
            "outlierBook" VARCHAR(50),
            "outlierLine" NUMERIC,
            "divergenceLevel" VARCHAR(20),
            "updatedAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
            UNIQUE(league, "gameId", "marketType")
        )
    ''')
    conn.commit()
    print("BookDivergence table created/verified")

    # Compute divergence from OddsSnapshot (multi-book data)
    print("\nComputing book divergence from OddsSnapshot...")
    cur.execute('''
        WITH book_lines AS (
            SELECT
                league,
                "externalGameId" as game_id,
                "gameDate",
                "homeTeam",
                "awayTeam",
                COALESCE(source, bookmaker) as source,
                "spreadHome" as spread,
                total,
                "moneylineHome"
            FROM "OddsSnapshot"
            WHERE "spreadHome" IS NOT NULL
              AND "gameDate" >= NOW() - INTERVAL '7 days'
        ),
        spread_divergence AS (
            SELECT
                league,
                game_id,
                MAX("gameDate") as game_date,
                MAX("homeTeam") as home_team,
                MAX("awayTeam") as away_team,
                'SPREAD' as market_type,
                COUNT(DISTINCT source) as book_count,
                AVG(spread) as consensus_line,
                MIN(spread) as min_line,
                MAX(spread) as max_line,
                MAX(spread) - MIN(spread) as line_range,
                STDDEV(spread) as std_dev
            FROM book_lines
            WHERE spread IS NOT NULL
            GROUP BY league, game_id
            HAVING COUNT(DISTINCT source) >= 2
        ),
        total_divergence AS (
            SELECT
                league,
                game_id,
                MAX("gameDate") as game_date,
                MAX("homeTeam") as home_team,
                MAX("awayTeam") as away_team,
                'TOTAL' as market_type,
                COUNT(DISTINCT source) as book_count,
                AVG(total) as consensus_line,
                MIN(total) as min_line,
                MAX(total) as max_line,
                MAX(total) - MIN(total) as line_range,
                STDDEV(total) as std_dev
            FROM book_lines
            WHERE total IS NOT NULL
            GROUP BY league, game_id
            HAVING COUNT(DISTINCT source) >= 2
        ),
        all_divergence AS (
            SELECT * FROM spread_divergence
            UNION ALL
            SELECT * FROM total_divergence
        )
        INSERT INTO "BookDivergence"
        (league, "gameId", "gameDate", "homeTeam", "awayTeam", "marketType",
         "bookCount", "consensusLine", "minLine", "maxLine", "lineRange", "stdDev",
         "divergenceLevel")
        SELECT
            league,
            game_id,
            game_date,
            home_team,
            away_team,
            market_type,
            book_count,
            ROUND(consensus_line::numeric, 1),
            min_line,
            max_line,
            ROUND(line_range::numeric, 1),
            ROUND(std_dev::numeric, 2),
            CASE
                WHEN line_range >= 2 THEN 'HIGH'
                WHEN line_range >= 1 THEN 'MODERATE'
                WHEN line_range >= 0.5 THEN 'LOW'
                ELSE 'CONSENSUS'
            END
        FROM all_divergence
        WHERE line_range IS NOT NULL
        ON CONFLICT (league, "gameId", "marketType") DO UPDATE SET
            "bookCount" = EXCLUDED."bookCount",
            "consensusLine" = EXCLUDED."consensusLine",
            "minLine" = EXCLUDED."minLine",
            "maxLine" = EXCLUDED."maxLine",
            "lineRange" = EXCLUDED."lineRange",
            "stdDev" = EXCLUDED."stdDev",
            "divergenceLevel" = EXCLUDED."divergenceLevel",
            "updatedAt" = NOW()
    ''')
    snapshot_count = cur.rowcount
    conn.commit()
    print(f"  Book divergence from OddsSnapshot: {snapshot_count}")

    # Also compute from GameOdds if available (market-based structure)
    print("\nComputing book divergence from GameOdds...")
    cur.execute('''
        WITH spread_divergence AS (
            SELECT
                league,
                "gameId"::text as game_id,
                MAX("gameDate") as game_date,
                MAX("homeTeam") as home_team,
                MAX("awayTeam") as away_team,
                'SPREAD' as market_type,
                COUNT(DISTINCT COALESCE(source, bookmaker)) as book_count,
                AVG("lineValue") as consensus_line,
                MIN("lineValue") as min_line,
                MAX("lineValue") as max_line,
                MAX("lineValue") - MIN("lineValue") as line_range,
                STDDEV("lineValue") as std_dev
            FROM "GameOdds"
            WHERE market ILIKE '%spread%'
              AND "lineValue" IS NOT NULL
              AND "gameDate" >= NOW() - INTERVAL '7 days'
            GROUP BY league, "gameId"
            HAVING COUNT(DISTINCT COALESCE(source, bookmaker)) >= 2
        ),
        total_divergence AS (
            SELECT
                league,
                "gameId"::text as game_id,
                MAX("gameDate") as game_date,
                MAX("homeTeam") as home_team,
                MAX("awayTeam") as away_team,
                'TOTAL' as market_type,
                COUNT(DISTINCT COALESCE(source, bookmaker)) as book_count,
                AVG("lineValue") as consensus_line,
                MIN("lineValue") as min_line,
                MAX("lineValue") as max_line,
                MAX("lineValue") - MIN("lineValue") as line_range,
                STDDEV("lineValue") as std_dev
            FROM "GameOdds"
            WHERE market ILIKE '%total%'
              AND "lineValue" IS NOT NULL
              AND "gameDate" >= NOW() - INTERVAL '7 days'
            GROUP BY league, "gameId"
            HAVING COUNT(DISTINCT COALESCE(source, bookmaker)) >= 2
        ),
        all_divergence AS (
            SELECT * FROM spread_divergence
            UNION ALL
            SELECT * FROM total_divergence
        )
        INSERT INTO "BookDivergence"
        (league, "gameId", "gameDate", "homeTeam", "awayTeam", "marketType",
         "bookCount", "consensusLine", "minLine", "maxLine", "lineRange", "stdDev",
         "divergenceLevel")
        SELECT
            league,
            game_id,
            game_date,
            home_team,
            away_team,
            market_type,
            book_count,
            ROUND(consensus_line::numeric, 1),
            min_line,
            max_line,
            ROUND(line_range::numeric, 1),
            ROUND(std_dev::numeric, 2),
            CASE
                WHEN line_range >= 2 THEN 'HIGH'
                WHEN line_range >= 1 THEN 'MODERATE'
                WHEN line_range >= 0.5 THEN 'LOW'
                ELSE 'CONSENSUS'
            END
        FROM all_divergence
        WHERE line_range IS NOT NULL
        ON CONFLICT (league, "gameId", "marketType") DO NOTHING
    ''')
    gameodds_count = cur.rowcount
    conn.commit()
    print(f"  Book divergence from GameOdds: {gameodds_count}")

    # Summary
    cur.execute('''
        SELECT league, "divergenceLevel", COUNT(*)
        FROM "BookDivergence"
        GROUP BY league, "divergenceLevel"
        ORDER BY league, COUNT(*) DESC
    ''')
    print("\n\nBook Divergence Summary:")
    print("-" * 60)
    current_league = None
    for row in cur.fetchall():
        if row[0] != current_league:
            current_league = row[0]
            print(f"\n  {current_league.upper()}:")
        print(f"    {row[1]}: {row[2]} games")

    # Show high divergence games
    cur.execute('''
        SELECT league, "homeTeam", "awayTeam", "marketType", "lineRange", "bookCount"
        FROM "BookDivergence"
        WHERE "divergenceLevel" = 'HIGH'
        ORDER BY "lineRange" DESC
        LIMIT 10
    ''')
    results = cur.fetchall()
    if results:
        print("\n\nHighest Divergence Games (books disagree most):")
        print("-" * 70)
        for row in results:
            print(f"  {row[0].upper()}: {row[2]} @ {row[1]} ({row[3]}) - Range: {row[4]} pts ({row[5]} books)")

    cur.close()
    conn.close()
    print(f"\n✅ Book divergence complete: {snapshot_count + gameodds_count} records")

if __name__ == '__main__':
    compute_book_divergence()
