#!/usr/bin/env python3
"""
Enhance Multi-Book Market Efficiency Analysis
Computes efficiency metrics by sportsbook
Enables Q16, Q20: Market efficiency by sportsbook, residual inefficiencies
"""
import psycopg2
from datetime import datetime, timezone

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def create_book_efficiency_table(cur, conn):
    """Create sportsbook-level efficiency table"""
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "BookEfficiency" (
            id BIGSERIAL PRIMARY KEY,
            league VARCHAR(20),
            bookmaker VARCHAR(100),
            market VARCHAR(50),
            season INT,
            "sampleSize" INT,
            "spreadAccuracy" FLOAT,
            "totalAccuracy" FLOAT,
            "mlAccuracy" FLOAT,
            "avgLineDeviation" FLOAT,
            "closingLineValue" FLOAT,
            "holdPct" FLOAT,
            "sharpnessScore" FLOAT,
            "updatedAt" TIMESTAMP DEFAULT NOW(),
            UNIQUE(league, bookmaker, market, season)
        )
    ''')
    cur.execute('CREATE INDEX IF NOT EXISTS "BookEfficiency_book" ON "BookEfficiency" (bookmaker)')
    conn.commit()

def compute_book_efficiency(cur, conn):
    """Compute efficiency metrics by sportsbook"""
    print("\nComputing sportsbook efficiency...")

    # Get bookmakers from GameOdds
    cur.execute('''
        SELECT DISTINCT bookmaker, league
        FROM "GameOdds"
        WHERE bookmaker IS NOT NULL
    ''')
    book_leagues = cur.fetchall()

    print(f"  Found {len(book_leagues)} book-league combinations")

    for bookmaker, league in book_leagues:
        # Compute efficiency metrics
        cur.execute('''
            WITH book_lines AS (
                SELECT
                    go."gameId",
                    go.market,
                    go."lineValue",
                    go."homeOdds",
                    go."awayOdds",
                    sg."homeScore",
                    sg."awayScore",
                    sg."spreadHome" as actual_spread
                FROM "GameOdds" go
                LEFT JOIN "SportsGame" sg ON go."gameId" = sg.id::text
                WHERE go.bookmaker = %s
                  AND go.league = %s
                  AND sg."homeScore" IS NOT NULL
            )
            SELECT
                market,
                COUNT(*) as sample,
                -- Spread accuracy: how close was the line to actual result
                ROUND(AVG(CASE
                    WHEN market = 'spreads' AND "lineValue" IS NOT NULL THEN
                        100 - ABS(("homeScore" - "awayScore") - "lineValue")
                    ELSE NULL
                END)::numeric, 2) as spread_accuracy,
                -- Total accuracy
                ROUND(AVG(CASE
                    WHEN market = 'totals' AND "lineValue" IS NOT NULL THEN
                        100 - ABS(("homeScore" + "awayScore") - "lineValue")
                    ELSE NULL
                END)::numeric, 2) as total_accuracy,
                -- Average deviation from closing line
                ROUND(AVG(ABS(COALESCE("lineValue", 0)))::numeric, 2) as avg_line
            FROM book_lines
            GROUP BY market
        ''', (bookmaker, league))

        results = cur.fetchall()

        for market, sample, spread_acc, total_acc, avg_line in results:
            if sample < 10:
                continue

            # Calculate sharpness score (0-100, higher = sharper)
            accuracy = spread_acc or total_acc or 50
            sharpness = min(max((accuracy - 80) * 5, 0), 100) if accuracy else 50

            cur.execute('''
                INSERT INTO "BookEfficiency" (
                    league, bookmaker, market, season,
                    "sampleSize", "spreadAccuracy", "totalAccuracy",
                    "avgLineDeviation", "sharpnessScore"
                ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                ON CONFLICT (league, bookmaker, market, season) DO UPDATE SET
                    "sampleSize" = EXCLUDED."sampleSize",
                    "spreadAccuracy" = EXCLUDED."spreadAccuracy",
                    "totalAccuracy" = EXCLUDED."totalAccuracy",
                    "sharpnessScore" = EXCLUDED."sharpnessScore",
                    "updatedAt" = NOW()
            ''', (
                league, bookmaker, market, 2025,
                sample, spread_acc, total_acc,
                avg_line, sharpness
            ))

    conn.commit()

    cur.execute('SELECT COUNT(*) FROM "BookEfficiency"')
    return cur.fetchone()[0]

def update_market_efficiency(cur, conn):
    """Update MarketEfficiency with additional metrics"""
    print("\nUpdating market efficiency...")

    # Add bookmaker info to MarketEfficiency if not present
    cur.execute('''
        ALTER TABLE "MarketEfficiency"
        ADD COLUMN IF NOT EXISTS bookmaker VARCHAR(100),
        ADD COLUMN IF NOT EXISTS "sharpnessRank" INT,
        ADD COLUMN IF NOT EXISTS "residualEdge" FLOAT
    ''')
    conn.commit()

    # Compute residual edges after removing known biases
    cur.execute('''
        INSERT INTO "MarketEfficiency" (
            league, market, "subMarket", "sampleSize",
            "winRate", "expectedWinRate", "roi",
            "favoriteWinRate", "underdogWinRate",
            "homeWinRate", "awayWinRate"
        )
        SELECT
            go.league,
            go.market,
            'overall' as sub_market,
            COUNT(*) as sample,
            -- Win rate
            ROUND(AVG(CASE
                WHEN go.market = 'spreads' THEN
                    CASE WHEN (sg."homeScore" - sg."awayScore") > go."lineValue" THEN 1 ELSE 0 END
                WHEN go.market = 'totals' THEN
                    CASE WHEN (sg."homeScore" + sg."awayScore") > go."lineValue" THEN 1 ELSE 0 END
                ELSE 0.5
            END)::numeric * 100, 1) as win_rate,
            50.0 as expected,
            -- ROI (assuming -110 standard)
            ROUND((AVG(CASE
                WHEN go.market = 'spreads' THEN
                    CASE WHEN (sg."homeScore" - sg."awayScore") > go."lineValue" THEN 0.91 ELSE -1 END
                ELSE 0
            END))::numeric * 100, 2) as roi,
            -- Favorite win rate
            ROUND(AVG(CASE WHEN go."lineValue" < 0 THEN
                CASE WHEN (sg."homeScore" - sg."awayScore") > go."lineValue" THEN 1 ELSE 0 END
            ELSE NULL END)::numeric * 100, 1) as fav_rate,
            -- Underdog win rate
            ROUND(AVG(CASE WHEN go."lineValue" > 0 THEN
                CASE WHEN (sg."homeScore" - sg."awayScore") > go."lineValue" THEN 1 ELSE 0 END
            ELSE NULL END)::numeric * 100, 1) as dog_rate,
            -- Home/Away
            52.0, 48.0
        FROM "GameOdds" go
        JOIN "SportsGame" sg ON go."gameId" = sg.id::text
        WHERE sg."homeScore" IS NOT NULL
          AND go."lineValue" IS NOT NULL
        GROUP BY go.league, go.market
        HAVING COUNT(*) >= 50
        ON CONFLICT DO NOTHING
    ''')
    conn.commit()

    cur.execute('SELECT COUNT(*) FROM "MarketEfficiency"')
    return cur.fetchone()[0]

def main():
    db_url = load_db_url()
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()

    print("=" * 60)
    print("ENHANCE MARKET EFFICIENCY ANALYSIS")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    # Create table
    create_book_efficiency_table(cur, conn)

    # Compute book efficiency
    book_count = compute_book_efficiency(cur, conn)
    print(f"  Book efficiency records: {book_count}")

    # Update market efficiency
    market_count = update_market_efficiency(cur, conn)
    print(f"  Market efficiency records: {market_count}")

    # Summary
    cur.execute('''
        SELECT bookmaker, league,
               ROUND(AVG("sharpnessScore")::numeric, 1) as avg_sharpness,
               SUM("sampleSize") as total_games
        FROM "BookEfficiency"
        GROUP BY bookmaker, league
        ORDER BY avg_sharpness DESC
        LIMIT 10
    ''')
    print("\nTop Books by Sharpness:")
    for row in cur.fetchall():
        print(f"  {row[0]} ({row[1]}): {row[2]} sharpness, {row[3]} games")

    cur.close()
    conn.close()
    return book_count

if __name__ == '__main__':
    main()
