#!/usr/bin/env python3
"""
Compute Prop-Pace Correlation
Links player props to game pace for analysis.
Fixes: Q18 "Does prop correlate with game pace?"
"""
import psycopg2

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def compute_prop_pace_correlation():
    conn = psycopg2.connect(load_db_url())
    cur = conn.cursor()

    # Create PropPaceCorrelation table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "PropPaceCorrelation" (
            id SERIAL PRIMARY KEY,
            league VARCHAR(20) NOT NULL,
            "propType" VARCHAR(50),
            "avgPace" NUMERIC,
            "avgPropLine" NUMERIC,
            "avgActual" NUMERIC,
            "hitRateHighPace" NUMERIC,
            "hitRateLowPace" NUMERIC,
            "correlation" NUMERIC,
            "sampleSize" INTEGER,
            "paceImpact" VARCHAR(30),
            "updatedAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
            UNIQUE(league, "propType")
        )
    ''')
    conn.commit()
    print("PropPaceCorrelation table created/verified")

    # Calculate pace by team from TeamGameMetric (pivoted schema: statKey/value)
    print("\nComputing team pace metrics...")
    cur.execute('''
        CREATE TEMP TABLE team_pace AS
        SELECT
            league,
            team,
            AVG(value) as avg_pace,
            STDDEV(value) as pace_stddev,
            COUNT(*) as games
        FROM "TeamGameMetric"
        WHERE "statKey" IN ('pace', 'possessions', 'fga', 'totalPlays')
        GROUP BY league, team
        HAVING COUNT(*) >= 5
    ''')

    # Check if we have pace data
    cur.execute('SELECT COUNT(*), AVG(avg_pace) FROM team_pace')
    pace_check = cur.fetchone()
    print(f"  Teams with pace data: {pace_check[0]}, Avg pace: {pace_check[1]:.1f}" if pace_check[1] else "  No pace data found")

    if pace_check[0] == 0:
        print("\n  Note: Limited pace data available. Using TeamRollingStats scoring as pace proxy...")
        # Fall back to scoring as pace proxy (higher scoring teams = higher pace games)
        cur.execute('DROP TABLE IF EXISTS team_pace')
        cur.execute('''
            CREATE TEMP TABLE team_pace AS
            SELECT
                league,
                team,
                AVG(last5_points_for) as avg_pace,
                STDDEV(last5_points_for) as pace_stddev,
                COUNT(*) as games
            FROM "TeamRollingStats"
            WHERE last5_points_for IS NOT NULL
            GROUP BY league, team
            HAVING COUNT(*) >= 3
        ''')

    # Compute prop-pace correlations
    print("\nComputing prop-pace correlations...")

    # Get prop types
    cur.execute('''
        SELECT DISTINCT league, "propType"
        FROM "PlayerPropLine"
        WHERE "propType" IS NOT NULL
    ''')
    prop_types = cur.fetchall()

    for league, prop_type in prop_types:
        # Skip if not enough data
        cur.execute('''
            SELECT COUNT(*)
            FROM "PlayerPropLine" ppl
            WHERE ppl.league = %s AND ppl."propType" = %s
        ''', (league, prop_type))
        count = cur.fetchone()[0]

        if count < 50:
            continue

        # Calculate correlation
        # High pace = teams scoring above their average
        # Low pace = teams scoring below their average
        cur.execute('''
            WITH prop_with_pace AS (
                SELECT
                    ppl.league,
                    ppl."propType",
                    ppl."lineValue",
                    ppl."gameId",
                    tp.avg_pace,
                    CASE
                        WHEN tp.avg_pace > (SELECT AVG(avg_pace) FROM team_pace WHERE league = ppl.league)
                        THEN 'HIGH'
                        ELSE 'LOW'
                    END as pace_category
                FROM "PlayerPropLine" ppl
                LEFT JOIN "SportsGame" sg ON ppl."gameId" = sg.id
                LEFT JOIN team_pace tp ON sg."homeTeam" = tp.team AND sg.league = tp.league
                WHERE ppl.league = %s
                  AND ppl."propType" = %s
                  AND tp.avg_pace IS NOT NULL
            )
            SELECT
                league,
                "propType",
                AVG(avg_pace) as avg_pace,
                AVG("lineValue") as avg_line,
                COUNT(*) as sample_size,
                AVG(CASE WHEN pace_category = 'HIGH' THEN "lineValue" END) as high_pace_avg,
                AVG(CASE WHEN pace_category = 'LOW' THEN "lineValue" END) as low_pace_avg
            FROM prop_with_pace
            GROUP BY league, "propType"
            HAVING COUNT(*) >= 20
        ''', (league, prop_type))

        result = cur.fetchone()
        if not result:
            continue

        league, prop_type, avg_pace, avg_line, sample_size, high_pace_avg, low_pace_avg = result

        # Determine pace impact
        if high_pace_avg and low_pace_avg:
            diff = (high_pace_avg - low_pace_avg) / low_pace_avg * 100 if low_pace_avg else 0
            if diff > 5:
                pace_impact = 'POSITIVE_HIGH'
            elif diff > 2:
                pace_impact = 'POSITIVE_MODERATE'
            elif diff < -5:
                pace_impact = 'NEGATIVE_HIGH'
            elif diff < -2:
                pace_impact = 'NEGATIVE_MODERATE'
            else:
                pace_impact = 'NEUTRAL'
            correlation = diff / 100
        else:
            pace_impact = 'UNKNOWN'
            correlation = None

        cur.execute('''
            INSERT INTO "PropPaceCorrelation"
            (league, "propType", "avgPace", "avgPropLine", "sampleSize", "correlation", "paceImpact")
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT (league, "propType") DO UPDATE SET
                "avgPace" = EXCLUDED."avgPace",
                "avgPropLine" = EXCLUDED."avgPropLine",
                "sampleSize" = EXCLUDED."sampleSize",
                "correlation" = EXCLUDED."correlation",
                "paceImpact" = EXCLUDED."paceImpact",
                "updatedAt" = NOW()
        ''', (league, prop_type, avg_pace, avg_line, sample_size, correlation, pace_impact))

    conn.commit()

    # Summary
    cur.execute('''
        SELECT league, "propType", "paceImpact", "sampleSize", "correlation"
        FROM "PropPaceCorrelation"
        WHERE "paceImpact" != 'NEUTRAL'
        ORDER BY ABS("correlation") DESC NULLS LAST
        LIMIT 20
    ''')
    print("\n\nTop Pace-Correlated Props:")
    print("-" * 70)
    print(f"{'League':<10} {'Prop Type':<25} {'Impact':<20} {'Sample':<10} {'Corr':<10}")
    print("-" * 70)
    for row in cur.fetchall():
        corr = f"{row[4]:.2%}" if row[4] else "N/A"
        print(f"{row[0]:<10} {row[1]:<25} {row[2]:<20} {row[3]:<10} {corr:<10}")

    cur.execute('SELECT COUNT(*) FROM "PropPaceCorrelation"')
    total = cur.fetchone()[0]

    cur.close()
    conn.close()
    print(f"\n✅ Prop-pace correlation complete: {total} prop types analyzed")

if __name__ == '__main__':
    compute_prop_pace_correlation()
