#!/usr/bin/env python3
"""
Create NFLDriveStats table for run abandonment analysis.
Derives quarter-level rushing tendencies from PlayerGameMetric where available.
"""

import psycopg2
from datetime import datetime

DB_URL = "postgresql://eventheodds:eventheodds_dev_password@127.0.0.1:5433/eventheodds_sports"

def main():
    conn = psycopg2.connect(DB_URL)
    cur = conn.cursor()

    # Create table for NFL drive/quarter stats
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "NFLDriveStats" (
            id BIGSERIAL PRIMARY KEY,
            league TEXT DEFAULT 'nfl',
            "gameId" BIGINT,
            "gameDate" TIMESTAMP,
            team TEXT NOT NULL,
            quarter INTEGER,
            "gameState" TEXT,
            "rushAttempts" INTEGER DEFAULT 0,
            "rushYards" INTEGER DEFAULT 0,
            "passAttempts" INTEGER DEFAULT 0,
            "passYards" INTEGER DEFAULT 0,
            "rushPct" DOUBLE PRECISION,
            "scoreDifferential" INTEGER,
            "isTrailing" BOOLEAN DEFAULT FALSE,
            "createdAt" TIMESTAMP DEFAULT NOW(),
            UNIQUE("gameId", team, quarter)
        )
    ''')

    cur.execute('CREATE INDEX IF NOT EXISTS idx_nfl_drive_team ON "NFLDriveStats"(team)')
    cur.execute('CREATE INDEX IF NOT EXISTS idx_nfl_drive_quarter ON "NFLDriveStats"(quarter)')

    conn.commit()
    print("NFLDriveStats table created.")

    # Create NFL run tendency analysis table (aggregated)
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "NFLRunTendency" (
            id BIGSERIAL PRIMARY KEY,
            league TEXT DEFAULT 'nfl',
            season INTEGER NOT NULL,
            team TEXT NOT NULL,
            "gamesPlayed" INTEGER DEFAULT 0,
            "totalRushAtt" INTEGER DEFAULT 0,
            "totalRushYards" INTEGER DEFAULT 0,
            "totalPassAtt" INTEGER DEFAULT 0,
            "totalPassYards" INTEGER DEFAULT 0,
            "rushPctOverall" DOUBLE PRECISION,
            "rushPctWhenLeading" DOUBLE PRECISION,
            "rushPctWhenTrailing" DOUBLE PRECISION,
            "rushPctQ1" DOUBLE PRECISION,
            "rushPctQ4" DOUBLE PRECISION,
            "runAbandonmentScore" DOUBLE PRECISION,
            "avgPointsFor" DOUBLE PRECISION,
            "avgPointsAgainst" DOUBLE PRECISION,
            "updatedAt" TIMESTAMP DEFAULT NOW(),
            UNIQUE(league, season, team)
        )
    ''')

    conn.commit()
    print("NFLRunTendency table created.")

    # Check if we have NFL rushing data in PlayerGameMetric
    cur.execute('''
        SELECT COUNT(*) FROM "PlayerGameMetric"
        WHERE league = 'nfl' AND "statKey" IN ('rushAttempts', 'rushYards', 'passAttempts', 'passYards')
    ''')
    nfl_stats = cur.fetchone()[0]
    print(f"Found {nfl_stats} NFL rushing/passing stats in PlayerGameMetric")

    if nfl_stats > 0:
        # Compute team-level rushing tendencies from player stats
        print("Computing NFL run tendencies from player stats...")
        cur.execute('''
            INSERT INTO "NFLRunTendency"
            (league, season, team, "gamesPlayed", "totalRushAtt", "totalRushYards",
             "totalPassAtt", "totalPassYards", "rushPctOverall")
            SELECT
                'nfl' as league,
                season,
                team,
                COUNT(DISTINCT "gameKey") as games,
                SUM(CASE WHEN "statKey" = 'rushAttempts' THEN value ELSE 0 END)::int as rush_att,
                SUM(CASE WHEN "statKey" = 'rushYards' THEN value ELSE 0 END)::int as rush_yds,
                SUM(CASE WHEN "statKey" = 'passAttempts' THEN value ELSE 0 END)::int as pass_att,
                SUM(CASE WHEN "statKey" = 'passYards' THEN value ELSE 0 END)::int as pass_yds,
                ROUND(SUM(CASE WHEN "statKey" = 'rushAttempts' THEN value ELSE 0 END)::numeric /
                      NULLIF(SUM(CASE WHEN "statKey" IN ('rushAttempts', 'passAttempts') THEN value ELSE 0 END), 0) * 100, 1)
            FROM "PlayerGameMetric"
            WHERE league = 'nfl' AND "statKey" IN ('rushAttempts', 'rushYards', 'passAttempts', 'passYards')
            GROUP BY season, team
            HAVING COUNT(DISTINCT "gameKey") >= 5
            ON CONFLICT (league, season, team) DO UPDATE SET
                "gamesPlayed" = EXCLUDED."gamesPlayed",
                "totalRushAtt" = EXCLUDED."totalRushAtt",
                "totalRushYards" = EXCLUDED."totalRushYards",
                "totalPassAtt" = EXCLUDED."totalPassAtt",
                "totalPassYards" = EXCLUDED."totalPassYards",
                "rushPctOverall" = EXCLUDED."rushPctOverall",
                "updatedAt" = NOW()
        ''')
        conn.commit()

    # Also derive from game scores - teams that win big tend to run more
    print("Computing run tendency proxy from game outcomes...")
    cur.execute('''
        INSERT INTO "NFLRunTendency"
        (league, season, team, "gamesPlayed", "avgPointsFor", "avgPointsAgainst", "runAbandonmentScore")
        SELECT
            'nfl' as league,
            season,
            team,
            COUNT(*) as games,
            ROUND(AVG(points_for)::numeric, 1) as avg_pf,
            ROUND(AVG(points_against)::numeric, 1) as avg_pa,
            -- Run abandonment score: high score differential + high total = likely ran more
            ROUND((AVG(points_for) - AVG(points_against))::numeric +
                  (AVG(points_for + points_against) - 45)::numeric / 10, 2) as run_abandon_score
        FROM (
            SELECT season, "homeTeam" as team, "homeScore" as points_for, "awayScore" as points_against
            FROM "SportsGame" WHERE league = 'nfl' AND "homeScore" IS NOT NULL
            UNION ALL
            SELECT season, "awayTeam" as team, "awayScore" as points_for, "homeScore" as points_against
            FROM "SportsGame" WHERE league = 'nfl' AND "awayScore" IS NOT NULL
        ) t
        GROUP BY season, team
        ON CONFLICT (league, season, team) DO UPDATE SET
            "gamesPlayed" = GREATEST(EXCLUDED."gamesPlayed", "NFLRunTendency"."gamesPlayed"),
            "avgPointsFor" = EXCLUDED."avgPointsFor",
            "avgPointsAgainst" = EXCLUDED."avgPointsAgainst",
            "runAbandonmentScore" = EXCLUDED."runAbandonmentScore",
            "updatedAt" = NOW()
    ''')

    conn.commit()

    cur.execute('SELECT COUNT(*) FROM "NFLRunTendency"')
    count = cur.fetchone()[0]
    print(f"NFLRunTendency: {count} team-season records created.")

    # Show sample
    cur.execute('''
        SELECT team, season, "gamesPlayed", "rushPctOverall", "runAbandonmentScore"
        FROM "NFLRunTendency"
        WHERE season >= 2023
        ORDER BY "runAbandonmentScore" DESC NULLS LAST
        LIMIT 5
    ''')
    print("\nTeams with highest run tendency scores (2023+):")
    for row in cur.fetchall():
        rush_pct = f"{row[3]}%" if row[3] else "N/A"
        score = row[4] if row[4] else "N/A"
        print(f"  {row[0]} ({row[1]}): {row[2]} games, Rush%: {rush_pct}, Score: {score}")

    cur.close()
    conn.close()

if __name__ == "__main__":
    main()
