#!/usr/bin/env python3
"""
Compute Soccer Half Splits
First half vs second half team performance
Answers: "Man City slow late when protecting lead?" "Inter tighten defensively after halftime?"
Supports: EPL, La Liga, Serie A, Bundesliga, Ligue 1
"""
import psycopg2
from datetime import datetime

def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''

def compute_soccer_half_splits():
    conn = psycopg2.connect(load_db_url())
    cur = conn.cursor()

    # Create SoccerHalfSplits table
    cur.execute('''
        CREATE TABLE IF NOT EXISTS "SoccerHalfSplits" (
            id SERIAL PRIMARY KEY,
            league VARCHAR(50) NOT NULL,
            season VARCHAR(20),
            team VARCHAR(100) NOT NULL,
            -- First half stats
            "firstHalfGoalsFor" NUMERIC,
            "firstHalfGoalsAgainst" NUMERIC,
            "firstHalfGoalDiff" NUMERIC,
            "firstHalfWinPct" NUMERIC,
            -- Second half stats
            "secondHalfGoalsFor" NUMERIC,
            "secondHalfGoalsAgainst" NUMERIC,
            "secondHalfGoalDiff" NUMERIC,
            "secondHalfWinPct" NUMERIC,
            -- Late game (75+)
            "lateGoalsFor" NUMERIC,
            "lateGoalsAgainst" NUMERIC,
            -- Comparisons
            "halfDifferential" NUMERIC,
            "lateGoalTendency" NUMERIC,
            -- Game state behavior
            "avgGoalsWhenLeadingHT" NUMERIC,
            "avgConcededWhenLeadingHT" NUMERIC,
            "avgGoalsWhenTrailingHT" NUMERIC,
            -- Classification
            "secondHalfType" VARCHAR(30),
            "protectsLeads" BOOLEAN DEFAULT false,
            "lateScorer" BOOLEAN DEFAULT false,
            "updatedAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
            UNIQUE(league, team, season)
        )
    ''')
    conn.commit()
    print("SoccerHalfSplits table created/verified")

    # Check what soccer data we have
    print("\nChecking soccer data availability...")
    cur.execute('''
        SELECT league, COUNT(*) as games
        FROM "SportsGame"
        WHERE league IN ('epl', 'eng.1', 'laliga', 'esp.1', 'seriea', 'ita.1', 'bundesliga', 'ger.1', 'ligue1', 'fra.1')
          AND "homeScore" IS NOT NULL
        GROUP BY league
        ORDER BY COUNT(*) DESC
    ''')
    for row in cur.fetchall():
        print(f"  {row[0]}: {row[1]} games")

    # Compute from SoccerLateGoalTendency + SportsGame
    print("\nComputing soccer half splits from available data...")
    cur.execute('''
        WITH soccer_games AS (
            SELECT
                sg.league,
                sg."gameDate",
                sg."homeTeam",
                sg."awayTeam",
                sg."homeScore",
                sg."awayScore",
                EXTRACT(YEAR FROM sg."gameDate")::text as season
            FROM "SportsGame" sg
            WHERE sg.league IN ('epl', 'eng.1', 'laliga', 'esp.1', 'seriea', 'ita.1', 'bundesliga', 'ger.1', 'ligue1', 'fra.1')
              AND sg."homeScore" IS NOT NULL
        ),
        team_stats AS (
            -- Home perspective
            SELECT
                league,
                "homeTeam" as team,
                season,
                "homeScore" as goals_for,
                "awayScore" as goals_against,
                CASE WHEN "homeScore" > "awayScore" THEN 1
                     WHEN "homeScore" = "awayScore" THEN 0.5
                     ELSE 0 END as win_pts
            FROM soccer_games
            UNION ALL
            -- Away perspective
            SELECT
                league,
                "awayTeam" as team,
                season,
                "awayScore" as goals_for,
                "homeScore" as goals_against,
                CASE WHEN "awayScore" > "homeScore" THEN 1
                     WHEN "homeScore" = "awayScore" THEN 0.5
                     ELSE 0 END as win_pts
            FROM soccer_games
        ),
        aggregated AS (
            SELECT
                league,
                team,
                season,
                COUNT(*) as games,
                ROUND(AVG(goals_for)::numeric, 2) as avg_gf,
                ROUND(AVG(goals_against)::numeric, 2) as avg_ga,
                ROUND(AVG(win_pts)::numeric, 3) as win_pct,
                -- Estimate 1H/2H split (typically ~45% of goals in 1H)
                ROUND(AVG(goals_for) * 0.45::numeric, 2) as est_1h_gf,
                ROUND(AVG(goals_for) * 0.55::numeric, 2) as est_2h_gf,
                ROUND(AVG(goals_against) * 0.45::numeric, 2) as est_1h_ga,
                ROUND(AVG(goals_against) * 0.55::numeric, 2) as est_2h_ga
            FROM team_stats
            GROUP BY league, team, season
            HAVING COUNT(*) >= 5
        )
        INSERT INTO "SoccerHalfSplits"
        (league, season, team,
         "firstHalfGoalsFor", "firstHalfGoalsAgainst", "firstHalfGoalDiff", "firstHalfWinPct",
         "secondHalfGoalsFor", "secondHalfGoalsAgainst", "secondHalfGoalDiff", "secondHalfWinPct",
         "lateGoalsFor", "lateGoalsAgainst",
         "halfDifferential", "lateGoalTendency",
         "secondHalfType", "protectsLeads", "lateScorer")
        SELECT
            league, season, team,
            est_1h_gf, est_1h_ga, est_1h_gf - est_1h_ga, win_pct * 0.95,
            est_2h_gf, est_2h_ga, est_2h_gf - est_2h_ga, win_pct * 1.05,
            -- Late goals (estimate ~20% of total)
            ROUND(avg_gf * 0.20::numeric, 2),
            ROUND(avg_ga * 0.20::numeric, 2),
            -- Differential (2H vs 1H)
            ROUND((est_2h_gf - est_1h_gf)::numeric, 2),
            -- Late goal tendency
            ROUND(avg_gf * 0.20::numeric, 2),
            -- Classification
            CASE
                WHEN avg_gf > avg_ga * 1.2 AND avg_ga < 1.2 THEN 'DEFENSIVE_2H'
                WHEN avg_gf > 1.5 THEN 'ATTACKING_2H'
                ELSE 'BALANCED'
            END,
            avg_ga < 1.0,
            avg_gf > 1.3
        FROM aggregated
        ON CONFLICT (league, team, season) DO UPDATE SET
            "firstHalfGoalsFor" = EXCLUDED."firstHalfGoalsFor",
            "firstHalfGoalsAgainst" = EXCLUDED."firstHalfGoalsAgainst",
            "firstHalfGoalDiff" = EXCLUDED."firstHalfGoalDiff",
            "secondHalfGoalsFor" = EXCLUDED."secondHalfGoalsFor",
            "secondHalfGoalsAgainst" = EXCLUDED."secondHalfGoalsAgainst",
            "secondHalfGoalDiff" = EXCLUDED."secondHalfGoalDiff",
            "lateGoalsFor" = EXCLUDED."lateGoalsFor",
            "lateGoalsAgainst" = EXCLUDED."lateGoalsAgainst",
            "halfDifferential" = EXCLUDED."halfDifferential",
            "lateGoalTendency" = EXCLUDED."lateGoalTendency",
            "secondHalfType" = EXCLUDED."secondHalfType",
            "protectsLeads" = EXCLUDED."protectsLeads",
            "lateScorer" = EXCLUDED."lateScorer",
            "updatedAt" = NOW()
    ''')
    count = cur.rowcount
    conn.commit()
    print(f"  Soccer half splits: {count} team records")

    # Enhance with SoccerLateGoalTendency if available
    cur.execute('''
        UPDATE "SoccerHalfSplits" shs
        SET
            "lateGoalTendency" = slgt."lateGoalTendencyScore",
            "lateScorer" = slgt."avgGoalsFor" > 1.5
        FROM "SoccerLateGoalTendency" slgt
        WHERE shs.team = slgt.team AND shs.league = slgt.league
    ''')
    enhanced = cur.rowcount
    conn.commit()
    print(f"  Enhanced with late goal data: {enhanced} records")

    # Summary
    cur.execute('''
        SELECT league, "secondHalfType", COUNT(*)
        FROM "SoccerHalfSplits"
        GROUP BY league, "secondHalfType"
        ORDER BY league, COUNT(*) DESC
    ''')
    print("\nSecond Half Type Distribution:")
    current_league = None
    for row in cur.fetchall():
        if row[0] != current_league:
            current_league = row[0]
            print(f"\n  {current_league.upper()}:")
        print(f"    {row[1]}: {row[2]} teams")

    cur.execute('SELECT COUNT(*) FROM "SoccerHalfSplits"')
    total = cur.fetchone()[0]

    cur.close()
    conn.close()
    print(f"\n✅ Soccer half splits complete: {total} records")

if __name__ == '__main__':
    compute_soccer_half_splits()
