#!/usr/bin/env python3
"""
UFC Pipeline: Transform UfcFight/UfcFighter data into PlayerGameMetric rows,
populate the Player table, and create an odds-matching view.

Connects to PostgreSQL on localhost:5433 (eventheodds_sports).
"""

import sys
import os
import tempfile
import subprocess
import json

DB_HOST = "127.0.0.1"
DB_PORT = "5433"
DB_NAME = "eventheodds_sports"
DB_USER = "eventheodds"
DB_PASS = "eventheodds_dev_password"

STAT_KEYS = [
    "knockdowns",
    "takedowns",
    "total_strikes",
    "submission_attempts",
    "control_time_sec",
    "sig_strike_pct",
    "win",
]

# Mapping from statKey to JSONB key suffix (before _1/_2)
JSONB_MAP = {
    "knockdowns": "KD",
    "takedowns": "TD",
    "total_strikes": "STR",
    "submission_attempts": "SUB",
    "control_time_sec": "Ctrl",
    "sig_strike_pct": "Sig. Str._%",
    "win": None,  # derived from Result_1/Result_2
}


def run_sql(sql, fetch=False):
    """Run SQL via psql using a tempfile (avoids shell quoting issues with CamelCase)."""
    with tempfile.NamedTemporaryFile(mode="w", suffix=".sql", delete=False) as f:
        f.write(sql)
        f.flush()
        tmppath = f.name

    env = os.environ.copy()
    env["PGPASSWORD"] = DB_PASS

    cmd = [
        "psql", "-h", DB_HOST, "-p", DB_PORT, "-U", DB_USER, "-d", DB_NAME,
        "-v", "ON_ERROR_STOP=1",
        "-f", tmppath, "--no-align", "--tuples-only", "--quiet"
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, env=env, timeout=300)
        if result.returncode != 0 or "ERROR:" in result.stderr:
            print(f"SQL ERROR (rc={result.returncode}):", file=sys.stderr)
            print(f"  stderr: {result.stderr}", file=sys.stderr)
            print(f"  stdout: {result.stdout[:500]}", file=sys.stderr)
            sys.exit(1)
        return result.stdout.strip()
    finally:
        os.unlink(tmppath)


def run_sql_rows(sql):
    """Run SQL and return list of tuples (pipe-separated)."""
    output = run_sql(sql, fetch=True)
    if not output:
        return []
    rows = []
    for line in output.split("\n"):
        line = line.strip()
        if line:
            rows.append(tuple(col.strip() for col in line.split("|")))
    return rows


def safe_float(val, default=0.0):
    """Parse a string to float, handling empty/missing values."""
    if val is None or val == "" or val == "None":
        return default
    try:
        return float(val)
    except (ValueError, TypeError):
        return default


def safe_int(val, default=0):
    """Parse a string to int, handling empty/missing and float strings."""
    if val is None or val == "" or val == "None":
        return default
    try:
        return int(float(val))
    except (ValueError, TypeError):
        return default


def step1_populate_players():
    """Insert UfcFighters into Player table."""
    print("=" * 60)
    print("STEP 1: Populating Player table with UFC fighters")
    print("=" * 60)

    sql = """
INSERT INTO "Player" (league, "externalPlayerId", name, position, team, "createdAt", "updatedAt")
SELECT 'ufc', f."fighterId", f.name, NULL, NULL, NOW(), NOW()
FROM "UfcFighter" f
ON CONFLICT (league, "externalPlayerId") DO NOTHING;
"""
    run_sql(sql)

    count = run_sql('SELECT COUNT(*) FROM "Player" WHERE league = \'ufc\';')
    print(f"  Players in Player table (league=ufc): {count}")
    return int(count)


def step2_populate_pgm():
    """Transform UfcFight JSONB data into PlayerGameMetric rows."""
    print("\n" + "=" * 60)
    print("STEP 2: Transforming UfcFight data into PlayerGameMetric")
    print("=" * 60)

    # Build a single INSERT that extracts all stats for both fighters from each fight.
    # For each fight, we produce 7 statKeys x 2 fighters = 14 rows.
    # We use UNION ALL to generate the two fighter sides, then CROSS JOIN with stat keys.

    sql = """
-- Insert fighter 1 stats
INSERT INTO "PlayerGameMetric"
  (league, season, "gameKey", "gameDate", "playerExternalId", "playerName",
   position, team, opponent, "statKey", value, "createdAt")
SELECT
  'ufc',
  EXTRACT(YEAR FROM e.date)::int,
  'ufc:' || f."eventId" || ':' || f."fightId",
  e.date,
  f."fighter1Id",
  f."fighter1Name",
  f.raw->>'Weight_Class',
  'UFC',
  f."fighter2Name",
  s.stat_key,
  CASE s.stat_key
    WHEN 'knockdowns'          THEN COALESCE(NULLIF(f.raw->>'KD_1','')::double precision, 0)
    WHEN 'takedowns'           THEN COALESCE(NULLIF(f.raw->>'TD_1','')::double precision, 0)
    WHEN 'total_strikes'       THEN COALESCE(NULLIF(f.raw->>'STR_1','')::double precision, 0)
    WHEN 'submission_attempts' THEN COALESCE(NULLIF(f.raw->>'SUB_1','')::double precision, 0)
    WHEN 'control_time_sec'    THEN COALESCE(NULLIF(f.raw->>'Ctrl_1','')::double precision, 0)
    WHEN 'sig_strike_pct'      THEN COALESCE(NULLIF(f.raw->>'Sig. Str._%_1','')::double precision, 0)
    WHEN 'win'                 THEN CASE WHEN f."result1" = 'W' THEN 1.0 ELSE 0.0 END
  END,
  NOW()
FROM "UfcFight" f
JOIN "UfcEvent" e ON e."eventId" = f."eventId"
CROSS JOIN (
  VALUES ('knockdowns'), ('takedowns'), ('total_strikes'),
         ('submission_attempts'), ('control_time_sec'), ('sig_strike_pct'), ('win')
) AS s(stat_key)
WHERE e.date IS NOT NULL
ON CONFLICT (league, season, "gameKey", "playerExternalId", "statKey")
DO UPDATE SET
  value = EXCLUDED.value,
  "playerName" = EXCLUDED."playerName",
  position = EXCLUDED.position,
  opponent = EXCLUDED.opponent;

-- Insert fighter 2 stats
INSERT INTO "PlayerGameMetric"
  (league, season, "gameKey", "gameDate", "playerExternalId", "playerName",
   position, team, opponent, "statKey", value, "createdAt")
SELECT
  'ufc',
  EXTRACT(YEAR FROM e.date)::int,
  'ufc:' || f."eventId" || ':' || f."fightId",
  e.date,
  f."fighter2Id",
  f."fighter2Name",
  f.raw->>'Weight_Class',
  'UFC',
  f."fighter1Name",
  s.stat_key,
  CASE s.stat_key
    WHEN 'knockdowns'          THEN COALESCE(NULLIF(f.raw->>'KD_2','')::double precision, 0)
    WHEN 'takedowns'           THEN COALESCE(NULLIF(f.raw->>'TD_2','')::double precision, 0)
    WHEN 'total_strikes'       THEN COALESCE(NULLIF(f.raw->>'STR_2','')::double precision, 0)
    WHEN 'submission_attempts' THEN COALESCE(NULLIF(f.raw->>'SUB_2','')::double precision, 0)
    WHEN 'control_time_sec'    THEN COALESCE(NULLIF(f.raw->>'Ctrl_2','')::double precision, 0)
    WHEN 'sig_strike_pct'      THEN COALESCE(NULLIF(f.raw->>'Sig. Str._%_2','')::double precision, 0)
    WHEN 'win'                 THEN CASE WHEN f."result2" = 'W' THEN 1.0 ELSE 0.0 END
  END,
  NOW()
FROM "UfcFight" f
JOIN "UfcEvent" e ON e."eventId" = f."eventId"
CROSS JOIN (
  VALUES ('knockdowns'), ('takedowns'), ('total_strikes'),
         ('submission_attempts'), ('control_time_sec'), ('sig_strike_pct'), ('win')
) AS s(stat_key)
WHERE e.date IS NOT NULL
ON CONFLICT (league, season, "gameKey", "playerExternalId", "statKey")
DO UPDATE SET
  value = EXCLUDED.value,
  "playerName" = EXCLUDED."playerName",
  position = EXCLUDED.position,
  opponent = EXCLUDED.opponent;
"""
    run_sql(sql)

    count = run_sql('SELECT COUNT(*) FROM "PlayerGameMetric" WHERE league = \'ufc\';')
    print(f"  PGM rows (league=ufc): {count}")

    stat_counts = run_sql_rows("""
SELECT "statKey", COUNT(*) FROM "PlayerGameMetric"
WHERE league = 'ufc' GROUP BY "statKey" ORDER BY "statKey";
""")
    for row in stat_counts:
        print(f"    {row[0]}: {row[1]}")

    return int(count)


def step3_create_odds_view():
    """Create a view joining UfcFight to UfcOdds for odds matching."""
    print("\n" + "=" * 60)
    print("STEP 3: Creating UfcFightOddsView")
    print("=" * 60)

    sql = """
DROP VIEW IF EXISTS "UfcFightOddsView";

CREATE VIEW "UfcFightOddsView" AS
SELECT
  f."fightId",
  f."eventId",
  e.name AS event_name,
  e.date AS event_date,
  f."fighter1Name",
  f."fighter2Name",
  f."weightClass",
  f."result1",
  f."result2",
  f.method,
  o.odds1,
  o.odds2,
  o."f1KoOdds",
  o."f2KoOdds",
  o."f1SubOdds",
  o."f2SubOdds",
  o."f1DecOdds",
  o."f2DecOdds",
  o.source AS odds_source,
  o."addingDate" AS odds_date
FROM "UfcFight" f
JOIN "UfcEvent" e ON e."eventId" = f."eventId"
LEFT JOIN "UfcOdds" o ON (
  -- Match by date (same day +/- 1 day)
  ABS(EXTRACT(EPOCH FROM (e.date - o."eventDate"))) < 86400 * 2
  AND (
    -- Fighter names match in either order (case-insensitive, trimmed)
    (
      LOWER(TRIM(o.fighter1)) = LOWER(TRIM(f."fighter1Name"))
      AND LOWER(TRIM(o.fighter2)) = LOWER(TRIM(f."fighter2Name"))
    )
    OR (
      LOWER(TRIM(o.fighter1)) = LOWER(TRIM(f."fighter2Name"))
      AND LOWER(TRIM(o.fighter2)) = LOWER(TRIM(f."fighter1Name"))
    )
  )
);
"""
    run_sql(sql)

    count = run_sql("""
SELECT COUNT(*) FROM "UfcFightOddsView" WHERE odds1 IS NOT NULL;
""")
    print(f"  Fights with matched odds: {count}")


def step4_summary():
    """Print summary stats."""
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)

    player_count = run_sql('SELECT COUNT(*) FROM "Player" WHERE league = \'ufc\';')
    pgm_count = run_sql('SELECT COUNT(*) FROM "PlayerGameMetric" WHERE league = \'ufc\';')
    distinct_stats = run_sql(
        'SELECT COUNT(DISTINCT "statKey") FROM "PlayerGameMetric" WHERE league = \'ufc\';'
    )
    distinct_fighters = run_sql(
        'SELECT COUNT(DISTINCT "playerExternalId") FROM "PlayerGameMetric" WHERE league = \'ufc\';'
    )
    date_range = run_sql("""
SELECT MIN("gameDate")::date || ' to ' || MAX("gameDate")::date
FROM "PlayerGameMetric" WHERE league = 'ufc';
""")
    weight_classes = run_sql("""
SELECT COUNT(DISTINCT position) FROM "PlayerGameMetric" WHERE league = 'ufc';
""")

    print(f"  Players:           {player_count}")
    print(f"  PGM rows:          {pgm_count}")
    print(f"  Distinct stats:    {distinct_stats}")
    print(f"  Distinct fighters: {distinct_fighters}")
    print(f"  Date range:        {date_range}")
    print(f"  Weight classes:    {weight_classes}")

    # Sample: top 5 fighters by knockdowns
    print("\n  Top 5 fighters by total knockdowns:")
    rows = run_sql_rows("""
SELECT "playerName", SUM(value)::int AS total_kd, COUNT(*) AS fights
FROM "PlayerGameMetric"
WHERE league = 'ufc' AND "statKey" = 'knockdowns'
GROUP BY "playerName"
ORDER BY total_kd DESC
LIMIT 5;
""")
    for name, kd, fights in rows:
        print(f"    {name}: {kd} KD in {fights} fights")


def main():
    print("UFC Pipeline: Transform Fight Data into Standard Analytics Format")
    print("=" * 60)

    step1_populate_players()
    step2_populate_pgm()
    step3_create_odds_view()
    step4_summary()

    print("\nDone!")


if __name__ == "__main__":
    main()
