#!/usr/bin/env python3
"""
Ingest sportsdataverse WNBA schedules (wehoop-wnba-data) into SportsDB.SportsGame.

Source: data/sportsdataverse/wehoop-wnba-data/wnba/schedules/parquet/*.parquet
"""

import os
import re
import sys
from pathlib import Path
from datetime import datetime

import pyarrow.parquet as pq
import psycopg2

DATA_DIR = Path('/var/www/html/eventheodds/data/sportsdataverse/wehoop-wnba-data/wnba/schedules/parquet')

def get_db_connection():
    url = os.environ.get('SPORTS_DATABASE_URL')
    if not url:
        raise RuntimeError("SPORTS_DATABASE_URL not set")
    url_clean = url.split('?')[0]
    m = re.match(r'postgresql://([^:]+):([^@]+)@([^:]+):(\d+)/(.+)', url_clean)
    if not m:
        raise RuntimeError(f"Cannot parse DB URL: {url}")
    return psycopg2.connect(
        user=m.group(1),
        password=m.group(2),
        host=m.group(3),
        port=int(m.group(4)),
        database=m.group(5)
    )

def safe_int(val):
    if val is None:
        return None
    try:
        return int(val)
    except:
        return None

def safe_str(val):
    if val is None:
        return None
    return str(val).strip()

def main():
    conn = get_db_connection()
    cur = conn.cursor()

    parquet_files = sorted(DATA_DIR.glob('wnba_schedule_*.parquet'))
    print(f"Found {len(parquet_files)} WNBA schedule files")

    total_inserted = 0
    total_skipped = 0

    for pf in parquet_files:
        table = pq.read_table(pf)
        df = table.to_pandas()
        print(f"Processing {pf.name}: {len(df)} games")

        for _, row in df.iterrows():
            game_id = safe_str(row.get('game_id'))
            if not game_id:
                total_skipped += 1
                continue

            season = safe_int(row.get('season'))
            home_team = safe_str(row.get('home_abbreviation')) or safe_str(row.get('home_name'))
            away_team = safe_str(row.get('away_abbreviation')) or safe_str(row.get('away_name'))
            home_score = safe_int(row.get('home_score'))
            away_score = safe_int(row.get('away_score'))

            # Parse game date
            game_date = None
            gdt = row.get('game_date_time')
            if gdt is not None:
                try:
                    if hasattr(gdt, 'to_pydatetime'):
                        game_date = gdt.to_pydatetime()
                    else:
                        game_date = datetime.fromisoformat(str(gdt).replace('Z', '+00:00'))
                except:
                    pass

            if not game_date:
                gd = row.get('game_date')
                if gd:
                    try:
                        game_date = datetime.strptime(str(gd)[:10], '%Y-%m-%d')
                    except:
                        pass

            if not game_date or not home_team or not away_team:
                total_skipped += 1
                continue

            # Determine status
            status_name = safe_str(row.get('status_type_name')) or ''
            if 'final' in status_name.lower():
                status = 'final'
            elif 'progress' in status_name.lower():
                status = 'in_progress'
            else:
                status = 'scheduled'

            ext_id = f"sdv_wnba:{game_id}"

            try:
                cur.execute("""
                    INSERT INTO "SportsGame" (
                        league, season, "gameDate", "homeTeam", "awayTeam",
                        "homeScore", "awayScore", status, "externalGameId", "updatedAt"
                    ) VALUES (
                        'wnba', %s, %s, %s, %s,
                        %s, %s, %s, %s, NOW()
                    )
                    ON CONFLICT ("externalGameId") DO UPDATE SET
                        "homeScore" = COALESCE(EXCLUDED."homeScore", "SportsGame"."homeScore"),
                        "awayScore" = COALESCE(EXCLUDED."awayScore", "SportsGame"."awayScore"),
                        status = EXCLUDED.status,
                        "updatedAt" = NOW()
                """, (
                    season, game_date, home_team, away_team,
                    home_score, away_score, status, ext_id
                ))
                if cur.rowcount > 0:
                    total_inserted += 1
            except Exception as e:
                print(f"Error: {e}")
                conn.rollback()
                total_skipped += 1

        conn.commit()

    cur.close()
    conn.close()

    print(f"Done! Inserted/Updated: {total_inserted}, Skipped: {total_skipped}")

if __name__ == '__main__':
    main()
