#!/usr/bin/env python3
"""
Ingest Kaggle NCAA basketball odds (ncaa_main_lines.csv) into SportsDB.SportsGame.

Matches by team names + date, updates odds fields where NULL.
"""

import csv
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path

# Add prisma path
sys.path.insert(0, '/var/www/html/eventheodds')

import psycopg2

DATA_DIR = Path('/var/www/html/eventheodds/data')
KAGGLE_FILE = DATA_DIR / 'kaggle' / 'ncaa_main_lines.csv'

def get_db_connection():
    url = os.environ.get('SPORTS_DATABASE_URL')
    if not url:
        raise RuntimeError("SPORTS_DATABASE_URL not set")
    # Parse postgres URL
    # postgresql://user:pass@host:port/db?schema=public
    import re
    # Remove query params
    url_clean = url.split('?')[0]
    m = re.match(r'postgresql://([^:]+):([^@]+)@([^:]+):(\d+)/(.+)', url_clean)
    if not m:
        raise RuntimeError(f"Cannot parse DB URL: {url}")
    return psycopg2.connect(
        user=m.group(1),
        password=m.group(2),
        host=m.group(3),
        port=int(m.group(4)),
        database=m.group(5)
    )

def decimal_to_american(dec):
    """Convert decimal odds to American."""
    if dec is None or dec <= 1:
        return None
    if dec >= 2.0:
        return int((dec - 1) * 100)
    else:
        return int(-100 / (dec - 1))

def parse_float(s):
    if not s or s.strip() == '':
        return None
    try:
        return float(s)
    except:
        return None

def normalize_team(name):
    """Normalize team name for matching."""
    return ' '.join(name.lower().strip().split())

def main():
    if not KAGGLE_FILE.exists():
        print(f"File not found: {KAGGLE_FILE}")
        return

    conn = get_db_connection()
    cur = conn.cursor()

    # Load CSV
    rows = []
    with open(KAGGLE_FILE, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            rows.append(row)

    print(f"Loaded {len(rows)} rows from {KAGGLE_FILE.name}")

    # Dedupe by (team1, team2, date) - keep first occurrence
    seen = set()
    unique_rows = []
    for row in rows:
        team1 = row.get('team1', '').strip()
        team2 = row.get('team2', '').strip()
        timestamp = row.get('timestamp', '')[:10]
        key = (team1, team2, timestamp)
        if key not in seen:
            seen.add(key)
            unique_rows.append(row)

    print(f"Unique games: {len(unique_rows)}")

    inserted = 0
    skipped = 0

    for row in unique_rows:
        team1 = row.get('team1', '').strip()
        team2 = row.get('team2', '').strip()
        timestamp = row.get('timestamp', '')

        if not team1 or not team2 or not timestamp:
            skipped += 1
            continue

        # Parse odds
        spread_home = parse_float(row.get('team1_spread'))
        spread_away = parse_float(row.get('team2_spread'))
        total = parse_float(row.get('over_total'))
        ml_home_dec = parse_float(row.get('team1_moneyline'))
        ml_away_dec = parse_float(row.get('team2_moneyline'))

        ml_home = decimal_to_american(ml_home_dec)
        ml_away = decimal_to_american(ml_away_dec)

        try:
            game_date = datetime.strptime(timestamp[:19], '%Y-%m-%d %H:%M:%S')
        except:
            try:
                game_date = datetime.strptime(timestamp[:10], '%Y-%m-%d')
            except:
                skipped += 1
                continue

        # Extract season from date
        year = game_date.year
        month = game_date.month
        season = year if month >= 9 else year - 1  # NCAA season spans fall-spring

        # Create external game ID for deduplication
        ext_id = f"kaggle_ncaa:{team1}:{team2}:{timestamp[:10]}"

        # Insert with ON CONFLICT DO NOTHING
        try:
            cur.execute("""
                INSERT INTO "SportsGame" (
                    league, season, "gameDate", "homeTeam", "awayTeam",
                    "spreadHome", "spreadAway", total, "moneylineHome", "moneylineAway",
                    "oddsSource", "oddsUpdatedAt", "externalGameId", status, "updatedAt"
                ) VALUES (
                    'ncaab', %s, %s, %s, %s,
                    %s, %s, %s, %s, %s,
                    'kaggle_ncaa', NOW(), %s, 'scheduled', NOW()
                )
                ON CONFLICT ("externalGameId") DO NOTHING
            """, (
                season, game_date, team1, team2,
                spread_home, spread_away, total, ml_home, ml_away,
                ext_id
            ))
            if cur.rowcount > 0:
                inserted += 1
        except Exception as e:
            print(f"Error inserting {team1} vs {team2}: {e}")
            conn.rollback()  # Reset transaction on error
            skipped += 1

    conn.commit()
    cur.close()
    conn.close()

    print(f"Inserted: {inserted}, Skipped: {skipped}")

if __name__ == '__main__':
    main()
