#!/usr/bin/env python3
"""
Ingest sportsdataverse CFB line odds (cfb_line_odds.csv) into SportsDB.SportsGame.

This file has ~1M rows of betting data with columns:
id, game_id, season, game_desc, date_time, market_type, abbr, lines, odds,
opening_lines, opening_odds, book, season_type, week

We group by game_id, extract spreads/totals/moneylines, and insert or update.
"""

import csv
import gzip
import os
import sys
from datetime import datetime
from pathlib import Path
from collections import defaultdict
import re

import psycopg2

DATA_DIR = Path('/var/www/html/eventheodds/data')
CFB_ODDS_FILE = DATA_DIR / 'sportsdataverse' / 'cfbfastR-data' / 'betting' / 'csv' / 'cfb_line_odds.csv.gz'

def get_db_connection():
    url = os.environ.get('SPORTS_DATABASE_URL')
    if not url:
        raise RuntimeError("SPORTS_DATABASE_URL not set")
    url_clean = url.split('?')[0]
    m = re.match(r'postgresql://([^:]+):([^@]+)@([^:]+):(\d+)/(.+)', url_clean)
    if not m:
        raise RuntimeError(f"Cannot parse DB URL: {url}")
    return psycopg2.connect(
        user=m.group(1),
        password=m.group(2),
        host=m.group(3),
        port=int(m.group(4)),
        database=m.group(5)
    )

def parse_float(s):
    if s is None:
        return None
    if isinstance(s, (int, float)):
        return float(s) if s == s else None  # NaN check
    s_str = str(s).strip()
    if not s_str or s_str.upper() == 'NA':
        return None
    try:
        return float(s_str)
    except:
        return None

def parse_int(s):
    f = parse_float(s)
    return int(f) if f is not None else None

def parse_game_desc(desc):
    """Parse 'Team1@Team2' into (away, home)."""
    if '@' in desc:
        parts = desc.split('@')
        return parts[0].strip(), parts[1].strip()
    return None, None

def main():
    if not CFB_ODDS_FILE.exists():
        print(f"File not found: {CFB_ODDS_FILE}")
        return

    conn = get_db_connection()
    cur = conn.cursor()

    # Read and group by game_id
    print(f"Loading {CFB_ODDS_FILE.name}...")
    games = defaultdict(lambda: {
        'season': None, 'game_desc': None, 'date_time': None,
        'spreads': {}, 'totals': {}, 'moneylines': {}, 'week': None
    })

    row_count = 0
    with gzip.open(CFB_ODDS_FILE, 'rt', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            row_count += 1
            if row_count % 200000 == 0:
                print(f"  Read {row_count} rows...")

            game_id = row.get('game_id', '').strip()
            if not game_id:
                continue

            g = games[game_id]
            g['season'] = g['season'] or parse_int(row.get('season'))
            g['game_desc'] = g['game_desc'] or row.get('game_desc', '')
            g['date_time'] = g['date_time'] or row.get('date_time', '')
            g['week'] = g['week'] or parse_int(row.get('week'))

            market = row.get('market_type', '').lower()
            abbr = row.get('abbr', '').strip()
            odds = parse_float(row.get('odds'))
            lines = parse_float(row.get('lines'))

            if market == 'spread' and abbr and lines is not None:
                g['spreads'][abbr] = lines
            elif market == 'total' and abbr:
                if abbr.lower() == 'over' and lines is not None:
                    g['totals']['over'] = lines
                elif abbr.lower() == 'under' and lines is not None:
                    g['totals']['under'] = lines
            elif market == 'money_line' and abbr and odds is not None:
                g['moneylines'][abbr] = odds

    print(f"Total rows: {row_count}, Unique games: {len(games)}")

    # Insert/update games
    inserted = 0
    updated = 0
    skipped = 0

    for game_id, g in games.items():
        away_team, home_team = parse_game_desc(g['game_desc'])
        if not away_team or not home_team:
            skipped += 1
            continue

        season = g['season']
        if not season:
            skipped += 1
            continue

        # Parse date
        dt_str = g['date_time']
        game_date = None
        if dt_str:
            try:
                game_date = datetime.strptime(dt_str, '%Y-%m-%d %H:%M:%S')
            except:
                try:
                    game_date = datetime.strptime(dt_str[:10], '%Y-%m-%d')
                except:
                    pass

        if not game_date:
            skipped += 1
            continue

        # Extract odds - use the team abbreviations
        spread_home = g['spreads'].get(home_team)
        spread_away = g['spreads'].get(away_team)
        total = g['totals'].get('over') or g['totals'].get('under')
        ml_home = parse_int(g['moneylines'].get(home_team))
        ml_away = parse_int(g['moneylines'].get(away_team))

        ext_id = f"sdv_cfb:{game_id}"

        # Try insert, on conflict update odds if null
        try:
            cur.execute("""
                INSERT INTO "SportsGame" (
                    league, season, "gameDate", "homeTeam", "awayTeam",
                    "spreadHome", "spreadAway", total, "moneylineHome", "moneylineAway",
                    "oddsSource", "oddsUpdatedAt", "externalGameId", status, "updatedAt"
                ) VALUES (
                    'ncaaf', %s, %s, %s, %s,
                    %s, %s, %s, %s, %s,
                    'sdv_cfb', NOW(), %s, 'final', NOW()
                )
                ON CONFLICT ("externalGameId") DO UPDATE SET
                    "spreadHome" = COALESCE("SportsGame"."spreadHome", EXCLUDED."spreadHome"),
                    "spreadAway" = COALESCE("SportsGame"."spreadAway", EXCLUDED."spreadAway"),
                    total = COALESCE("SportsGame".total, EXCLUDED.total),
                    "moneylineHome" = COALESCE("SportsGame"."moneylineHome", EXCLUDED."moneylineHome"),
                    "moneylineAway" = COALESCE("SportsGame"."moneylineAway", EXCLUDED."moneylineAway"),
                    "oddsSource" = COALESCE("SportsGame"."oddsSource", EXCLUDED."oddsSource"),
                    "oddsUpdatedAt" = COALESCE("SportsGame"."oddsUpdatedAt", EXCLUDED."oddsUpdatedAt"),
                    "updatedAt" = NOW()
            """, (
                season, game_date, home_team, away_team,
                spread_home, spread_away, total, ml_home, ml_away,
                ext_id
            ))
            if cur.rowcount > 0:
                inserted += 1
        except Exception as e:
            print(f"Error: {e}")
            conn.rollback()
            skipped += 1

        if inserted % 5000 == 0 and inserted > 0:
            print(f"  Progress: {inserted} inserted...")
            conn.commit()

    conn.commit()
    cur.close()
    conn.close()

    print(f"Done! Inserted/Updated: {inserted}, Skipped: {skipped}")

if __name__ == '__main__':
    main()
