#!/usr/bin/env python3
"""Build/merge betting/nba_historical.json from Kaggle nba_2008-2025.csv.

Source:
  data/kaggle/nba_2008-2025.csv

Provides: spreads, totals, moneylines, scores.
We merge into betting/nba_historical.json by (date, homeTeam, awayTeam).
"""

import csv
import json
import math
import sys
from pathlib import Path
from typing import Dict, Tuple, Optional

# Root data directory; the input and output paths below are built from it.
BASE = Path('/var/www/html/eventheodds/data')
# Input: the Kaggle CSV that main() reads row by row.
SRC = BASE / 'kaggle' / 'nba_2008-2025.csv'
# Output: the JSON file that main() loads (if present), merges into, and rewrites.
OUT = BASE / 'betting' / 'nba_historical.json'

# Lower-case team code -> full team name. build_record() looks up the
# lower-cased CSV `home` / `away` values here and falls back to the
# upper-cased code for anything that is not listed.
# NOTE(review): only current franchise names are listed; confirm how the CSV
# encodes relocated/renamed teams from the earlier seasons.
TEAM_MAP = {
    'atl': 'Atlanta Hawks',
    'bkn': 'Brooklyn Nets',
    'bos': 'Boston Celtics',
    'cha': 'Charlotte Hornets',
    'chi': 'Chicago Bulls',
    'cle': 'Cleveland Cavaliers',
    'dal': 'Dallas Mavericks',
    'den': 'Denver Nuggets',
    'det': 'Detroit Pistons',
    'gs': 'Golden State Warriors',
    'hou': 'Houston Rockets',
    'ind': 'Indiana Pacers',
    'lac': 'Los Angeles Clippers',
    'lal': 'Los Angeles Lakers',
    'mem': 'Memphis Grizzlies',
    'mia': 'Miami Heat',
    'mil': 'Milwaukee Bucks',
    'min': 'Minnesota Timberwolves',
    'no': 'New Orleans Pelicans',
    'ny': 'New York Knicks',
    'okc': 'Oklahoma City Thunder',
    'orl': 'Orlando Magic',
    'phi': 'Philadelphia 76ers',
    'phx': 'Phoenix Suns',
    'por': 'Portland Trail Blazers',
    'sa': 'San Antonio Spurs',
    'sac': 'Sacramento Kings',
    'tor': 'Toronto Raptors',
    'utah': 'Utah Jazz',
    'wsh': 'Washington Wizards',
}


def _norm(s: str) -> str:
    return ' '.join((s or '').strip().lower().split())


def to_int(x) -> Optional[int]:
    """Parse *x* as an integer, returning ``None`` when it is not usable.

    The value is stringified and stripped, parsed as a float and then
    truncated toward zero, so ``"110.0"`` gives ``110`` and ``"-3.9"``
    gives ``-3``.  ``None``, blank text, ``nan`` (any case) and anything
    that does not parse all yield ``None``.
    """
    text = '' if x is None else str(x).strip()
    if not text or text.lower() == 'nan':
        return None
    try:
        number = float(text)
        return int(number)
    except (ValueError, OverflowError):
        # ValueError: unparseable text (or a NaN spelling such as "-nan");
        # OverflowError: an infinite value has no integer form.
        return None


def to_float(x) -> Optional[float]:
    """Parse *x* as a finite float, returning ``None`` when it is not usable.

    The value is stringified and stripped before parsing.  ``None``, blank
    text, unparseable text and every non-finite value (``nan``, ``-nan``,
    ``inf``, ``-Infinity``, ...) yield ``None``.

    Non-finite values are rejected because they are meaningless as a betting
    line and ``json.dump`` would serialise them as ``NaN`` / ``Infinity``,
    which is not valid JSON.
    """
    if x is None:
        return None
    text = str(x).strip()
    if not text:
        return None
    try:
        value = float(text)
    except ValueError:
        return None
    return value if math.isfinite(value) else None


def build_record(row: dict) -> Optional[dict]:
    """Convert one CSV row into a game record, or return ``None`` to skip it.

    Columns read from *row*: ``date``, ``home``, ``away``, ``score_home``,
    ``score_away``, ``season``, ``spread``, ``total``, ``whos_favored``,
    ``moneyline_home`` and ``moneyline_away``.

    A row is skipped (``None``) when the date, either team code, or either
    score is missing or unparseable.  Otherwise the returned dict carries
    the identifying fields, ``scores``, ``odds`` (source ``'kaggle'``),
    ``hasRealOdds`` and a computed ``result`` section.

    ``season`` comes from the ``season`` column, falling back to the first
    four characters of the date; it is ``None`` when neither parses, rather
    than raising and aborting the whole import.
    """
    date = (row.get('date') or '').strip()
    if not date:
        return None

    home_code = (row.get('home') or '').strip().lower()
    away_code = (row.get('away') or '').strip().lower()
    if not home_code or not away_code:
        return None

    # Unknown codes are kept (upper-cased) instead of dropping the game.
    home = TEAM_MAP.get(home_code, home_code.upper())
    away = TEAM_MAP.get(away_code, away_code.upper())

    sh = to_int(row.get('score_home'))
    sa = to_int(row.get('score_away'))
    if sh is None or sa is None:
        return None

    # to_int (not int) for the fallback: a date that does not start with a
    # numeric year must not raise here.
    season = to_int(row.get('season')) or to_int(date[:4])

    spread = to_float(row.get('spread'))
    total = to_float(row.get('total'))
    fav = (row.get('whos_favored') or '').strip().lower()  # 'home' or 'away'

    # The CSV spread is used as a magnitude; the favoured side gets the
    # negative line and the other side the positive one.
    spread_home = None
    spread_away = None
    if spread is not None and fav in ('home', 'away'):
        s = abs(spread)
        # Avoid emitting -0.0 for a zero spread.
        favoured_line = -s if s else 0.0
        if fav == 'home':
            spread_home, spread_away = favoured_line, s
        else:
            spread_home, spread_away = s, favoured_line

    ml_home = to_int(row.get('moneyline_home'))
    ml_away = to_int(row.get('moneyline_away'))

    # The fixed -110 spread odds below are deliberately not counted here.
    has_real = any(
        v is not None
        for v in (spread_home, spread_away, total, ml_home, ml_away)
    )

    # results
    margin = sh - sa
    total_points = sh + sa

    if margin > 0:
        winner = 'home'
    elif margin < 0:
        winner = 'away'
    else:
        winner = 'draw'

    spread_covered = None
    if spread_home is not None:
        # Home covers when its margin plus its line is positive.
        adj = margin + spread_home
        if adj > 0:
            spread_covered = 'home'
        elif adj < 0:
            spread_covered = 'away'
        else:
            spread_covered = 'push'

    total_result = None
    if total is not None:
        if total_points > total:
            total_result = 'over'
        elif total_points < total:
            total_result = 'under'
        else:
            total_result = 'push'

    gid = f"kaggle-nba-{date}-{_norm(away).replace(' ','_')}-at-{_norm(home).replace(' ','_')}"

    return {
        'id': gid,
        'bdl_game_id': None,
        'sport': 'nba',
        'date': date,
        'season': season,
        'homeTeam': home,
        'awayTeam': away,
        'scores': {
            'homeScore': sh,
            'awayScore': sa,
        },
        'odds': {
            'moneylineHome': ml_home,
            'moneylineAway': ml_away,
            'spreadHome': spread_home,
            'spreadAway': spread_away,
            # Fixed value; not read from the row.
            'spreadOddsHome': -110,
            'spreadOddsAway': -110,
            'totalLine': total,
            'source': 'kaggle',
        },
        'hasRealOdds': bool(has_real),
        'result': {
            'winner': winner,
            'spreadCovered': spread_covered,
            'totalResult': total_result,
            'margin': margin,
            'totalPoints': total_points,
        },
    }


def _game_key(g: dict) -> Tuple[str, str, str]:
    """Return the (date, normalised home, normalised away) identity of a game."""
    return (
        g.get('date') or '',
        _norm(g.get('homeTeam', '')),
        _norm(g.get('awayTeam', '')),
    )


def merge(existing: list, new: list) -> Tuple[list, int, int]:
    """Merge *new* game records into *existing*, matching on (date, home, away).

    Matching records are updated in place (the dicts inside *existing* are
    mutated); unmatched new records are appended.

    Odds handling for a matched record:

    * If the existing odds are marked ``'estimated_DO_NOT_BACKTEST'`` and the
      new record has real odds, the listed odds fields are replaced wholesale
      by the new record's values and the source is relabelled.  Keeping the
      estimated numbers under a real source label would present synthetic
      lines as real data.
    * Otherwise only fields that are currently ``None`` are filled in.
    * A missing source is set from the new record; any other existing source
      (e.g. the-odds-api) is kept.
    * ``hasRealOdds`` never becomes true merely because estimated values are
      present.

    Returns:
        ``(merged, added, updated)`` where *merged* is sorted by ``date``.

    NOTE(review): existing records missing a date or either team name are
    not indexed and therefore do not appear in the result; confirm that
    dropping them is intended.
    """
    odds_fields = (
        'moneylineHome', 'moneylineAway', 'spreadHome', 'spreadAway',
        'totalLine', 'spreadOddsHome', 'spreadOddsAway',
    )
    real_value_fields = ('moneylineHome', 'moneylineAway', 'spreadHome', 'totalLine')
    estimated = 'estimated_DO_NOT_BACKTEST'

    idx: Dict[Tuple[str, str, str], dict] = {}
    for g in existing:
        key = _game_key(g)
        if all(key):
            idx[key] = g

    added = 0
    updated = 0

    for g in new:
        key = _game_key(g)
        cur = idx.get(key)
        if cur is None:
            idx[key] = g
            added += 1
            continue

        cur_odds = cur.get('odds') or {}
        new_odds = g.get('odds') or {}
        new_is_real = bool(g.get('hasRealOdds'))
        cur_source = cur_odds.get('source')
        replace_estimates = cur_source == estimated and new_is_real

        if replace_estimates:
            # Real data supersedes estimates entirely, including fields the
            # new record lacks, so no synthetic value survives relabelling.
            for k in odds_fields:
                cur_odds[k] = new_odds.get(k)
        else:
            # fill missing
            for k in odds_fields:
                if cur_odds.get(k) is None and new_odds.get(k) is not None:
                    cur_odds[k] = new_odds[k]

        # source preference: keep existing if the-odds-api
        if cur_source is None or replace_estimates:
            cur_odds['source'] = new_odds.get('source', 'kaggle')

        cur['odds'] = cur_odds

        # keep bdl_game_id if present
        if not cur.get('bdl_game_id') and g.get('bdl_game_id'):
            cur['bdl_game_id'] = g['bdl_game_id']

        has_values = any(cur_odds.get(k) is not None for k in real_value_fields)
        still_estimated = cur_odds.get('source') == estimated
        cur['hasRealOdds'] = bool(
            cur.get('hasRealOdds')
            or new_is_real
            or (has_values and not still_estimated)
        )

        if not cur.get('result') and g.get('result'):
            cur['result'] = g['result']

        updated += 1

    out = sorted(idx.values(), key=lambda x: x.get('date', ''))
    return out, added, updated


def main():
    """Read the Kaggle CSV, merge it into the output JSON and print a summary.

    Exits via ``SystemExit`` when the source CSV is missing.  An existing
    output file that cannot be read or parsed is treated as empty
    (best-effort), but a warning is printed to stderr because the file is
    then overwritten with Kaggle-only data.
    """
    if not SRC.exists():
        raise SystemExit(f"Missing {SRC}")
    OUT.parent.mkdir(parents=True, exist_ok=True)

    existing = []
    if OUT.exists():
        try:
            with open(OUT, 'r', encoding='utf-8') as f:
                existing = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(
                f"Warning: could not load {OUT} ({exc}); "
                "continuing with no existing records",
                file=sys.stderr,
            )
            existing = []

    with open(SRC, newline='', encoding='utf-8') as f:
        # build_record returns None for rows that should be skipped.
        new = [rec for rec in map(build_record, csv.DictReader(f)) if rec]

    merged, added, updated = merge(existing, new)

    with open(OUT, 'w', encoding='utf-8') as f:
        json.dump(merged, f, indent=2)

    real = sum(1 for g in merged if g.get('hasRealOdds'))
    print(f"Merged NBA Kaggle into {OUT}")
    print(f"Existing: {len(existing)} | New built: {len(new)} | Added: {added} | Updated: {updated}")
    print(f"Total: {len(merged)} | hasRealOdds: {real} ({(real/len(merged)*100) if merged else 0:.1f}%)")


# Run the merge only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
