#!/usr/bin/env python3
"""Build/merge betting/nhl_historical.json from Kaggle nhl_data_plus.csv.

Source:
  data/kaggle/nhl_data_plus.csv

The Kaggle file is team-level (two rows per game_id). We group into one game:
- homeTeam / awayTeam from is_home
- scores from goals_for
- totalLine from over_under
- spreadHome from 'spread' (appears to be HOME team puck line)
- spreadAway = -spreadHome
- moneyline: only favorite_moneyline is provided; we assign it to home if spreadHome<0 else to away if spreadHome>0.

We merge by (date, homeTeam, awayTeam) into betting/nhl_historical.json, filling missing fields only.
"""

import csv
import json
import math
import sys
from pathlib import Path
from typing import Dict, Tuple, Optional

# Root data directory; the two paths below are resolved against it.
BASE = Path('/var/www/html/eventheodds/data')
# Kaggle team-level CSV that is read as input.
SRC = BASE.joinpath('kaggle', 'nhl_data_plus.csv')
# JSON dataset that is read, merged into, and rewritten.
OUT = BASE.joinpath('betting', 'nhl_historical.json')


def norm(s: str) -> str:
    """Return *s* lower-cased with every whitespace run collapsed to one space.

    Leading and trailing whitespace is dropped. Falsy input (``None`` or
    ``''``) yields ``''``.
    """
    text = s if s else ''
    # split() with no argument already discards leading/trailing whitespace.
    tokens = text.lower().split()
    return ' '.join(tokens)


def to_int(x) -> Optional[int]:
    """Parse *x* as an integer, returning ``None`` when it is missing or invalid.

    ``None``, the empty string and the tokens ``na`` / ``nan`` (any case) count
    as missing. Decimal text such as ``"3.0"`` or ``"2.9"`` is truncated toward
    zero. Non-numeric and infinite values yield ``None``.
    """
    if x is None:
        return None
    s = str(x).strip()
    if s.lower() in ('', 'na', 'nan'):
        return None
    try:
        # Parse integer literals directly: going through float() first would
        # silently round anything above 2**53.
        return int(s)
    except ValueError:
        pass
    try:
        return int(float(s))
    except (ValueError, OverflowError):
        # ValueError: not a number; OverflowError: int(inf).
        return None


def to_float(x) -> Optional[float]:
    """Parse *x* as a finite float, returning ``None`` when missing or invalid.

    ``None``, the empty string and the tokens ``na`` / ``nan`` (any case) count
    as missing. Non-numeric text yields ``None``, and so does any value that
    parses to NaN or infinity (for example ``"-nan"`` or ``"inf"``).
    """
    if x is None:
        return None
    s = str(x).strip()
    if s.lower() in ('', 'na', 'nan'):
        return None
    try:
        value = float(s)
    except ValueError:
        return None
    # Signed NaN and infinities parse successfully but are not usable numbers:
    # NaN compares false to everything, and json.dump writes NaN/Infinity
    # tokens that are not valid JSON.
    if not math.isfinite(value):
        return None
    return value


def date_only(dt: str) -> Optional[str]:
    """Return the date part of a timestamp string, or ``None`` if there is none.

    The expected input looks like ``2004-01-02 00:00:00+00:00``; everything
    before the first whitespace is returned. An ISO-8601 timestamp that uses
    ``T`` between date and time (``2004-01-02T00:00:00``) is cut to its
    ``YYYY-MM-DD`` prefix as well. Empty, ``None`` or whitespace-only input
    yields ``None``.
    """
    if not dt:
        return None
    # split(None, ...) ignores leading whitespace and accepts any whitespace
    # separator (space, tab, ...); an all-whitespace string gives [].
    parts = dt.split(None, 1)
    if not parts:
        return None
    token = parts[0]
    # Only treat 'T' as a separator when the token really has the
    # YYYY-MM-DDT... shape, so unrelated tokens are left untouched.
    if len(token) > 10 and token[4] == '-' and token[7] == '-' and token[10] in 'Tt':
        token = token[:10]
    return token


def _home_flag(row: dict) -> Optional[bool]:
    """Interpret a row's ``is_home`` cell: True, False, or None if unrecognised."""
    raw = str(row.get('is_home')).strip().lower()
    if raw in ('1', '1.0', 'true'):
        return True
    if raw in ('0', '0.0', 'false'):
        return False
    return None


def _outcome(lhs, rhs, above: str, below: str, level: str) -> str:
    """Return *above* if lhs > rhs, *below* if lhs < rhs, otherwise *level*."""
    if lhs > rhs:
        return above
    if lhs < rhs:
        return below
    return level


def _rows_by_game() -> Dict[str, list]:
    """Read SRC and group its rows by ``game_id``; rows without one are dropped."""
    grouped: Dict[str, list] = {}
    # utf-8-sig reads plain UTF-8 too, but also strips a leading BOM that
    # would otherwise become part of the first column name.
    with open(SRC, newline='', encoding='utf-8-sig') as f:
        for row in csv.DictReader(f):
            gid = row.get('game_id')
            if gid:
                grouped.setdefault(gid, []).append(row)
    return grouped


def _game_record(rows: list) -> Optional[dict]:
    """Combine one game's team rows into a game record, or None if unusable.

    A game is unusable when it has fewer than two rows, lacks a home or an
    away row, or is missing its date, a team name, a score, or a season that
    can be determined.
    """
    if len(rows) < 2:
        return None
    # When a game has extra rows, the first home row and first away row win.
    home = next((r for r in rows if _home_flag(r) is True), None)
    away = next((r for r in rows if _home_flag(r) is False), None)
    if home is None or away is None:
        return None

    date = date_only(home.get('date') or '')
    if not date:
        return None

    home_team = (home.get('team_name') or '').strip()
    away_team = (away.get('team_name') or '').strip()
    if not home_team or not away_team:
        return None

    sh = to_int(home.get('goals_for'))
    sa = to_int(away.get('goals_for'))
    if sh is None or sa is None:
        return None

    season = to_int(home.get('season'))
    if not season:
        # Fall back to the calendar year of the date. A date that does not
        # start with a year skips this game instead of aborting the build.
        try:
            season = int(date[:4])
        except ValueError:
            return None

    # All odds columns are read from the home row.
    ou = to_float(home.get('over_under'))
    spread_home = to_float(home.get('spread'))
    spread_away = (-spread_home) if spread_home is not None else None

    # Only the favourite's moneyline exists in the CSV; a negative home
    # spread is taken to mean the home side is that favourite. With a zero or
    # missing spread the favourite is unknown and both sides stay None.
    fav_ml = to_int(home.get('favorite_moneyline'))
    ml_home = None
    ml_away = None
    if fav_ml is not None and spread_home is not None:
        if spread_home < 0:
            ml_home = fav_ml
        elif spread_home > 0:
            ml_away = fav_ml

    has_real = any(v is not None for v in (ou, spread_home, ml_home, ml_away))

    margin = sh - sa
    total = sh + sa
    winner = _outcome(sh, sa, 'home', 'away', 'draw')
    spread_covered = None
    if spread_home is not None:
        spread_covered = _outcome(margin + spread_home, 0, 'home', 'away', 'push')
    total_result = None
    if ou is not None:
        total_result = _outcome(total, ou, 'over', 'under', 'push')

    away_slug = norm(away_team).replace(' ', '_')
    home_slug = norm(home_team).replace(' ', '_')

    return {
        'id': f"kaggle-nhl-{date}-{away_slug}-at-{home_slug}",
        'bdl_game_id': None,
        'sport': 'nhl',
        'date': date,
        'season': season,
        'homeTeam': home_team,
        'awayTeam': away_team,
        'scores': {'homeScore': sh, 'awayScore': sa},
        'odds': {
            'moneylineHome': ml_home,
            'moneylineAway': ml_away,
            'spreadHome': spread_home,
            'spreadAway': spread_away,
            'totalLine': ou,
            'source': 'kaggle',
            # NOTE(review): fixed -110 is written even when no spread exists;
            # it is not counted towards hasRealOdds.
            'spreadOddsHome': -110,
            'spreadOddsAway': -110,
        },
        'hasRealOdds': bool(has_real),
        'result': {
            'winner': winner,
            'spreadCovered': spread_covered,
            'totalResult': total_result,
            'margin': margin,
            'totalPoints': total,
        },
    }


def build_games() -> Dict[Tuple[str, str, str], dict]:
    """Build one record per game from the team-level Kaggle CSV at SRC.

    Returns:
        A dict keyed by ``(date, norm(homeTeam), norm(awayTeam))``. Games that
        cannot be fully assembled are skipped. If two games share a key, the
        one appearing later in the CSV replaces the earlier one.
    """
    out: Dict[Tuple[str, str, str], dict] = {}
    for rows in _rows_by_game().values():
        rec = _game_record(rows)
        if rec is None:
            continue
        out[(rec['date'], norm(rec['homeTeam']), norm(rec['awayTeam']))] = rec
    return out


# Source label carried by records whose odds are synthetic estimates.
_ESTIMATED_SOURCE = 'estimated_DO_NOT_BACKTEST'
# Odds fields that hold actual betting lines.
_LINE_FIELDS = ('moneylineHome', 'moneylineAway', 'spreadHome', 'spreadAway', 'totalLine')
# Odds fields that build_games() fills with a fixed placeholder price.
_JUICE_FIELDS = ('spreadOddsHome', 'spreadOddsAway')
# Fields whose presence marks a record as having real odds.
_REAL_ODDS_FIELDS = ('moneylineHome', 'moneylineAway', 'spreadHome', 'totalLine')


def _load_existing() -> list:
    """Load the current dataset from OUT; a missing or unreadable file gives []."""
    if not OUT.exists():
        return []
    try:
        with open(OUT, 'r', encoding='utf-8') as fh:
            data = json.load(fh)
    except (OSError, ValueError) as err:
        # Best-effort as before (start from scratch), but no longer silent:
        # the rewrite below will replace the unreadable file.
        print(f"Warning: could not read {OUT} ({err}); starting from an empty dataset",
              file=sys.stderr)
        return []
    if not data:
        return []
    if not isinstance(data, list):
        # Iterating anything else would fail later with an opaque error.
        raise SystemExit(f"{OUT} does not contain a JSON list; refusing to overwrite it")
    return data


def _merge_game(cur: dict, kg: dict) -> None:
    """Fill the missing odds/result fields of *cur* in place from Kaggle game *kg*.

    Values already present in *cur* are never overwritten.
    """
    cur_odds = cur.get('odds') or {}
    new_odds = kg.get('odds') or {}

    kaggle_filled_line = False
    for k in _LINE_FIELDS + _JUICE_FIELDS:
        if cur_odds.get(k) is None and new_odds.get(k) is not None:
            cur_odds[k] = new_odds[k]
            if k in _LINE_FIELDS:
                kaggle_filled_line = True

    # Relabel the source only when Kaggle actually supplied a line. Otherwise
    # a fully populated estimated record would be passed off as Kaggle data
    # with none of its values changed.
    # NOTE(review): a partially filled estimated record still ends up holding
    # both estimated and Kaggle values under the 'kaggle' label; if Kaggle
    # lines should supersede estimates entirely, that needs an explicit
    # overwrite policy.
    if kaggle_filled_line and cur_odds.get('source') in (None, _ESTIMATED_SOURCE):
        cur_odds['source'] = 'kaggle'

    cur['odds'] = cur_odds

    # Lines on a record that is still marked as estimated are not real odds.
    still_estimated = cur_odds.get('source') == _ESTIMATED_SOURCE
    has_lines = any(cur_odds.get(k) is not None for k in _REAL_ODDS_FIELDS)
    cur['hasRealOdds'] = bool(cur.get('hasRealOdds') or (has_lines and not still_estimated))

    if not cur.get('result') and kg.get('result'):
        cur['result'] = kg['result']


def main():
    """Merge the Kaggle games into OUT and print a summary.

    Games are matched on ``(date, norm(homeTeam), norm(awayTeam))``. Matched
    records only have their missing fields filled; unmatched Kaggle games are
    appended. The result is written back to OUT sorted by date.

    Raises:
        SystemExit: if SRC is missing, or OUT holds JSON that is not a list.
    """
    if not SRC.exists():
        raise SystemExit(f"Missing {SRC}")

    existing = _load_existing()
    kaggle_games = build_games()

    idx: Dict[Tuple[str, str, str], dict] = {}
    # Records without a complete key cannot be matched, but they must still
    # survive the rewrite instead of being dropped from the file.
    unkeyed = []
    for g in existing:
        key = (g.get('date') or '', norm(g.get('homeTeam', '')), norm(g.get('awayTeam', '')))
        if all(key):
            # As before, a later record with the same key replaces an earlier one.
            idx[key] = g
        else:
            unkeyed.append(g)

    added = 0
    updated = 0

    for key, kg in kaggle_games.items():
        cur = idx.get(key)
        if cur is None:
            idx[key] = kg
            added += 1
        else:
            _merge_game(cur, kg)
            updated += 1

    out = unkeyed + list(idx.values())
    # `or ''` keeps records whose date is None sortable next to string dates.
    out.sort(key=lambda x: x.get('date') or '')

    OUT.parent.mkdir(parents=True, exist_ok=True)
    with open(OUT, 'w', encoding='utf-8') as fh:
        json.dump(out, fh, indent=2)

    real = sum(1 for g in out if g.get('hasRealOdds'))
    print(f"Merged NHL Kaggle into {OUT}")
    print(f"Existing: {len(existing)} | Kaggle games: {len(kaggle_games)} | Added: {added} | Updated: {updated}")
    print(f"Total: {len(out)} | hasRealOdds: {real} ({(real/len(out)*100) if out else 0:.1f}%)")


# Run the merge only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
