#!/usr/bin/env python3
"""Build/merge betting/nfl_historical.json from Kaggle spreadspoke_scores.csv.

Source file (already present on server):
  data/kaggle/spreadspoke_scores.csv

We treat spread + total as real closing lines (hasRealOdds=True when present).
Moneyline is not provided by this dataset, so remains None.

We MERGE into existing betting/nfl_historical.json by (date, homeTeam, awayTeam):
- keep existing moneylines if present
- fill spread/total if missing
- set hasRealOdds when any real line exists
"""

import csv
import json
from pathlib import Path
from typing import Dict, Tuple, Optional
from datetime import datetime

BASE = Path('/var/www/html/eventheodds/data')
SRC = BASE / 'kaggle' / 'spreadspoke_scores.csv'
OUT = BASE / 'betting' / 'nfl_historical.json'


def _norm(s: str) -> str:
    return ' '.join((s or '').strip().lower().split())


def parse_date(mdY: str) -> Optional[str]:
    if not mdY:
        return None
    mdY = mdY.strip()
    for fmt in ['%m/%d/%Y', '%m/%d/%y']:
        try:
            return datetime.strptime(mdY, fmt).strftime('%Y-%m-%d')
        except Exception:
            continue
    return None


def to_float(x: str) -> Optional[float]:
    if x is None:
        return None
    x = str(x).strip()
    if x == '' or x.lower() == 'nan':
        return None
    try:
        return float(x)
    except Exception:
        return None


def build_record(row: dict) -> Optional[dict]:
    date = parse_date(row.get('schedule_date'))
    if not date:
        return None

    season = int(row.get('schedule_season') or 0) or int(date[:4])
    week = row.get('schedule_week')

    home = (row.get('team_home') or '').strip()
    away = (row.get('team_away') or '').strip()
    if not home or not away:
        return None

    sh = row.get('score_home')
    sa = row.get('score_away')
    try:
        home_score = int(sh) if sh not in (None, '') else None
    except Exception:
        home_score = None
    try:
        away_score = int(sa) if sa not in (None, '') else None
    except Exception:
        away_score = None

    if home_score is None or away_score is None:
        return None

    # Lines
    fav = (row.get('team_favorite_id') or '').strip()  # sometimes a team name
    spread_fav = to_float(row.get('spread_favorite'))
    total_line = to_float(row.get('over_under_line'))

    spread_home = None
    spread_away = None

    # spread_favorite is "favorite's spread" (usually negative). We convert into home/away spreads.
    if spread_fav is not None and fav:
        if _norm(fav) == _norm(home):
            spread_home = spread_fav
            spread_away = -spread_fav
        elif _norm(fav) == _norm(away):
            spread_away = spread_fav
            spread_home = -spread_fav
        else:
            # if favorite doesn't match, still store as None (don't guess)
            pass

    has_real = (spread_home is not None and spread_away is not None) or (total_line is not None)

    # Result fields
    if home_score > away_score:
        winner = 'home'
    elif away_score > home_score:
        winner = 'away'
    else:
        winner = 'draw'

    spread_covered = None
    if spread_home is not None:
        margin = home_score - away_score
        # Home covers if (home_score + spread_home) > away_score
        adj = margin + spread_home
        if adj > 0:
            spread_covered = 'home'
        elif adj < 0:
            spread_covered = 'away'
        else:
            spread_covered = 'push'

    total_result = None
    if total_line is not None:
        tp = home_score + away_score
        if tp > total_line:
            total_result = 'over'
        elif tp < total_line:
            total_result = 'under'
        else:
            total_result = 'push'

    gid = f"kaggle-nfl-{date}-{_norm(away).replace(' ','_')}-at-{_norm(home).replace(' ','_')}"

    return {
        'id': gid,
        'sport': 'nfl',
        'date': date,
        'season': season,
        'week': int(week) if str(week).isdigit() else None,
        'homeTeam': home,
        'awayTeam': away,
        'scores': {
            'homeScore': home_score,
            'awayScore': away_score,
        },
        'odds': {
            'moneylineHome': None,
            'moneylineAway': None,
            'spreadHome': spread_home,
            'spreadAway': spread_away,
            'spreadOddsHome': -110,
            'spreadOddsAway': -110,
            'totalLine': total_line,
            'source': 'kaggle',
        },
        'hasRealOdds': bool(has_real),
        'result': {
            'winner': winner,
            'spreadCovered': spread_covered,
            'totalResult': total_result,
            'margin': home_score - away_score,
            'totalPoints': home_score + away_score,
        }
    }


def merge(existing: list, new: list) -> list:
    # index existing by date+teams (normalized)
    idx: Dict[Tuple[str, str, str], dict] = {}
    for g in existing:
        key = (g.get('date') or '', _norm(g.get('homeTeam','')), _norm(g.get('awayTeam','')))
        if key[0] and key[1] and key[2]:
            idx[key] = g

    added = 0
    updated = 0

    for g in new:
        key = (g.get('date') or '', _norm(g.get('homeTeam','')), _norm(g.get('awayTeam','')))
        if key in idx:
            cur = idx[key]
            # Fill missing odds fields only
            cur_odds = cur.get('odds') or {}
            new_odds = g.get('odds') or {}

            for k in ['spreadHome','spreadAway','totalLine']:
                if cur_odds.get(k) is None and new_odds.get(k) is not None:
                    cur_odds[k] = new_odds.get(k)
            # keep moneylines if present
            if cur_odds.get('moneylineHome') is None:
                cur_odds['moneylineHome'] = new_odds.get('moneylineHome')
            if cur_odds.get('moneylineAway') is None:
                cur_odds['moneylineAway'] = new_odds.get('moneylineAway')

            # odds source preference: keep existing if it's the-odds-api/live, else set kaggle
            src = cur_odds.get('source')
            if src in (None, 'estimated_DO_NOT_BACKTEST'):
                cur_odds['source'] = new_odds.get('source','kaggle')

            cur['odds'] = cur_odds
            cur['hasRealOdds'] = bool(cur.get('hasRealOdds') or g.get('hasRealOdds') or (cur_odds.get('spreadHome') is not None) or (cur_odds.get('totalLine') is not None))

            # Fill result fields if missing
            if not cur.get('result') and g.get('result'):
                cur['result'] = g['result']

            updated += 1
        else:
            idx[key] = g
            added += 1

    out = list(idx.values())
    out.sort(key=lambda x: x.get('date',''))
    return out, added, updated


def main():
    if not SRC.exists():
        raise SystemExit(f"Missing {SRC}")
    OUT.parent.mkdir(parents=True, exist_ok=True)

    existing = []
    if OUT.exists():
        try:
            existing = json.load(open(OUT,'r'))
        except Exception:
            existing = []

    new = []
    with open(SRC, newline='', encoding='utf-8') as f:
        r = csv.DictReader(f)
        for row in r:
            rec = build_record(row)
            if rec:
                new.append(rec)

    merged, added, updated = merge(existing, new)

    with open(OUT,'w') as f:
        json.dump(merged, f, indent=2)

    real = sum(1 for g in merged if g.get('hasRealOdds'))
    print(f"Merged NFL Kaggle spreadspoke into {OUT}")
    print(f"Existing: {len(existing)} | New built: {len(new)} | Added: {added} | Updated: {updated}")
    print(f"Total: {len(merged)} | hasRealOdds: {real} ({(real/len(merged)*100) if merged else 0:.1f}%)")


if __name__ == '__main__':
    main()
