#!/usr/bin/env python3
"""Build data/betting/mma_historical.json by joining:
- Kaggle UFC odds (data/kaggle/UFC_betting_odds.csv)
- BallDontLie MMA fight results (data/mma/fights.json)

Output matches the same shape as other betting/{sport}_historical.json files so the
Python backtester can treat MMA as a "game" with home/away = fighter1/fighter2.
"""

import csv
import json
import math
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Tuple, Optional

# Root data directory; the input and output paths below are built from it.
BASE = Path('/var/www/html/eventheodds/data')
# Kaggle CSV of UFC betting odds, read by build().
KAGGLE_CSV = BASE / 'kaggle' / 'UFC_betting_odds.csv'
# BallDontLie fight results JSON, read by load_bdl_fights().
FIGHTS_JSON = BASE / 'mma' / 'fights.json'
# Destination of the joined dataset, written by build().
OUT = BASE / 'betting' / 'mma_historical.json'


def _norm(s: str) -> str:
    return ' '.join((s or '').strip().lower().split())


def decimal_to_american(d: Optional[float]) -> Optional[int]:
    """Convert decimal odds to American (moneyline) odds.

    Args:
        d: Decimal odds as a number or a numeric string; ``None`` is allowed.

    Returns:
        The rounded American line: positive for decimal odds >= 2.0
        (e.g. 2.5 -> +150), negative for odds between 1.0 and 2.0
        (e.g. 1.5 -> -200). ``None`` when the input is missing, not
        numeric, NaN/infinite, or <= 1.0 (no valid payout).
    """
    if d is None:
        return None
    try:
        d = float(d)
    except (TypeError, ValueError, OverflowError):
        return None
    # float() happily parses "nan"/"inf"; NaN fails every comparison below and
    # neither value can be rounded to an int, so reject them explicitly.
    if not math.isfinite(d) or d <= 1.0:
        return None
    if d >= 2.0:
        american = (d - 1.0) * 100
    else:
        # e.g. 1.5 => -200
        american = -100.0 / (d - 1.0)
    # Extremely large finite odds can still overflow to inf after scaling.
    if not math.isfinite(american):
        return None
    return int(round(american))


@dataclass
class FightResult:
    """One fight record as loaded from the BallDontLie fights file."""

    # Event date string, truncated to its first 10 characters by the loader.
    date: str
    # Fighter names exactly as they appear in the source record (not normalized).
    f1: str
    f2: str
    # Winner's name from the record, or None when the record has no winner.
    winner: Optional[str]
    # Status string from the record; build() only keeps fights where it is 'completed'.
    status: str


def load_bdl_fights() -> Dict[Tuple[str, str, str], FightResult]:
    """Load fight results from FIGHTS_JSON into a lookup table.

    Returns:
        A dict mapping ``(date, fighter_a, fighter_b)`` to a FightResult,
        where the names are normalized with ``_norm``. Each fight is
        registered under both fighter orderings so callers can look it up
        without knowing which fighter the source listed first. If two
        records share a key, the later one wins.

    Raises:
        SystemExit: if FIGHTS_JSON does not exist.
    """
    if not FIGHTS_JSON.exists():
        raise SystemExit(f"Missing {FIGHTS_JSON}")

    # Use a context manager so the handle is closed deterministically, and
    # read as UTF-8 explicitly rather than relying on the locale default.
    with open(FIGHTS_JSON, 'r', encoding='utf-8') as fh:
        data = json.load(fh)

    out: Dict[Tuple[str, str, str], FightResult] = {}

    for f in data:
        ev = f.get('event') or {}
        # Keep only the first 10 characters of the event date.
        date = (ev.get('date') or '')[:10]
        if not date:
            continue
        fighter1 = (f.get('fighter1') or {}).get('name') or ''
        fighter2 = (f.get('fighter2') or {}).get('name') or ''
        if not fighter1 or not fighter2:
            continue
        status = str(f.get('status') or '')
        # A missing/null winner object yields winner=None.
        winner = (f.get('winner') or {}).get('name')

        fr = FightResult(date=date, f1=fighter1, f2=fighter2, winner=winner, status=status)

        n1 = _norm(fighter1)
        n2 = _norm(fighter2)
        out[(date, n1, n2)] = fr
        out[(date, n2, n1)] = fr

    return out


def build():
    """Join Kaggle odds with BallDontLie results and write the OUT JSON file.

    Steps:
      1. Read KAGGLE_CSV and, per (date, fighter_1, fighter_2), keep the best
         available American line for each fighter across all rows.
      2. Look each aggregated fight up in the BallDontLie results; keep only
         completed fights whose winner matches one of the two fighters.
      3. Emit one "game" dict per fight (fighter 1 as home, fighter 2 as
         away), sorted by date, to OUT, and print summary counts.

    Raises:
        SystemExit: if KAGGLE_CSV (or, via load_bdl_fights, FIGHTS_JSON) is missing.
    """
    if not KAGGLE_CSV.exists():
        raise SystemExit(f"Missing {KAGGLE_CSV}")

    OUT.parent.mkdir(parents=True, exist_ok=True)

    bdl = load_bdl_fights()

    def better_odds(curr: Optional[int], new: Optional[int]) -> Optional[int]:
        """Return whichever American line pays the bettor more.

        Lines produced by decimal_to_american are never strictly between
        -100 and 100, so the larger signed number is always the better price:
        -120 beats -200, +250 beats +150, and any plus-money line beats any
        minus-money line. Using max() also makes the mixed-sign case
        independent of the order rows appear in the CSV.
        """
        if new is None:
            return curr
        if curr is None:
            return new
        return max(curr, new)

    def to_float(raw) -> Optional[float]:
        """Parse a CSV cell as float; blank or unparseable cells become None."""
        if not raw:
            return None
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    agg: Dict[Tuple[str, str, str], Dict[str, Optional[int]]] = {}

    with open(KAGGLE_CSV, newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            date = (row.get('event_date') or '').strip()[:10]
            f1 = (row.get('fighter_1') or '').strip()
            f2 = (row.get('fighter_2') or '').strip()
            if not date or not f1 or not f2:
                continue

            # odds_1/odds_2 are treated as decimal odds and converted to American.
            a1 = decimal_to_american(to_float(row.get('odds_1')))
            a2 = decimal_to_american(to_float(row.get('odds_2')))
            if a1 is None and a2 is None:
                continue

            # NOTE(review): the key is order-sensitive, so rows that list the
            # same fight with the fighters swapped aggregate separately and
            # would produce two games — confirm the CSV never does this.
            key = (date, _norm(f1), _norm(f2))
            cur = agg.setdefault(key, {'f1': None, 'f2': None})
            cur['f1'] = better_odds(cur['f1'], a1)
            cur['f2'] = better_odds(cur['f2'], a2)

    games = []
    missing_result = 0
    matched = 0

    for (date, f1n, f2n), odds in agg.items():
        res = bdl.get((date, f1n, f2n))
        if not res or res.status != 'completed':
            missing_result += 1
            continue

        # The result may list the fighters in the opposite order from the
        # odds row; pick display names so f1/f2 follow the odds ordering.
        f1 = res.f1 if _norm(res.f1) == f1n else res.f2
        f2 = res.f2 if _norm(res.f2) == f2n else res.f1
        winner_name = res.winner

        winner = None
        if winner_name:
            wn = _norm(winner_name)
            if wn == _norm(f1):
                winner = 'home'
            elif wn == _norm(f2):
                winner = 'away'

        # No winner, or a winner name matching neither fighter: skip the fight.
        if winner is None:
            missing_result += 1
            continue

        ml_home = odds.get('f1')
        ml_away = odds.get('f2')
        has_real = ml_home is not None and ml_away is not None

        games.append({
            'id': f"mma-{date}-{f1n[:16]}-{f2n[:16]}".replace(' ', '_'),
            'sport': 'mma',
            'date': date,
            'season': int(date[:4]),
            'homeTeam': f1,
            'awayTeam': f2,
            'scores': {
                'homeScore': None,
                'awayScore': None,
            },
            'odds': {
                'moneylineHome': ml_home,
                'moneylineAway': ml_away,
                'spreadHome': None,
                'spreadAway': None,
                'spreadOddsHome': None,
                'spreadOddsAway': None,
                'totalLine': None,
                'source': 'kaggle',
            },
            'hasRealOdds': has_real,
            'result': {
                'winner': winner,
                'spreadCovered': None,
                'totalResult': None,
                'margin': 0,
                'totalPoints': 0,
            },
            'meta': {
                'source': 'kaggle+balldontlie',
            }
        })
        matched += 1

    # Sort by date
    games.sort(key=lambda g: g.get('date', ''))

    with open(OUT, 'w', encoding='utf-8') as f:
        json.dump(games, f, indent=2)

    print(f"Built {OUT}")
    print(f"Kaggle fights aggregated: {len(agg)}")
    print(f"Matched completed fights w/ winner: {matched}")
    print(f"Skipped (missing result/winner): {missing_result}")


# Run the build only when executed as a script, not when imported.
if __name__ == '__main__':
    build()
