#!/usr/bin/env python3
"""Normalize soccer league historical files into the standard betting schema.

Handles two BallDontLie soccer game schemas:
- Schema A (nested teams): {date, home_team, visitor_team, home_team_score/visitor_team_score (or home_score/away_score)}
- Schema B (IDs): {kickoff, home_team_id, away_team_id, home_score, away_score, season}

For each league in LEAGUES:
- Input primary: data/<league>/games.json
- Input secondary: existing data/betting/<league>_historical.json (odds in various schemas)
- Output: data/betting/<league>_historical.json (standard schema)
"""

import json
import math
from pathlib import Path
from typing import Dict, Optional, Tuple

# Root of the on-disk data tree; each league is read from its own sub-directory here.
BASE = Path('/var/www/html/eventheodds/data')
# Directory holding the per-league "<league>_historical.json" files.
BETTING = BASE / 'betting'

# League slugs to process; each is used as a directory name under BASE and as
# the file-name prefix / id prefix of the generated records.
LEAGUES = ['epl','bundesliga','laliga','ligue1','seriea','ucl']


def norm(s: str) -> str:
    """Return *s* lower-cased with whitespace runs collapsed to single spaces.

    Falsy input (``None``, ``''``) yields the empty string. The result is used
    as a case- and spacing-insensitive key for team names.
    """
    if not s:
        return ''
    # split() with no separator already discards leading/trailing whitespace.
    words = s.lower().split()
    return ' '.join(words)


def to_float(x) -> Optional[float]:
    """Convert *x* to a finite float, or return ``None`` when that is not possible.

    ``None``, blank strings, ``'na'``/``'nan'`` (any case) and anything that
    ``float()`` cannot parse give ``None``. Non-finite results (``inf``,
    ``-inf``, signed ``nan``) are rejected as well: ``json.dump`` would write
    them as ``Infinity``/``NaN``, which is not valid JSON.
    """
    if x is None:
        return None
    s = str(x).strip()
    if not s or s.lower() in ('na', 'nan'):
        return None
    try:
        value = float(s)
    except ValueError:
        return None
    return value if math.isfinite(value) else None


def to_int(x) -> Optional[int]:
    """Convert *x* to an int by parsing it as a float and truncating toward zero.

    Returns ``None`` for ``None``, blank strings, ``'na'``/``'nan'`` (any case)
    and any value whose conversion raises.
    """
    text = '' if x is None else str(x).strip()
    if text.lower() in ('', 'na', 'nan'):
        return None
    try:
        # Going through float() lets strings such as "-110.0" parse.
        number = float(text)
        return int(number)
    except Exception:
        return None


def _odds_from_flat(fields: dict, source: str) -> dict:
    """Map flat ``home_ml``/``away_ml``/``spread``/``total`` fields to the standard odds keys."""
    return {
        'moneylineHome': to_int(fields.get('home_ml')),
        'moneylineAway': to_int(fields.get('away_ml')),
        'spreadHome': to_float(fields.get('spread')),
        # Only a single spread value is read from the input; the away side is left unset.
        'spreadAway': None,
        'totalLine': to_float(fields.get('total')),
        'source': source,
    }


def extract_odds_any(rec: dict) -> dict:
    """Return the odds of *rec* in the standard schema, whatever shape they arrive in.

    Three input shapes are recognised, checked in this order:

    1. ``rec['odds']`` is already standard (has ``moneylineHome`` or
       ``totalLine``): a shallow copy is returned.
    2. *rec* itself carries flat ``home_ml`` / ``away_ml`` / ``spread`` /
       ``total`` fields.
    3. ``rec['odds']['sportsbooks']`` maps book names to flat fields: the
       first book from a fixed preference list is used, else the first book
       present.

    If none applies, every value is ``None`` and ``source`` is ``'none'``.

    A missing or empty ``source`` is always replaced by a fallback label
    (``'unknown'`` for shape 1, ``'the-odds-api'`` otherwise) so that real
    odds are never tagged with a ``None`` source.
    """
    nested = rec.get('odds')
    if not isinstance(nested, dict):
        nested = {}

    # Shape 1: already in the standard form.
    if 'moneylineHome' in nested or 'totalLine' in nested:
        odds = dict(nested)
        # An explicit None/'' source is treated the same as a missing one.
        odds['source'] = odds.get('source') or 'unknown'
        return odds

    # Shape 2: flat fields directly on the record.
    if any(k in rec for k in ('home_ml', 'away_ml', 'spread', 'total')):
        source = nested.get('source') or rec.get('source') or 'the-odds-api'
        return _odds_from_flat(rec, source)

    # Shape 3: per-sportsbook odds nested under odds['sportsbooks'].
    books = nested.get('sportsbooks')
    if not isinstance(books, dict):
        books = {}
    preferred = ('fanduel', 'draftkings', 'betmgm', 'bovada', 'lowvig', 'betonlineag', 'betus')
    chosen = next((books[name] for name in preferred if name in books), None)
    if not chosen and books:
        # No usable preferred book: fall back to the first one listed.
        chosen = next(iter(books.values()))
    if isinstance(chosen, dict):
        return _odds_from_flat(chosen, nested.get('source') or 'the-odds-api')

    return {
        'moneylineHome': None,
        'moneylineAway': None,
        'spreadHome': None,
        'spreadAway': None,
        'totalLine': None,
        'source': 'none',
    }


def _read_json(path):
    """Load and return the JSON document stored at *path*, closing the file afterwards."""
    with open(path) as fh:
        return json.load(fh)


def _load_team_names(teams_file: Path, league: str) -> Dict[int, str]:
    """Return an ``{int team id: name}`` map read from *teams_file*.

    Best effort: a missing file gives an empty map; a file that cannot be read
    or has an unexpected shape is reported and whatever was collected so far
    is kept.
    """
    names: Dict[int, str] = {}
    if not teams_file.exists():
        return names
    try:
        for team in _read_json(teams_file):
            tid = team.get('id')
            name = team.get('name')
            if tid is not None and name:
                names[int(tid)] = name
    except (OSError, ValueError, TypeError, AttributeError) as exc:
        print(f"{league}: warning: could not load {teams_file.name}: {exc!r}")
    return names


def _load_odds_lookup(odds_file: Path, league: str) -> Dict[Tuple[str, str, str], dict]:
    """Index the odds in an existing historical file by ``(date, norm(home), norm(away))``.

    Best effort, like ``_load_team_names``: problems are reported and the
    entries gathered up to that point are returned.
    """
    lookup: Dict[Tuple[str, str, str], dict] = {}
    if not odds_file.exists():
        return lookup
    try:
        for rec in _read_json(odds_file):
            date = (rec.get('date') or '')[:10]
            home = rec.get('homeTeam') or rec.get('home_team')
            away = rec.get('awayTeam') or rec.get('away_team')
            if not date or not home or not away:
                continue
            lookup[(date, norm(home), norm(away))] = extract_odds_any(rec)
    except (OSError, ValueError, TypeError, AttributeError) as exc:
        print(f"{league}: warning: could not load existing odds from {odds_file.name}: {exc!r}")
    return lookup


def _resolve_season(season_val, date: str) -> Optional[int]:
    """Return the season as an int, falling back to the year prefix of *date* (``None`` if no date)."""
    if isinstance(season_val, int) or (isinstance(season_val, str) and season_val.isdigit()):
        return int(season_val)
    return int(date[:4]) if date else None


def _parse_game(g: dict, team_by_id: Dict[int, str]) -> tuple:
    """Return ``(date, home, away, home_score, away_score, season, game_id)`` for one raw game.

    Missing scores are returned as 0; missing names/dates are returned as
    falsy values for the caller to filter on.
    """
    if g.get('kickoff') and g.get('home_team_id') is not None and g.get('away_team_id') is not None:
        # Schema B: team ids plus a kickoff timestamp.
        date = str(g.get('kickoff'))[:10]
        home_id = g.get('home_team_id')
        away_id = g.get('away_team_id')
        # Fall back to the raw id as the name when the team is not in teams.json.
        home = team_by_id.get(int(home_id)) or str(home_id)
        away = team_by_id.get(int(away_id)) or str(away_id)
        hs = g.get('home_score') or 0
        a_s = g.get('away_score') or 0
    else:
        # Schema A: nested team objects.
        date = (g.get('date') or '')[:10]
        home_obj = g.get('home_team') or {}
        away_obj = g.get('visitor_team') or {}
        home = home_obj.get('full_name') or home_obj.get('name') or g.get('home_team_name')
        away = away_obj.get('full_name') or away_obj.get('name') or g.get('visitor_team_name')
        hs = g.get('home_team_score') or g.get('home_score') or 0
        a_s = g.get('visitor_team_score') or g.get('away_score') or 0
    return date, home, away, hs, a_s, _resolve_season(g.get('season'), date), g.get('id')


def _no_odds() -> dict:
    """Return a fresh standard odds dict with no values and source ``'none'``."""
    return {
        'moneylineHome': None,
        'moneylineAway': None,
        'spreadHome': None,
        'spreadAway': None,
        'totalLine': None,
        'source': 'none',
    }


def main():
    """Rebuild ``<league>_historical.json`` for every league in ``LEAGUES``.

    For each league: read ``games.json`` (the league is skipped when absent),
    resolve team names from ``teams.json`` if present, carry odds over from
    the existing historical file by matching on date and normalized team
    names, then overwrite the historical file with the normalized records
    sorted by date and print a one-line summary.
    """
    for league in LEAGUES:
        league_dir = BASE / league
        games_file = league_dir / 'games.json'
        out_file = BETTING / f"{league}_historical.json"

        if not games_file.exists():
            print(f"{league}: no games.json yet (skipping rebuild)")
            continue

        games = _read_json(games_file)
        team_by_id = _load_team_names(league_dir / 'teams.json', league)
        odds_lookup = _load_odds_lookup(out_file, league)

        out = []
        matched = 0
        for g in games:
            date, home, away, hs, a_s, season_int, gid = _parse_game(g, team_by_id)

            if not date or not home or not away:
                continue
            # Games without a score are skipped.
            # NOTE(review): missing scores are coerced to 0 during parsing, so
            # this also drops genuine 0-0 results. Confirm how unplayed
            # fixtures are encoded in games.json before changing it.
            if hs == 0 and a_s == 0:
                continue
            if not season_int:
                season_int = int(date[:4])

            odds = odds_lookup.get((date, norm(home), norm(away)))
            if odds:
                matched += 1
                # Copy so filling in spreadAway below does not mutate the lookup entry.
                odds = dict(odds)
            else:
                odds = _no_odds()

            # Derive the away spread as the mirror of the home spread when only the latter is known.
            if odds.get('spreadHome') is not None and odds.get('spreadAway') is None:
                odds['spreadAway'] = -float(odds['spreadHome'])

            has_real = (
                any(odds.get(k) is not None for k in ('moneylineHome', 'moneylineAway', 'spreadHome', 'totalLine'))
                and odds.get('source') not in ('none', None)
            )

            margin = hs - a_s
            winner = 'home' if margin > 0 else ('away' if margin < 0 else 'draw')

            out.append({
                'id': f"{league}-{gid}" if gid is not None else f"{league}-{date}-{norm(away)}-at-{norm(home)}",
                'bdl_game_id': gid,
                'sport': league,
                'league': league,
                'date': date,
                'season': season_int,
                'homeTeam': home,
                'awayTeam': away,
                'scores': {'homeScore': hs, 'awayScore': a_s},
                'odds': odds,
                'hasRealOdds': bool(has_real),
                'result': {'winner': winner, 'margin': margin, 'totalPoints': hs + a_s}
            })

        out.sort(key=lambda rec: rec.get('date', ''))
        BETTING.mkdir(parents=True, exist_ok=True)
        # Serialize before opening the file for writing: out_file is also the
        # odds source for the next run, so it must not be truncated if
        # serialization fails.
        payload = json.dumps(out, indent=2)
        with open(out_file, 'w') as fh:
            fh.write(payload)
        real = sum(1 for rec in out if rec.get('hasRealOdds'))
        print(f"{league}: wrote {len(out)} games, matched_odds={matched}, hasRealOdds={real}")


# Run the rebuild only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
