#!/usr/bin/env python3
"""
Convert EventheOdds JSON data files into text files and upload them to the RAG service.
Reads JSON from DATA_DIR (per-league/per-topic subdirectories plus sportsreference_stats.json),
writes .txt files to DATA_DIR/csv/, then uploads them via the RAG HTTP API.
"""

import json
import os
import sys
import time
import requests
from pathlib import Path

# Root directory holding the JSON inputs read by main().
DATA_DIR = Path("/var/www/html/eventheodds/data")
# Destination for the generated .txt files (despite the "csv" directory name).
OUTPUT_DIR = DATA_DIR / "csv"
# Base URL of the RAG service; /upload, /processing-status and /status are requested under it.
RAG_URL = "http://localhost:5001"
# Sent as the X-API-Key header on every request to the RAG service.
# NOTE(review): the key is hard-coded in source — consider loading it from the environment.
API_KEY = "eventheodds-flask-api-key-2025"

# Side effect at import time: make sure the output directory exists before anything is written.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

def fmt_game_nba(g):
    """Format an NBA/NFL/MLB/NHL game into readable text.

    Args:
        g: Game record (dict). Reads ``date``, ``status``, ``season``,
            ``postseason``, ``home_team``/``visitor_team``,
            ``home_team_score``/``visitor_team_score`` and the optional
            ``home_q1..q4``/``visitor_q1..q4`` and ``home_ot1..5``/
            ``visitor_ot1..5`` period scores.

    Returns:
        A newline-joined block: headline, season/status, final score, then
        period and overtime scores when both sides of a period are present.
    """

    def team_label(team, fallback):
        # A team may arrive as a nested record, a bare name string, or null
        # (e.g. an undecided matchup); none of those should drop the game.
        if isinstance(team, dict):
            return team.get("full_name") or team.get("name") or fallback
        if team is None or team == "":
            return fallback
        return str(team)

    date = g.get("date", "unknown date")
    status = g.get("status", "")
    home_name = team_label(g.get("home_team"), "Home")
    visitor_name = team_label(g.get("visitor_team"), "Away")
    home_score = g.get("home_team_score", "?")
    visitor_score = g.get("visitor_team_score", "?")
    season = g.get("season", "")
    postseason = " (Postseason)" if g.get("postseason") else ""

    lines = [
        f"Game: {visitor_name} at {home_name} on {date}{postseason}",
        f"Season: {season}, Status: {status}",
        f"Score: {visitor_name} {visitor_score} - {home_name} {home_score}",
    ]

    # Quarter/period scores: a period is listed only when both sides have a value
    # ("is not None" so that a legitimate 0 is kept).
    quarters = []
    for q in ("q1", "q2", "q3", "q4"):
        hq = g.get(f"home_{q}")
        vq = g.get(f"visitor_{q}")
        if hq is not None and vq is not None:
            quarters.append(f"{q.upper()}: {vq}-{hq}")
    if quarters:
        lines.append(f"Period scores: {', '.join(quarters)}")

    # Overtime periods 1-5, each on its own line.
    for ot in range(1, 6):
        hot = g.get(f"home_ot{ot}")
        vot = g.get(f"visitor_ot{ot}")
        if hot is not None and vot is not None:
            lines.append(f"OT{ot}: {vot}-{hot}")

    return "\n".join(lines)


def fmt_player(p, league):
    """Format a player record as labelled text lines.

    Args:
        p: Player record (dict). Reads ``first_name``/``last_name`` (falling
            back to ``name``), ``team`` (dict or plain value), ``position``,
            ``height``/``height_inches``, ``weight``/``weight_lbs``,
            ``jersey_number``, ``college``, ``country`` and the ``draft_*`` keys.
        league: Accepted for call compatibility; not used by the formatting.

    Returns:
        A newline-joined block starting with ``Player: <name>``; optional
        fields are emitted only when they carry a value.
    """

    def present(value):
        # Null and empty string both mean "no value"; 0 is a real value.
        return value is not None and value != ""

    # Build the name from non-null parts so a null first/last name does not
    # render as the literal text "None".
    name_parts = []
    for key in ("first_name", "last_name"):
        value = p.get(key)
        if present(value):
            name_parts.append(str(value).strip())
    name = " ".join(part for part in name_parts if part)
    if not name:
        name = p.get("name") or "Unknown"

    team = p.get("team")
    if isinstance(team, dict):
        team_name = team.get("full_name") or team.get("name") or ""
    elif present(team):
        team_name = str(team)
    else:
        # A null team (e.g. free agent) must not print "Team: None".
        team_name = ""

    pos = p.get("position", "")
    height = p.get("height") or p.get("height_inches")
    weight = p.get("weight") or p.get("weight_lbs")

    lines = [f"Player: {name}"]
    if team_name:
        lines.append(f"Team: {team_name}")
    if pos:
        lines.append(f"Position: {pos}")
    if height:
        lines.append(f"Height: {height}")
    if weight:
        lines.append(f"Weight: {weight}")

    # Jersey number 0 is valid, so test for presence rather than truthiness.
    jersey = p.get("jersey_number")
    if present(jersey):
        lines.append(f"Jersey: #{jersey}")

    college = p.get("college", "")
    country = p.get("country", "")
    if college:
        lines.append(f"College: {college}")
    if country:
        lines.append(f"Country: {country}")

    draft_year = p.get("draft_year")
    if draft_year:
        # Only mention round/pick when known, instead of printing "None".
        draft = f"Draft: {draft_year}"
        draft_round = p.get("draft_round")
        draft_number = p.get("draft_number")
        if present(draft_round):
            draft += f" Round {draft_round}"
        if present(draft_number):
            draft += f" Pick {draft_number}"
        lines.append(draft)

    return "\n".join(lines)


def fmt_team(t):
    """Render a team record as labelled text lines.

    The headline uses ``full_name`` when that key exists, otherwise the
    stripped "<city> <name>" combination. Abbreviation, conference and
    division follow, each only when it has a truthy value.
    """
    city_and_name = f"{t.get('city', '')} {t.get('name', '')}".strip()
    out = [f"Team: {t.get('full_name', city_and_name)}"]

    optional_fields = (
        ("Abbreviation", "abbreviation"),
        ("Conference", "conference"),
        ("Division", "division"),
    )
    for label, key in optional_fields:
        value = t.get(key, "")
        if value:
            out.append(f"{label}: {value}")

    return "\n".join(out)


def fmt_mma_fighter(f):
    """Render an MMA fighter record as labelled text lines.

    Always emits the fighter name and a wins-losses-draws record (each part
    defaulting to 0); nickname, nationality, stance, weight, height and reach
    are added only when truthy.
    """
    wins = f.get("record_wins", 0)
    losses = f.get("record_losses", 0)
    draws = f.get("record_draws", 0)

    out = [f"Fighter: {f.get('name', 'Unknown')}"]

    # The nickname sits between the headline and the record line.
    nickname = f.get("nickname", "")
    if nickname:
        out.append(f"Nickname: {nickname}")

    out.append(f"Record: {wins}-{losses}-{draws}")

    optional_fields = (
        ("nationality", "Nationality: {}"),
        ("stance", "Stance: {}"),
        ("weight_lbs", "Weight: {} lbs"),
        ("height_inches", "Height: {} inches"),
        ("reach_inches", "Reach: {} inches"),
    )
    for key, template in optional_fields:
        value = f.get(key, "")
        if value:
            out.append(template.format(value))

    return "\n".join(out)


def fmt_mma_event(e):
    """Format an MMA event and up to ten of its fights.

    Args:
        e: Event record (dict). Reads ``name``, ``date``, ``location`` and
            ``fights``; each fight reads ``fighter1``, ``fighter2``,
            ``winner``, ``method`` and ``round``.

    Returns:
        A newline-joined block: event headline, optional date/location, then
        one indented line per fight (first ten only). A fight with no winner
        is reported as ``Draw/NC``.
    """

    def person_name(person, fallback):
        # Fighters/winners may be nested records, bare name strings, or null.
        if isinstance(person, dict):
            return person.get("name", fallback)
        if person:
            return str(person)
        return fallback

    name = e.get("name", "Unknown Event")
    date = e.get("date", "")
    location = e.get("location", "")

    lines = [f"Event: {name}"]
    if date:
        lines.append(f"Date: {date}")
    if location:
        lines.append(f"Location: {location}")

    fights = e.get("fights")
    if isinstance(fights, list):
        for fight in fights[:10]:  # limit per event
            if not isinstance(fight, dict):
                continue
            f1 = person_name(fight.get("fighter1"), "?")
            f2 = person_name(fight.get("fighter2"), "?")
            winner_raw = fight.get("winner")
            winner = person_name(winner_raw, "?") if winner_raw else "Draw/NC"

            # Only print the "(method Rn)" suffix for the parts that exist, so a
            # fight without result details does not end in "( R)".
            method = fight.get("method", "")
            rnd = fight.get("round", "")
            details = []
            if method:
                details.append(str(method))
            if rnd:
                details.append(f"R{rnd}")
            suffix = f" ({' '.join(details)})" if details else ""

            lines.append(f"  {f1} vs {f2} - Winner: {winner}{suffix}")

    return "\n".join(lines)


def fmt_betting_game(g):
    """Format a historical betting record.

    Args:
        g: Betting record (dict). Reads ``date``, ``homeTeam``, ``awayTeam``,
            ``sport``, ``scores`` (``homeScore``/``awayScore``) and ``odds``
            (``moneylineHome``/``moneylineH``, ``moneylineAway``/``moneylineA``,
            ``spread``/``spreadHome``, ``total``/``overUnder``).

    Returns:
        A newline-joined block: headline, then score, moneyline, spread and
        over/under lines for whichever values are present. A missing side of
        the moneyline is shown as ``n/a``.
    """

    def present(value):
        # Presence, not truthiness: a spread of 0 (pick'em) is a real value.
        return value is not None and value != ""

    def first_present(mapping, *keys):
        # Return the first key's value that is actually set, else None.
        for key in keys:
            value = mapping.get(key)
            if present(value):
                return value
        return None

    date = g.get("date", "")
    home = g.get("homeTeam", "")
    away = g.get("awayTeam", "")
    sport = str(g.get("sport") or "").upper()
    scores = g.get("scores") or {}
    odds = g.get("odds") or {}

    lines = [f"Betting Record: {away} at {home} ({sport}) on {date}"]
    if scores:
        lines.append(
            f"Score: {away} {scores.get('awayScore', '?')} - {home} {scores.get('homeScore', '?')}"
        )
    if odds:
        mlh = first_present(odds, "moneylineHome", "moneylineH")
        mla = first_present(odds, "moneylineAway", "moneylineA")
        spread = first_present(odds, "spread", "spreadHome")
        total = first_present(odds, "total", "overUnder")
        if present(mlh) or present(mla):
            home_ml = mlh if present(mlh) else "n/a"
            away_ml = mla if present(mla) else "n/a"
            lines.append(f"Moneyline: Home {home_ml}, Away {away_ml}")
        if present(spread):
            lines.append(f"Spread: {spread}")
        if present(total):
            lines.append(f"Over/Under: {total}")
    return "\n".join(lines)


def fmt_stats_player(p):
    """Format a sportsreference stats player.

    Args:
        p: Stats record (dict). Reads ``playerName``, ``league``, ``season``,
            ``team``, ``position``, ``gamesPlayed``, ``minutesPerGame`` and the
            per-game stats ``pointsPerGame``, ``reboundsPerGame``,
            ``assistsPerGame``, ``stealsPerGame``, ``blocksPerGame`` plus
            ``fgPercent``.

    Returns:
        A newline-joined block: headline, optional team/position, games
        played, minutes per game, and one comma-separated stat line. Stats are
        included whenever a value is present, including a legitimate 0.
    """

    def present(value):
        # A stat of 0 / 0.0 is real data; only null and "" mean "missing".
        return value is not None and value != ""

    name = p.get("playerName", "Unknown")
    league = str(p.get("league") or "").upper()
    season = p.get("season", "")
    team = p.get("team", "")
    pos = p.get("position", "")

    lines = [f"Stats: {name} ({league} {season})"]
    if team:
        # Avoid a dangling "Position: " when the position is unknown.
        lines.append(f"Team: {team}, Position: {pos}" if pos else f"Team: {team}")

    gp = p.get("gamesPlayed")
    if present(gp):
        lines.append(f"Games Played: {gp}")
    mpg = p.get("minutesPerGame")
    if present(mpg):
        lines.append(f"Minutes/Game: {mpg}")

    # fgPercent was previously read but never written out; it is emitted as-is.
    stat_fields = (
        ("PPG", "pointsPerGame"),
        ("RPG", "reboundsPerGame"),
        ("APG", "assistsPerGame"),
        ("SPG", "stealsPerGame"),
        ("BPG", "blocksPerGame"),
        ("FG%", "fgPercent"),
    )
    stat_parts = [
        f"{label}: {p[key]}" for label, key in stat_fields if present(p.get(key))
    ]
    if stat_parts:
        lines.append(", ".join(stat_parts))
    return "\n".join(lines)


def fmt_standing(s, league):
    """Format a league standing entry.

    Args:
        s: Standing record (dict). Each field is read from a primary key with
            one alias: ``team``/``name``, ``rank``/``position``,
            ``wins``/``won``, ``draws``/``drawn``, ``losses``/``lost``,
            ``goalsFor``/``goals_for``, ``goalsAgainst``/``goals_against``,
            ``goalDifference``/``goal_difference``, ``played``/
            ``matches_played``; plus ``points``.
        league: League code; upper-cased in the headline.

    Returns:
        A newline-joined block: headline, then played/W/D/L, points and goals
        lines for whichever values are present (0 counts as present).
    """

    def present(value):
        # 0 points / 0 goals are valid standings data; only null/"" are missing.
        return value is not None and value != ""

    def pick(*keys, default=""):
        # First alias that actually carries a value wins.
        for key in keys:
            value = s.get(key)
            if present(value):
                return value
        return default

    team = pick("team", "name", default="Unknown")
    if isinstance(team, dict):
        # Nested team record: show its name rather than the dict repr.
        team = team.get("full_name") or team.get("name") or "Unknown"
    rank = pick("rank", "position")
    pts = pick("points")
    wins = pick("wins", "won")
    draws = pick("draws", "drawn")
    losses = pick("losses", "lost")
    gf = pick("goalsFor", "goals_for")
    ga = pick("goalsAgainst", "goals_against")
    gd = pick("goalDifference", "goal_difference")
    played = pick("played", "matches_played")

    if present(rank):
        lines = [f"{league.upper()} Standing: #{rank} {team}"]
    else:
        # No rank available: omit the bare "#" marker.
        lines = [f"{league.upper()} Standing: {team}"]
    if present(played):
        lines.append(f"Played: {played}, W: {wins}, D: {draws}, L: {losses}")
    if present(pts):
        lines.append(f"Points: {pts}")
    if present(gf):
        lines.append(f"Goals: {gf} for, {ga} against (GD: {gd})")
    return "\n".join(lines)


def convert_and_write(name, data, formatter, batch_size=200):
    """Convert records with *formatter* and write them to .txt files in batches.

    Args:
        name: Base name for the output files; batches beyond the first file
            get a ``_partN`` suffix when more than one batch is needed.
        data: Sequence of records (must support ``len`` and slicing).
        formatter: Callable turning one record into a text entry.
        batch_size: Maximum number of records per output file.

    Returns:
        List of paths written under ``OUTPUT_DIR`` (empty when there is no
        data or no record could be formatted).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if not data:
        print(f"  [skip] {name}: no data")
        return []
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    files_written = []
    total = len(data)
    batches = (total + batch_size - 1) // batch_size
    skipped = 0
    first_error = None

    for batch_idx in range(batches):
        start = batch_idx * batch_size
        batch = data[start:start + batch_size]

        entries = []
        for offset, item in enumerate(batch):
            try:
                entries.append(formatter(item))
            except Exception as exc:
                # Best-effort: one malformed record must not abort the export,
                # but the loss is counted and reported instead of hidden.
                skipped += 1
                if first_error is None:
                    first_error = f"record {start + offset}: {type(exc).__name__}: {exc}"

        if not entries:
            continue

        suffix = f"_part{batch_idx + 1}" if batches > 1 else ""
        filename = f"{name}{suffix}.txt"
        filepath = OUTPUT_DIR / filename

        with open(filepath, "w", encoding="utf-8") as f:
            f.write(f"# {name.replace('_', ' ').title()} Data\n")
            f.write(f"# Records: {len(entries)} (of {total} total)\n\n")
            f.write("\n\n---\n\n".join(entries))

        files_written.append(filepath)
        print(f"  [write] {filename}: {len(entries)} records, {filepath.stat().st_size / 1024:.1f} KB")

    if skipped:
        print(f"  [warn] {name}: skipped {skipped} of {total} record(s) that failed to format (first: {first_error})")

    return files_written


def upload_file(filepath):
    """Upload a text file to the RAG service.

    Posts the file as multipart form field ``file`` to ``{RAG_URL}/upload``
    with the ``X-API-Key`` header.

    Args:
        filepath: ``pathlib.Path`` of the file to upload.

    Returns:
        True when the service answers HTTP 200 or 202, False on any other
        status or when the request itself fails. Errors are printed, never
        raised, so one bad file does not stop a batch of uploads.
    """
    try:
        with open(filepath, "rb") as f:
            resp = requests.post(
                f"{RAG_URL}/upload",
                files={"file": (filepath.name, f, "text/plain")},
                headers={"X-API-Key": API_KEY},
                timeout=300,
            )
    except Exception as e:
        # Per-file boundary: report and carry on with the next file.
        print(f"  [error] {filepath.name}: {e}")
        return False

    if resp.status_code not in (200, 202):
        print(f"  [error] {filepath.name}: HTTP {resp.status_code} - {resp.text[:200]}")
        return False

    # The upload already succeeded; an unparsable or non-object body only
    # affects the message shown, not the result.
    try:
        payload = resp.json()
    except ValueError:
        payload = None
    message = payload.get("message", "OK") if isinstance(payload, dict) else "OK"
    print(f"  [upload] {filepath.name}: {message}")
    return True


def wait_for_processing(timeout=600):
    """Wait for all background processing to complete.

    Polls ``{RAG_URL}/processing-status`` roughly every 10 seconds until the
    response reports ``active_count`` of 0 (a missing key counts as 0).

    Args:
        timeout: Maximum number of seconds to wait.

    Returns:
        True once no jobs are active, False if the timeout elapses first.
        Failed polls are reported and retried rather than raised.
    """
    print("\nWaiting for RAG processing to complete...")
    poll_interval = 10
    # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = requests.get(f"{RAG_URL}/processing-status", headers={"X-API-Key": API_KEY}, timeout=10)
        except Exception as e:
            # Transient failure (service restarting, network hiccup): retry.
            print(f"  [warn] status check failed: {e}")
        else:
            if resp.status_code == 200:
                try:
                    data = resp.json()
                except ValueError:
                    data = None
                if isinstance(data, dict):
                    active = data.get("active_count", 0)
                    if active == 0:
                        print("All files processed!")
                        return True
                    print(f"  Still processing: {active} active job(s)...")
                else:
                    print("  [warn] status check returned an unexpected body")
            else:
                print(f"  [warn] status check returned HTTP {resp.status_code}")

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval, remaining))
    print("Timed out waiting for processing")
    return False


def _load_json(path):
    """Parse *path* as UTF-8 JSON; print the error and return None if unreadable or invalid."""
    try:
        with open(path, encoding="utf-8") as fh:
            return json.load(fh)
    except (OSError, ValueError) as e:
        # ValueError covers both JSON syntax errors and bad UTF-8.
        print(f"  [error] {path}: {e}")
        return None


def _convert_list_file(path, name, formatter, batch_size):
    """Convert one JSON list file via convert_and_write; return [] if missing, invalid or not a list."""
    if not path.exists():
        return []
    data = _load_json(path)
    if not isinstance(data, list):
        return []
    return convert_and_write(name, data, formatter, batch_size=batch_size)


def _write_conferences(league, conferences):
    """Write the ``{league}_conferences.txt`` summary; return its path, or None if there is nothing to write."""
    entries = []
    for conf in conferences:
        if not isinstance(conf, dict):
            continue
        name = conf.get("name", conf.get("conference", "Unknown"))
        teams = conf.get("teams") or []
        team_names = [
            str(t.get("name", t.get("school", str(t)))) if isinstance(t, dict) else str(t)
            for t in teams
        ]
        # At most 50 team names are listed per conference.
        entries.append(f"Conference: {name}\nTeams: {', '.join(team_names[:50])}")
    if not entries:
        return None

    filename = f"{league}_conferences.txt"
    filepath = OUTPUT_DIR / filename
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(f"# {league.upper()} Conferences\n\n")
        f.write("\n\n---\n\n".join(entries))
    print(f"  [write] {filename}: {len(entries)} conferences")
    return filepath


def main():
    """Convert every known JSON data file to text, upload the results, and report RAG status.

    Missing input files are skipped; a file that cannot be read or parsed is
    reported and skipped so the remaining files are still processed.
    """
    all_files = []
    leagues_us = ["nba", "nfl", "nhl", "mlb", "ncaab", "ncaaf", "wnba"]
    leagues_soccer = ["epl", "ucl", "seriea", "laliga", "ligue1", "bundesliga"]

    # === US Sports: Games ===
    print("\n=== Converting games data ===")
    for league in leagues_us:
        all_files.extend(_convert_list_file(
            DATA_DIR / league / "games.json", f"{league}_games", fmt_game_nba, 300))

    # === US Sports: Players ===
    print("\n=== Converting player data ===")
    for league in leagues_us:
        # league=league binds the current value instead of the loop variable.
        all_files.extend(_convert_list_file(
            DATA_DIR / league / "players.json", f"{league}_players",
            lambda p, league=league: fmt_player(p, league), 300))

    # === US Sports: Teams ===
    print("\n=== Converting team data ===")
    for league in leagues_us:
        all_files.extend(_convert_list_file(
            DATA_DIR / league / "teams.json", f"{league}_teams", fmt_team, 500))

    # === Soccer: Players ===
    print("\n=== Converting soccer player data ===")
    for league in leagues_soccer:
        all_files.extend(_convert_list_file(
            DATA_DIR / league / "players.json", f"{league}_players",
            lambda p, league=league: fmt_player(p, league), 300))

    # === Soccer: Teams ===
    print("\n=== Converting soccer team data ===")
    for league in leagues_soccer:
        all_files.extend(_convert_list_file(
            DATA_DIR / league / "teams.json", f"{league}_teams", fmt_team, 500))

    # === Soccer: Standings ===
    print("\n=== Converting standings data ===")
    for league in leagues_soccer:
        all_files.extend(_convert_list_file(
            DATA_DIR / league / "standings.json", f"{league}_standings",
            lambda s, league=league: fmt_standing(s, league), 500))

    # === Soccer: Games (all soccer leagues) ===
    print("\n=== Converting soccer games data ===")
    for league in leagues_soccer:
        all_files.extend(_convert_list_file(
            DATA_DIR / league / "games.json", f"{league}_games", fmt_game_nba, 300))

    # === MMA ===
    print("\n=== Converting MMA data ===")
    all_files.extend(_convert_list_file(
        DATA_DIR / "mma" / "fighters.json", "mma_fighters", fmt_mma_fighter, 300))
    all_files.extend(_convert_list_file(
        DATA_DIR / "mma" / "events.json", "mma_events", fmt_mma_event, 200))

    # === Betting History ===
    print("\n=== Converting betting history ===")
    all_files.extend(_convert_list_file(
        DATA_DIR / "betting" / "nhl_historical.json", "nhl_betting_history", fmt_betting_game, 300))

    # === SportsReference Stats ===
    print("\n=== Converting sportsreference stats ===")
    sfile = DATA_DIR / "sportsreference_stats.json"
    if sfile.exists():
        data = _load_json(sfile)
        # The file is either a bare list or an object with a "players" list.
        players = data.get("players", data) if isinstance(data, dict) else data
        if isinstance(players, list):
            all_files.extend(convert_and_write(
                "sportsreference_stats", players, fmt_stats_player, batch_size=300))

    # === NCAAB/NCAAF Conferences ===
    print("\n=== Converting conference data ===")
    for league in ["ncaab", "ncaaf"]:
        cfile = DATA_DIR / league / "conferences.json"
        if not cfile.exists():
            continue
        data = _load_json(cfile)
        if isinstance(data, list):
            written = _write_conferences(league, data)
            if written is not None:
                all_files.append(written)

    # === Summary ===
    print(f"\n{'='*60}")
    print(f"Total files created: {len(all_files)}")
    total_size = sum(f.stat().st_size for f in all_files) / (1024 * 1024)
    print(f"Total size: {total_size:.1f} MB")

    # === Upload ===
    print(f"\n=== Uploading {len(all_files)} files to RAG service ===")
    success = 0
    for filepath in all_files:
        if upload_file(filepath):
            success += 1
        time.sleep(0.5)  # pace uploads

    print(f"\nUploaded: {success}/{len(all_files)} files")

    # Wait for processing
    if success > 0:
        wait_for_processing(timeout=900)

    # Final status (informational only; a failure here is reported, not raised)
    try:
        resp = requests.get(f"{RAG_URL}/status", headers={"X-API-Key": API_KEY}, timeout=10)
        if resp.status_code == 200:
            status = resp.json()
            print("\nFinal RAG Status:")
            print(f"  Total documents: {status.get('total_documents', 'N/A')}")
            print(f"  Total files: {status.get('total_files', 'N/A')}")
        else:
            print(f"\n[warn] could not fetch final RAG status: HTTP {resp.status_code}")
    except Exception as e:
        print(f"\n[warn] could not fetch final RAG status: {e}")


# Run the convert-and-upload pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
