#!/usr/bin/env python3
"""Import UFC betting odds from Kaggle dataset into our historical database."""
import csv
import json
from pathlib import Path
from collections import defaultdict

# Directory holding the historical betting JSON files; import_ufc_odds()
# reads and rewrites "mma_historical.json" inside it.
DATA_DIR = Path("/var/www/html/eventheodds/data/betting")
# CSV of UFC betting odds that import_ufc_odds() reads as its input.
KAGGLE_FILE = Path("/var/www/html/eventheodds/data/kaggle/UFC_betting_odds.csv")

def _decimal_to_american(raw):
    """Convert a decimal-odds value to American (moneyline) odds.

    Args:
        raw: Decimal odds as a string or number (e.g. ``"2.5"``), or None.

    Returns:
        The American price as an int (``2.5`` -> ``150``, ``1.5`` -> ``-200``),
        or None when *raw* is missing, unparseable, non-finite, or not
        greater than 1.0 (decimal odds of 1.0 or less carry no payout and
        would otherwise divide by zero or produce a nonsensical price).
    """
    if raw is None:
        return None
    try:
        decimal = float(raw)
    except (TypeError, ValueError):
        return None
    # Written as "not >" so that NaN is rejected as well.
    if not decimal > 1.0:
        return None
    try:
        # round(), not int(): float error makes (2.3 - 1) * 100 equal
        # 129.99999999999997, which truncation would turn into +129.
        if decimal >= 2.0:
            return round((decimal - 1) * 100)
        return round(-100 / (decimal - 1))
    except (OverflowError, ValueError):
        # Infinite input cannot be represented as an integer price.
        return None


def _better_price(current, candidate):
    """Return the more favourable of two American prices, ignoring None.

    A numerically larger American price always pays the bettor more
    (+150 beats +120, -110 beats -150), so the maximum is the best price.
    """
    if candidate is None:
        return current
    if current is None:
        return candidate
    return max(current, candidate)


def import_ufc_odds():
    """Import Kaggle UFC odds and merge them into the MMA historical file.

    Reads ``KAGGLE_FILE`` (CSV), groups its rows by
    (fighter_1, fighter_2, event_date), skips fights already present in
    ``DATA_DIR / "mma_historical.json"`` (matched case-insensitively on both
    fighter orderings and the first 10 characters of the date), converts the
    decimal odds of each row to American odds, and appends one record per new
    fight to the JSON file. Progress and a summary are printed to stdout.

    Per-sportsbook prices are stored under ``"<source>_<region>"``; the
    top-level ``fighter1_odds`` / ``fighter2_odds`` hold the best (highest)
    price found across all rows of the fight. Odds values that are missing
    or unusable are stored as None; rows with no usable main odds are
    ignored.

    Returns:
        None.

    Raises:
        FileNotFoundError: If ``KAGGLE_FILE`` does not exist.
        json.JSONDecodeError: If the existing historical file is not valid
            JSON.
    """
    mma_file = DATA_DIR / "mma_historical.json"

    # Load existing MMA data
    existing = []
    if mma_file.exists():
        with open(mma_file, encoding="utf-8") as f:
            existing = json.load(f)

    # Track existing fights by fighter names + date. ``or ""`` guards
    # against JSON nulls, which .get()'s default does not cover.
    existing_keys = set()
    for fight in existing:
        f1 = (fight.get("fighter1") or "").lower()
        f2 = (fight.get("fighter2") or "").lower()
        date = str(fight.get("date") or "")[:10]
        existing_keys.add((f1, f2, date))
        existing_keys.add((f2, f1, date))  # Both orderings

    print(f"Existing MMA records: {len(existing)}")
    print(f"Loading Kaggle UFC data from {KAGGLE_FILE}...")

    # Group by fight (multiple odds per fight from different sources)
    fights = defaultdict(list)

    # utf-8-sig reads plain UTF-8 too, but also strips a leading BOM that
    # would otherwise corrupt the first column name.
    with open(KAGGLE_FILE, newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            # DictReader fills missing trailing fields with None.
            fighter1 = (row.get("fighter_1") or "").strip()
            fighter2 = (row.get("fighter_2") or "").strip()
            event_date = (row.get("event_date") or "").strip()

            if not (fighter1 and fighter2 and event_date):
                continue

            # Existing dates are truncated to 10 characters above, so the
            # Kaggle date must be truncated the same way to compare equal.
            key = (fighter1.lower(), fighter2.lower(), event_date[:10])
            if key in existing_keys:
                continue

            fights[(fighter1, fighter2, event_date)].append(row)

    print(f"Unique fights from Kaggle: {len(fights)}")

    # Convert to our format
    prop_columns = (("f1_ko_odds", "f1_ko"), ("f1_sub_odds", "f1_sub"))
    new_fights = []
    for (f1, f2, date), odds_list in fights.items():
        sportsbooks = {}
        best_odds1 = None
        best_odds2 = None

        for row in odds_list:
            source = row.get("source") or "unknown"
            region = row.get("region") or "us"
            book_key = f"{source}_{region}"

            american1 = _decimal_to_american(row.get("odds_1"))
            american2 = _decimal_to_american(row.get("odds_2"))
            if american1 is None and american2 is None:
                continue

            best_odds1 = _better_price(best_odds1, american1)
            best_odds2 = _better_price(best_odds2, american2)

            entry = {
                "fighter1_odds": american1,
                "fighter2_odds": american2,
            }
            # Add prop odds if available
            for column, field in prop_columns:
                prop = _decimal_to_american(row.get(column))
                if prop is not None:
                    entry[field] = prop
            sportsbooks[book_key] = entry

        if sportsbooks:
            new_fights.append({
                "fight_id": f"kaggle_{f1}_{f2}_{date}".replace(" ", "_"),
                "sport": "MMA",
                "event": "UFC",
                "date": date,
                "fighter1": f1,
                "fighter2": f2,
                "fighter1_odds": best_odds1,
                "fighter2_odds": best_odds2,
                "odds": {
                    "source": "kaggle",
                    "sportsbooks": sportsbooks,
                },
            })

    print(f"New fights to add: {len(new_fights)}")

    # Merge and save. Write to a sibling temp file and swap it in so that a
    # failure mid-write cannot truncate the existing historical data.
    all_data = existing + new_fights
    tmp_file = mma_file.with_name(mma_file.name + ".tmp")
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(all_data, f, indent=2)
    tmp_file.replace(mma_file)

    print("\n=== UFC IMPORT COMPLETE ===")
    print(f"Previous: {len(existing)}")
    print(f"Added: {len(new_fights)}")
    print(f"Total: {len(all_data)}")

    # Date range
    if new_fights:
        dates = [fight["date"] for fight in new_fights]
        print(f"Date range: {min(dates)} to {max(dates)}")

# Run the import only when this file is executed as a script, not on import.
if __name__ == "__main__":
    import_ufc_odds()
