#!/usr/bin/env python3
"""
Build NCAA team name mapping (odds team names -> NCAA short names).
"""
import re
import psycopg2
from difflib import SequenceMatcher


STOP_WORDS = {"university", "college", "state", "st", "st.", "the"}


def load_db_url():
    with open('/var/www/html/eventheodds/.env', 'r') as f:
        for line in f:
            if line.startswith('SPORTS_DATABASE_URL='):
                return line.split('=', 1)[1].strip().split('?')[0]
    return ''


def normalize(name: str) -> str:
    if not name:
        return ''
    cleaned = re.sub(r'[^a-zA-Z0-9 ]+', ' ', name.lower())
    tokens = [t for t in cleaned.split() if t not in STOP_WORDS]
    return ' '.join(tokens).strip()


def strip_mascot(name: str) -> str:
    parts = name.split()
    if len(parts) <= 1:
        return name
    return ' '.join(parts[:-1])


def best_match(odds_name: str, candidates: list[str]) -> tuple[str, float]:
    odds_norm = normalize(strip_mascot(odds_name))
    best = ('', 0.0)
    for cand in candidates:
        cand_norm = normalize(cand)
        if not cand_norm:
            continue
        score = SequenceMatcher(None, odds_norm, cand_norm).ratio()
        if score > best[1]:
            best = (cand, score)
    return best


def build_map(league: str, threshold: float = 0.65) -> int:
    db_url = load_db_url()
    if not db_url:
        raise RuntimeError('SPORTS_DATABASE_URL not set')

    conn = psycopg2.connect(db_url)
    cur = conn.cursor()

    cur.execute(
        """
        SELECT DISTINCT
          raw->'game'->'home'->'names'->>'full' as full_name,
          raw->'game'->'home'->'names'->>'short' as short_name,
          "homeTeam" as fallback
        FROM "SportsGame"
        WHERE league = %s AND raw IS NOT NULL
        """,
        (league,)
    )
    team_names = set()
    for full_name, short_name, fallback in cur.fetchall():
        if full_name:
            team_names.add(full_name)
        if short_name:
            team_names.add(short_name)
        if fallback:
            team_names.add(fallback)
    team_names = list(team_names)

    cur.execute(
        """
        SELECT DISTINCT "homeTeam" FROM "GameOdds"
        WHERE league = %s AND market IN ('spreads','totals','h2h')
        """,
        (league,)
    )
    odds_names = [r[0] for r in cur.fetchall() if r[0]]

    upserts = 0
    for odds_name in odds_names:
        match_name, score = best_match(odds_name, team_names)
        if score < threshold or not match_name:
            continue
        cur.execute(
            """
            INSERT INTO "TeamNameMap" (league, source, name, "mappedName", confidence)
            VALUES (%s, 'odds', %s, %s, %s)
            ON CONFLICT (league, source, name)
            DO UPDATE SET "mappedName" = EXCLUDED."mappedName", confidence = EXCLUDED.confidence
            """,
            (league, odds_name, match_name, float(score))
        )
        upserts += 1

    conn.commit()
    cur.close()
    conn.close()
    print(f'✅ {league} team mappings upserted: {upserts}')
    return upserts


if __name__ == '__main__':
    build_map('ncaab')
    build_map('ncaaf')
