#!/usr/bin/env python3
"""
NCAAB Cross-Table Team Name Normalization

Normalizes dirty NCAAB team names across SportsGame, OddsSnapshot, GameOdds,
and LineMovement to use The Odds API canonical format.

Phase 1: Normalize SportsGame homeTeam/awayTeam
Phase 2: Normalize OddsSnapshot homeTeam/awayTeam
Phase 3: Normalize GameOdds homeTeam/awayTeam
Phase 4: Normalize LineMovement homeTeam/awayTeam
Phase 5: Also normalize within odds tables (e.g. "Colorado State Rams" → "Colorado St Rams")
Phase 6: Link orphaned OddsSnapshot to SportsGame by team+date

Usage:
    python3 ncaab_normalize_all_tables.py --dry-run
    python3 ncaab_normalize_all_tables.py --phase 1
    python3 ncaab_normalize_all_tables.py
"""

import argparse
import os
import sys

import psycopg2

# ---------------------------------------------------------------------------
# Build the canonical name map dynamically from DB + corrections
# ---------------------------------------------------------------------------

def get_db_url():
    env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
    if os.path.exists(env_path):
        with open(env_path) as f:
            for line in f:
                line = line.strip()
                if line.startswith("SPORTS_DATABASE_URL="):
                    url = line.split("=", 1)[1]
                    return url.split("?")[0]
    return None

def connect_db():
    url = get_db_url()
    if not url:
        print("ERROR: SPORTS_DATABASE_URL not found")
        sys.exit(1)
    return psycopg2.connect(url)

# Odds API canonical duplicates to normalize (pick one canonical form)
ODDS_DUPES = {
    "Colorado State Rams": "Colorado St Rams",
    "North Dakota State Bison": "North Dakota St Bison",
    "Loyola Chicago Ramblers": "Loyola (Chi) Ramblers",
    "Connecticut Huskies": "UConn Huskies",
}

# Manual corrections: short/ambiguous names → correct canonical
# These override the auto-prefix matching which gets them wrong
MANUAL_FIXES = {
    # Flagship universities (short name = flagship, NOT the "State" / "A&M" variant)
    "Alabama": "Alabama Crimson Tide",
    "Arizona": "Arizona Wildcats",
    "Arkansas": "Arkansas Razorbacks",
    "California Baptist": "Cal Baptist Lancers",
    "California Baptist Lancers": "Cal Baptist Lancers",
    "Colorado": "Colorado Buffaloes",
    "Florida": "Florida Gators",
    "Florida A and M": "Florida A&M Rattlers",
    "Houston": "Houston Cougars",
    "Houston Baptist": "Houston Christian Huskies",
    "Idaho": "Idaho Vandals",
    "IDHO": "Idaho Vandals",
    "ILL": "Illinois Fighting Illini",
    "Illinois": "Illinois Fighting Illini",
    "Iowa": "Iowa Hawkeyes",
    "IOWA": "Iowa Hawkeyes",
    "Maryland": "Maryland Terrapins",
    "MICH": "Michigan Wolverines",
    "Michigan": "Michigan Wolverines",
    "Mississippi": "Ole Miss Rebels",
    "N MEX": "New Mexico Lobos",
    "New Mexico": "New Mexico Lobos",
    "UNM": "New Mexico Lobos",
    "NO DAK": "North Dakota Fighting Hawks",
    "North Dakota": "North Dakota Fighting Hawks",
    "Northwestern": "Northwestern Wildcats",
    "OHIO": "Ohio Bobcats",
    "Ohio": "Ohio Bobcats",
    "Oklahoma": "Oklahoma Sooners",
    "ORE": "Oregon Ducks",
    "Oregon": "Oregon Ducks",
    "Portland": "Portland Pilots",
    "S CAR": "South Carolina Gamecocks",
    "SCAR": "South Carolina Gamecocks",
    "S Carolina": "South Carolina Gamecocks",
    "South Carolina": "South Carolina Gamecocks",
    "San Diego": "San Diego Toreros",
    "Southern": "Southern Jaguars",
    "TA&M": "Texas A&M Aggies",
    "TX A&M": "Texas A&M Aggies",
    "Texas A&M": "Texas A&M Aggies",
    "Texas A and M": "Texas A&M Aggies",
    "Tennessee": "Tennessee Volunteers",
    "Texas": "Texas Longhorns",
    "Utah": "Utah Utes",
    "C OF C": "Charleston Cougars",
    "COFC": "Charleston Cougars",
    "College of Charleston": "Charleston Cougars",
    "College Of Charleston": "Charleston Cougars",
    "Connecticut": "UConn Huskies",
    "Miami": "Miami Hurricanes",
    "Miami Florida": "Miami Hurricanes",
    # Specific school corrections
    "Army Black Knights": "Army Knights",
    "Grambling Tigers": "Grambling St Tigers",
    "Purdue Fort Wayne": "Fort Wayne Mastodons",
    "Purdue Fort Wayne Mastodons": "Fort Wayne Mastodons",
    "Nicholls Colonels": "Nicholls St Colonels",
    "San Jose State": "San José St Spartans",
    "San Jose State Spartans": "San José St Spartans",
    "IU Indianapolis": "IUPUI Jaguars",
    "IU Indianapolis Jaguars": "IUPUI Jaguars",
    "Florida International": "Florida Int'l Golden Panthers",
    "Florida International Panthers": "Florida Int'l Golden Panthers",
    "Florida Panthers": "Florida Int'l Golden Panthers",
    "Little Rock": "Arkansas-Little Rock Trojans",
    "Little Rock Trojans": "Arkansas-Little Rock Trojans",
    "Long Island": "LIU Sharks",
    "Long Island University Sharks": "LIU Sharks",
    "UMass Minutemen": "Massachusetts Minutemen",
    "Loyola Maryland": "Loyola (MD) Greyhounds",
    "Loyola-Maryland": "Loyola (MD) Greyhounds",
    "Loyola Maryland Greyhounds": "Loyola (MD) Greyhounds",
    "NC Asheville": "UNC Asheville Bulldogs",
    "NC Greensboro": "UNC Greensboro Spartans",
    "NC Wilmington": "UNC Wilmington Seahawks",
    "MD Baltimore County": "UMBC Retrievers",
    "MD Eastern Shore": "Maryland-Eastern Shore Hawks",
    "Miami Ohio": "Miami (OH) RedHawks",
    "Mississippi Valley State": "Miss Valley St Delta Devils",
    "Mississippi Valley State Delta Devils": "Miss Valley St Delta Devils",
    "Mount St Marys": "Mt. St. Mary's Mountaineers",
    "Mount St. Mary's": "Mt. St. Mary's Mountaineers",
    "Mount St. Mary's Mountaineers": "Mt. St. Mary's Mountaineers",
    "Southeast Missouri State": "SE Missouri St Redhawks",
    "Southeast Missouri State Redhawks": "SE Missouri St Redhawks",
    "Southeastern Louisiana Lions": "SE Louisiana Lions",
    "SE Louisiana": "SE Louisiana Lions",
    "Nebraska Omaha": "Omaha Mavericks",
    "Nebraska-Omaha Mavericks": "Omaha Mavericks",
    "App State Mountaineers": "Appalachian St Mountaineers",
    "George Washington": "GW Revolutionaries",
    "George Washington Revolutionaries": "GW Revolutionaries",
    "G WASH": "GW Revolutionaries",
    "UAlbany": "Albany Great Danes",
    "Ualbany Great Danes": "Albany Great Danes",
    "Albany NY": "Albany Great Danes",
    "Kansas City Roos": "UMKC Kangaroos",
    "Wisc Green Bay": "Green Bay Phoenix",
    "Wisc Milwaukee": "Milwaukee Panthers",
    "USC Upstate": "South Carolina Upstate Spartans",
    "Seattle U": "Seattle Redhawks",
    "Seattle U Redhawks": "Seattle Redhawks",
    "Central Florida": "UCF Knights",
    "Charlotte U": "Charlotte 49ers",
    "Cal Irvine": "UC Irvine Anteaters",
    "Cal Riverside": "UC Riverside Highlanders",
    "Cal Santa Barbara": "UC Santa Barbara Gauchos",
    "Cal State Bakersfield": "CSU Bakersfield Roadrunners",
    "Cal State Bakersfield Roadrunners": "CSU Bakersfield Roadrunners",
    "Cal State Fullerton Titans": "CSU Fullerton Titans",
    "Cal State Northridge": "CSU Northridge Matadors",
    "Cal State-Northridge": "CSU Northridge Matadors",
    "Cal State Northridge Matadors": "CSU Northridge Matadors",
    "Illinois Chicago": "UIC Flames",
    "Southern Methodist": "SMU Mustangs",
    "Saint Josephs": "Saint Joseph's Hawks",
    "St. Joseph's": "Saint Joseph's Hawks",
    "JOES": "Saint Joseph's Hawks",
    "St. Peter's": "Saint Peter's Peacocks",
    "UL - Lafayette": "Louisiana Ragin' Cajuns",
    "UL - Monroe": "UL Monroe Warhawks",
    "Tennessee Martin": "Tenn-Martin Skyhawks",
    "Ut Martin Skyhawks": "Tenn-Martin Skyhawks",
    "Northern Colorado": "N Colorado Bears",
    "Northern Colorado Bears": "N Colorado Bears",
    "Texas-Arlington": "UT-Arlington Mavericks",
    "McNeese State": "McNeese Cowboys",
    "Mcneese Cowboys": "McNeese Cowboys",
    "Belmontab": "Belmont Bruins",
    "E Carolina": "East Carolina Pirates",
    "S Florida": "South Florida Bulls",
    "SC State": "South Carolina St Bulldogs",
    "Saint Francis Red Flash": "St. Francis (PA) Red Flash",
    # Don't map non-D1 schools to D1 schools
    # "San Diego Christian Hawks" — skip
    # "West Virginia Wesleyan Bobcats" — skip
    # "Penn-St-Fayette" etc — skip (branch campuses)
    # "Montana State-Northern Lights" — skip (different school)
    # "St. Francis Brooklyn Terriers" — skip or map correctly
    "St. Francis Brooklyn Terriers": "LIU Sharks",  # FDU merged w/ LIU
    "Hawai'I Rainbow Warriors": "Hawai'i Rainbow Warriors",
    "Miami Heat": None,  # wrong sport
    "Washington Commanders": None,  # wrong sport
    "Texas Rangers": None,  # wrong sport
    # Connecticut Huskies exists in odds but should be UConn
    "Connecticut Huskies": "UConn Huskies",
    # Colorado State Rams → Colorado St Rams (canonical in odds)
    "Colorado State Rams": "Colorado St Rams",
    "Colorado St. Rams": "Colorado St Rams",
    # North Dakota State → pick one canonical
    "North Dakota State": "North Dakota St Bison",
    "North Dakota State Bison": "North Dakota St Bison",
    "Loyola Chicago": "Loyola (Chi) Ramblers",
    "Loyola-Chicago": "Loyola (Chi) Ramblers",
    "Loyola Chicago Ramblers": "Loyola (Chi) Ramblers",
    # --- Round 2: additional fixes ---
    # Long-form / variant names
    "Long Beach State Beach": "Long Beach St 49ers",
    "Sam Houston Bearkats": "Sam Houston St Bearkats",
    "Texas A&m-Corpus Christi Islanders": "Texas A&M-CC Islanders",
    "Texas A&M-Corpus Christi Islanders": "Texas A&M-CC Islanders",
    "Texas A&M Corpus Christi": "Texas A&M-CC Islanders",
    "Texas A and M-Corpus Christi": "Texas A&M-CC Islanders",
    "American University Eagles": "American Eagles",
    "American University": "American Eagles",
    "Boston University Terriers": "Boston Univ. Terriers",
    "Boston University": "Boston Univ. Terriers",
    "Boston U": "Boston Univ. Terriers",
    "Grand Canyon Lopes": "Grand Canyon Antelopes",
    "Gardner-Webb Runnin' Bulldogs": "Gardner-Webb Bulldogs",
    "Prairie View A&M Panthers": "Prairie View Panthers",
    "Prairie View A&M": "Prairie View Panthers",
    "Prairie View A and M": "Prairie View Panthers",
    "St. Thomas Tommies": "St. Thomas (MN) Tommies",
    "St. Thomas (Minnesota)": "St. Thomas (MN) Tommies",
    "St. Thomas-Minnesota Tommies": "St. Thomas (MN) Tommies",
    "St. Thom": "St. Thomas (MN) Tommies",
    "Southern Mississippi": "Southern Miss Golden Eagles",
    "Middle Tennessee State": "Middle Tennessee Blue Raiders",
    "Central Connecticut Blue Devils": "Central Connecticut St Blue Devils",
    "Georgetown University": "Georgetown Hoyas",
    "Saint Marys CA": "Saint Mary's Gaels",
    "Saint Marys-California": "Saint Mary's Gaels",
    "St Marys (CA)": "Saint Mary's Gaels",
    "Hawaii": "Hawai'i Rainbow Warriors",
    "East Tenn State": "East Tennessee St Buccaneers",
    "Tarleton St": "Tarleton State Texans",
    "CS Bakersfield": "CSU Bakersfield Roadrunners",
    "CS Fullerton": "CSU Fullerton Titans",
    "CS Northridge": "CSU Northridge Matadors",
    "Cal Poly SLO": "Cal Poly Mustangs",
    "Massachusetts Lowell": "UMass Lowell River Hawks",
    "UCSD": "UC San Diego Tritons",
    "Penn St": "Penn State Nittany Lions",
    "Ball St": "Ball State Cardinals",
    "Texas San Antonio": "UTSA Roadrunners",
    "Tamu-San-Antonio": "UTSA Roadrunners",
    "Texas A&M Commerce": "East Texas A&M Lions",
    "Alabama A and M": "Alabama A&M Bulldogs",
    "N Alabama": "North Alabama Lions",
    "Grambling": "Grambling St Tigers",
    "Georgetown": "Georgetown Hoyas",
    "St. Francis PA": "St. Francis (PA) Red Flash",
    "St Francis-Pennsylvania": "St. Francis (PA) Red Flash",
    "Virginia-Lynchburg Dragons": None,
    "Hartford Hawks": None,
    # Single-word D1 school names → canonical with mascot
    "Akron": "Akron Zips",
    "American": "American Eagles",
    "Army": "Army Knights",
    "Auburn": "Auburn Tigers",
    "Baylor": "Baylor Bears",
    "Bellarmine": "Bellarmine Knights",
    "Belmont": "Belmont Bruins",
    "Binghamton": "Binghamton Bearcats",
    "Bradley": "Bradley Braves",
    "Brown": "Brown Bears",
    "Bryant": "Bryant Bulldogs",
    "Bucknell": "Bucknell Bison",
    "Buffalo": "Buffalo Bulls",
    "Butler": "Butler Bulldogs",
    "California": "California Golden Bears",
    "Campbell": "Campbell Fighting Camels",
    "Canisius": "Canisius Golden Griffins",
    "Charlotte": "Charlotte 49ers",
    "Chattanooga": "Chattanooga Mocs",
    "Cincinnati": "Cincinnati Bearcats",
    "Clemson": "Clemson Tigers",
    "Colgate": "Colgate Raiders",
    "Columbia": "Columbia Lions",
    "Cornell": "Cornell Big Red",
    "Creighton": "Creighton Bluejays",
    "Dartmouth": "Dartmouth Big Green",
    "Davidson": "Davidson Wildcats",
    "Dayton": "Dayton Flyers",
    "Delaware": "Delaware Blue Hens",
    "Denver": "Denver Pioneers",
    "DePaul": "DePaul Blue Demons",
    "Detroit": "Detroit Mercy Titans",
    "Drake": "Drake Bulldogs",
    "Drexel": "Drexel Dragons",
    "Duke": "Duke Blue Devils",
    "Duquesne": "Duquesne Dukes",
    "Elon": "Elon Phoenix",
    "Evansville": "Evansville Purple Aces",
    "Fairfield": "Fairfield Stags",
    "Fordham": "Fordham Rams",
    "Furman": "Furman Paladins",
    "Georgia": "Georgia Bulldogs",
    "Gonzaga": "Gonzaga Bulldogs",
    "Hampton": "Hampton Pirates",
    "Harvard": "Harvard Crimson",
    "Hofstra": "Hofstra Pride",
    "Howard": "Howard Bison",
    "Indiana": "Indiana Hoosiers",
    "Iona": "Iona Gaels",
    "Jacksonville": "Jacksonville Dolphins",
    "Kansas": "Kansas Jayhawks",
    "Kentucky": "Kentucky Wildcats",
    "Lafayette": "Lafayette Leopards",
    "Lamar": "Lamar Cardinals",
    "Lehigh": "Lehigh Mountain Hawks",
    "Liberty": "Liberty Flames",
    "Lindenwood": "Lindenwood Lions",
    "Lipscomb": "Lipscomb Bisons",
    "Longwood": "Longwood Lancers",
    "Louisville": "Louisville Cardinals",
    "Maine": "Maine Black Bears",
    "Manhattan": "Manhattan Jaspers",
    "Marist": "Marist Red Foxes",
    "Marquette": "Marquette Golden Eagles",
    "Marshall": "Marshall Thundering Herd",
    "Massachusetts": "Massachusetts Minutemen",
    "Memphis": "Memphis Tigers",
    "Mercer": "Mercer Bears",
    "Mercyhurst": "Mercyhurst Lakers",
    "Merrimack": "Merrimack Warriors",
    "Minnesota": "Minnesota Golden Gophers",
    "Missouri": "Missouri Tigers",
    "Monmouth": "Monmouth Hawks",
    "Montana": "Montana Grizzlies",
    "Navy": "Navy Midshipmen",
    "Nebraska": "Nebraska Cornhuskers",
    "Nevada": "Nevada Wolf Pack",
    "Niagara": "Niagara Purple Eagles",
    "Northeastern": "Northeastern Huskies",
    "Oakland": "Oakland Golden Grizzlies",
    "Pacific": "Pacific Tigers",
    "Pennsylvania": "Pennsylvania Quakers",
    "Pepperdine": "Pepperdine Waves",
    "Pittsburgh": "Pittsburgh Panthers",
    "Presbyterian": "Presbyterian Blue Hose",
    "Princeton": "Princeton Tigers",
    "Providence": "Providence Friars",
    "Purdue": "Purdue Boilermakers",
    "Queens": "Queens University Royals",
    "Quinnipiac": "Quinnipiac Bobcats",
    "Radford": "Radford Highlanders",
    "Rice": "Rice Owls",
    "Richmond": "Richmond Spiders",
    "Rider": "Rider Broncs",
    "Rutgers": "Rutgers Scarlet Knights",
    "Samford": "Samford Bulldogs",
    "Seattle": "Seattle Redhawks",
    "Siena": "Siena Saints",
    "Stanford": "Stanford Cardinal",
    "Stetson": "Stetson Hatters",
    "Stonehill": "Stonehill Skyhawks",
    "Syracuse": "Syracuse Orange",
    "Temple": "Temple Owls",
    "Toledo": "Toledo Rockets",
    "Towson": "Towson Tigers",
    "Troy": "Troy Trojans",
    "Tulane": "Tulane Green Wave",
    "Tulsa": "Tulsa Golden Hurricane",
    "Valparaiso": "Valparaiso Beacons",
    "Vanderbilt": "Vanderbilt Commodores",
    "Vermont": "Vermont Catamounts",
    "Villanova": "Villanova Wildcats",
    "Virginia": "Virginia Cavaliers",
    "Wagner": "Wagner Seahawks",
    "Washington": "Washington Huskies",
    "Winthrop": "Winthrop Eagles",
    "Wisconsin": "Wisconsin Badgers",
    "Wofford": "Wofford Terriers",
    "Wyoming": "Wyoming Cowboys",
    "Xavier": "Xavier Musketeers",
    "Yale": "Yale Bulldogs",
}

# Abbreviation map (SGO/kaggle short codes → canonical name)
ABBREV_TO_CANONICAL = {
    "AFA": "Air Force Falcons",
    "AKR": "Akron Zips",
    "AL A&M": "Alabama A&M Bulldogs",
    "ALA ST": "Alabama St Hornets",
    "ALBY": "Albany Great Danes",
    "AMER": "American Eagles",
    "AND IN": None,
    "A PEAY": "Austin Peay Governors",
    "APP ST": "Appalachian St Mountaineers",
    "AR BAP": None,
    "ARK PB": "Arkansas-Pine Bluff Golden Lions",
    "ARK ST": "Arkansas St Red Wolves",
    "ASU": "Arizona St Sun Devils",
    "AU": "Auburn Tigers",
    "AUG U": None,
    "AZ ST": "Arizona St Sun Devils",
    "BALL": "Ball State Cardinals",
    "BAY": "Baylor Bears",
    "BC": "Boston College Eagles",
    "BGSU": "Bowling Green Falcons",
    "BOB JO": None,
    "BSU": "Boise State Broncos",
    "BUFF": "Buffalo Bulls",
    "C ARK": "Central Arkansas Bears",
    "C CONN": "Central Connecticut St Blue Devils",
    "CAMP": "Campbell Fighting Camels",
    "CHI ST": "Chicago St Cougars",
    "CLE ST": "Cleveland St Vikings",
    "C MICH": "Central Michigan Chippewas",
    "CMU": "Central Michigan Chippewas",
    "CO CAR": "Coastal Carolina Chanticleers",
    "CO ST": "Colorado St Rams",
    "DEL ST": "Delaware St Hornets",
    "DUKE": "Duke Blue Devils",
    "E CAR": "East Carolina Pirates",
    "E ILL": "Eastern Illinois Panthers",
    "E KY": "Eastern Kentucky Colonels",
    "E MICH": "Eastern Michigan Eagles",
    "ELON": "Elon Phoenix",
    "EMU": "Eastern Michigan Eagles",
    "E WASH": "Eastern Washington Eagles",
    "FL A&M": "Florida A&M Rattlers",
    "FRES": "Fresno St Bulldogs",
    "FSU": "Florida St Seminoles",
    "GA SOU": "Georgia Southern Eagles",
    "GA ST": "Georgia St Panthers",
    "GC": "Grand Canyon Antelopes",
    "GMU": "George Mason Patriots",
    "GRAM": "Grambling St Tigers",
    "GW": "GW Revolutionaries",
    "G WEBB": "Gardner-Webb Bulldogs",
    "ID ST": "Idaho State Bengals",
    "IDST": "Idaho State Bengals",
    "ILL ST": "Illinois St Redbirds",
    "ILST": "Illinois St Redbirds",
    "IND ST": "Indiana St Sycamores",
    "IW": "Incarnate Word Cardinals",
    "JAC": "Jacksonville Dolphins",
    "JAX ST": "Jacksonville St Gamecocks",
    "JKST": "Jackson St Tigers",
    "KAN ST": "Kansas St Wildcats",
    "KENT": "Kent State Golden Flashes",
    "KSU": "Kansas St Wildcats",
    "LAF": "Lafayette Leopards",
    "LA MON": "UL Monroe Warhawks",
    "LAS": "La Salle Explorers",
    "LINW": "Lindenwood Lions",
    "LMC": "Le Moyne Dolphins",
    "LOY LA": "Loyola Marymount Lions",
    "LOY MD": "Loyola (MD) Greyhounds",
    "LOU": "Louisville Cardinals",
    "LSU": "LSU Tigers",
    "L-MD": "Loyola (MD) Greyhounds",
    "M-OH": "Miami (OH) RedHawks",
    "MASS": "Massachusetts Minutemen",
    "MEM": "Memphis Tigers",
    "MIA OH": "Miami (OH) RedHawks",
    "MINN": "Minnesota Golden Gophers",
    "MIZZ": "Missouri Tigers",
    "MO ST": "Missouri St Bears",
    "MOSU": "Missouri St Bears",
    "MS VAL": "Miss Valley St Delta Devils",
    "MSU": "Michigan St Spartans",
    "N ALA": "North Alabama Lions",
    "NAU": "Northern Arizona Lumberjacks",
    "NC A&T": "North Carolina A&T Aggies",
    "NC CEN": "North Carolina Central Eagles",
    "NC ILL": "Northern Illinois Huskies",
    "NC ST": "NC State Wolfpack",
    "NCST": "NC State Wolfpack",
    "N COLO": "N Colorado Bears",
    "N DAME": "Notre Dame Fighting Irish",
    "ND ST": "North Dakota St Bison",
    "NEB": "Nebraska Cornhuskers",
    "NEV": "Nevada Wolf Pack",
    "NIU": "Northern Illinois Huskies",
    "N IOWA": "Northern Iowa Panthers",
    "N KY": "Northern Kentucky Norse",
    "NM ST": "New Mexico St Aggies",
    "NO FLA": "North Florida Ospreys",
    "NO TEX": "North Texas Mean Green",
    "NW ST": "Northwestern St Demons",
    "ODU": "Old Dominion Monarchs",
    "ORE ST": "Oregon St Beavers",
    "OSU": "Ohio State Buckeyes",
    "PEAY": "Austin Peay Governors",
    "PFW": "Fort Wayne Mastodons",
    "PSU": "Penn State Nittany Lions",
    "PUR": "Purdue Boilermakers",
    "PV A&M": "Prairie View Panthers",
    "RID": "Rider Broncs",
    "S ALA": "South Alabama Jaguars",
    "S Alabama": "South Alabama Jaguars",
    "SAC ST": "Sacramento St Hornets",
    "SBON": "St. Bonaventure Bonnies",
    "SC ST": "South Carolina St Bulldogs",
    "SC UPS": "South Carolina Upstate Spartans",
    "SDSU": "San Diego St Aztecs",
    "SE LA": "SE Louisiana Lions",
    "SEMO": "SE Missouri St Redhawks",
    "S FRAN": "San Francisco Dons",
    "SIH": "Southern Illinois Salukis",
    "SIND": "Southern Indiana Screaming Eagles",
    "S IND": "Southern Indiana Screaming Eagles",
    "SIU": "Southern Illinois Salukis",
    "SJSU": "San José St Spartans",
    "SLU": "Saint Louis Billikens",
    "SMC": "Saint Mary's Gaels",
    "SMU": "SMU Mustangs",
    "S MISS": "Southern Miss Golden Eagles",
    "SO DAK": "South Dakota Coyotes",
    "S UTAH": "Southern Utah Thunderbirds",
    "ST LOU": "Saint Louis Billikens",
    "ST PTR": "Saint Peter's Peacocks",
    "STON": "Stonehill Skyhawks",
    "TAMC": "Texas A&M-CC Islanders",
    "TARL": "Tarleton State Texans",
    "TCU": "TCU Horned Frogs",
    "TOWS": "Towson Tigers",
    "TTU": "Texas Tech Red Raiders",
    "TX ARL": "UT-Arlington Mavericks",
    "TX SOU": "Texas Southern Tigers",
    "TX ST": "Texas State Bobcats",
    "UC DAV": "UC Davis Aggies",
    "UC IRV": "UC Irvine Anteaters",
    "UC RIV": "UC Riverside Highlanders",
    "UCF": "UCF Knights",
    "UCLA": "UCLA Bruins",
    "UGA": "Georgia Bulldogs",
    "UK": "Kentucky Wildcats",
    "UMass": "Massachusetts Minutemen",
    "UNC": "North Carolina Tar Heels",
    "UNC A": "UNC Asheville Bulldogs",
    "UNC G": "UNC Greensboro Spartans",
    "UNCO": "N Colorado Bears",
    "UNF": "North Florida Ospreys",
    "URI": "Rhode Island Rams",
    "USU": "Utah State Aggies",
    "UT MAR": "Tenn-Martin Skyhawks",
    "UT ST": "Utah State Aggies",
    "UT VAL": "Utah Valley Wolverines",
    "UTM": "Tenn-Martin Skyhawks",
    "VCU": "VCU Rams",
    "VILL": "Villanova Wildcats",
    "VT": "Virginia Tech Hokies",
    "W CAR": "Western Carolina Catamounts",
    "WAG": "Wagner Seahawks",
    "WEB": "Weber State Wildcats",
    "WEB ST": "Weber State Wildcats",
    "W GA": "West Georgia Wolves",
    "WICH": "Wichita St Shockers",
    "W ILL": "Western Illinois Leathernecks",
    "WIU": "Western Illinois Leathernecks",
    "WIS": "Wisconsin Badgers",
    "W KY": "Western Kentucky Hilltoppers",
    "WM": "William & Mary Tribe",
    "W&M": "William & Mary Tribe",
    "WM&MRY": "William & Mary Tribe",
    "W MICH": "Western Michigan Broncos",
    "WMU": "Western Michigan Broncos",
    "WYO": "Wyoming Cowboys",
    "XAV": "Xavier Musketeers",
    "YSU": "Youngstown St Penguins",
    "MO SOU": None, "CO CHR": None, "CO COL": None, "FL CHR": None,
    "BL COL": None, "KY CHR": None, "KY ST": None, "LA SI": None,
    "MGA ST": None, "MHU": None, "MT ALO": None, "MT OLI": None,
    "MT STV": None, "MSM NY": None, "NC MIN": None, "NC WES": None,
    "ND MD": None, "NE ST": None, "IND NW": None, "PUR NW": None,
    "REG MA": None, "SF ST": None, "SOU NO": None, "S ORE": None,
    "TN WES": None, "TNTC": None, "TX COL": None, "TX DAL": None,
    "TX LUT": None, "TX PER": None, "TX WES": None, "UW ST": None,
    "W NM": None, "VA LYN": None, "VA WES": None, "S WESL": None,
    "SW TX": None, "PAC OR": None, "ED WAT": None, "MON IL": None,
    "CEN ST": None, "MO S&T": None, "MS&T": None, "MNE FK": None,
    "PR BYM": None, "PR MAY": None, "ROG ST": None, "SO VA": None,
    "PSU AL": None, "FT VAL": None, "UC CS": None, "L-IL": None,
}


def build_name_map(cur):
    """Build the complete mapping from all dirty names → canonical."""
    # 1. Get canonical names from odds tables
    cur.execute("""
        SELECT DISTINCT name FROM (
            SELECT "homeTeam" AS name FROM "GameOdds" WHERE league='ncaab'
            UNION SELECT "awayTeam" FROM "GameOdds" WHERE league='ncaab'
            UNION SELECT "homeTeam" FROM "OddsSnapshot" WHERE league='ncaab'
            UNION SELECT "awayTeam" FROM "OddsSnapshot" WHERE league='ncaab'
            UNION SELECT "homeTeam" FROM "LineMovement" WHERE league='ncaab'
            UNION SELECT "awayTeam" FROM "LineMovement" WHERE league='ncaab'
        ) x WHERE name IS NOT NULL
    """)
    canonical_set = set(row[0] for row in cur.fetchall())

    # Apply odds dupes normalization
    for old, new in ODDS_DUPES.items():
        canonical_set.discard(old)
        canonical_set.add(new)

    # 2. Get all names from ALL ncaab tables
    cur.execute("""
        SELECT DISTINCT name FROM (
            SELECT "homeTeam" AS name FROM "SportsGame" WHERE league='ncaab'
            UNION SELECT "awayTeam" FROM "SportsGame" WHERE league='ncaab'
            UNION SELECT "homeTeam" FROM "OddsSnapshot" WHERE league='ncaab'
            UNION SELECT "awayTeam" FROM "OddsSnapshot" WHERE league='ncaab'
            UNION SELECT "homeTeam" FROM "GameOdds" WHERE league='ncaab'
            UNION SELECT "awayTeam" FROM "GameOdds" WHERE league='ncaab'
            UNION SELECT "homeTeam" FROM "LineMovement" WHERE league='ncaab'
            UNION SELECT "awayTeam" FROM "LineMovement" WHERE league='ncaab'
        ) x WHERE name IS NOT NULL
    """)
    all_names = set(row[0] for row in cur.fetchall())

    # 3. Build the map
    name_map = {}

    # First: apply manual fixes and abbreviations
    for dirty, canonical in {**MANUAL_FIXES, **ABBREV_TO_CANONICAL}.items():
        if canonical is None:
            continue
        if dirty in all_names and dirty != canonical:
            name_map[dirty] = canonical

    # Second: auto-match remaining names by prefix
    import re
    def normalize(s):
        s = s.lower().strip()
        s = s.replace("\u2019", "").replace("'", "").replace(".", "").replace("-", " ").replace("&", "and")
        s = re.sub(r'\s+', ' ', s)
        return s

    canonical_by_norm = {normalize(n): n for n in canonical_set}

    for name in sorted(all_names):
        if name in canonical_set or name in name_map:
            continue

        n = normalize(name)

        # Exact normalized match
        if n in canonical_by_norm:
            name_map[name] = canonical_by_norm[n]
            continue

        # "State" → "St" normalization
        n2 = n.replace(" state ", " st ").replace(" state", " st")
        if n2 != n and n2 in canonical_by_norm:
            name_map[name] = canonical_by_norm[n2]
            continue

        # Full name without mascot → canonical with mascot
        # e.g. "Abilene Christian" → "Abilene Christian Wildcats"
        for cn_norm, cn in canonical_by_norm.items():
            if cn_norm.startswith(n + " ") and " " in n:  # require 2+ word match
                name_map[name] = cn
                break

        if name in name_map:
            continue

        # Try with "State" → "St"
        if n2 != n:
            for cn_norm, cn in canonical_by_norm.items():
                if cn_norm.startswith(n2 + " ") and " " in n2:
                    name_map[name] = cn
                    break

    # Also add odds dupe normalization
    for old, new in ODDS_DUPES.items():
        if old in all_names:
            name_map[old] = new

    return name_map, canonical_set


# ---------------------------------------------------------------------------
# Normalize a table's team columns
# ---------------------------------------------------------------------------

def normalize_table(cur, dry_run, table, columns, name_map, unique_key_cols_for_col=None, update_extra=None):
    """Normalize team name columns in a table.
    update_extra: dict of {"col_expr": "val_expr"} to add to SET clause (e.g. updatedAt=NOW()).
    """
    total_updated = 0
    total_deleted = 0

    for col in columns:
        cur.execute(
            f'SELECT DISTINCT "{col}" FROM "{table}" WHERE league=%s',
            ("ncaab",),
        )
        distinct_names = [row[0] for row in cur.fetchall() if row[0]]

        for name in distinct_names:
            canonical = name_map.get(name)
            if not canonical or canonical == name:
                continue

            cur.execute(
                f'SELECT COUNT(*) FROM "{table}" WHERE league=%s AND "{col}"=%s',
                ("ncaab", name),
            )
            count = cur.fetchone()[0]
            if count == 0:
                continue

            # Check for unique constraint conflicts
            uk_cols = (unique_key_cols_for_col or {}).get(col, [])
            conflict_count = 0
            if uk_cols:
                join_conds = " AND ".join(f'c."{c}" = d."{c}"' for c in uk_cols)
                cur.execute(
                    f"""SELECT COUNT(*) FROM "{table}" d
                        WHERE d.league=%s AND d."{col}"=%s
                        AND EXISTS (
                            SELECT 1 FROM "{table}" c
                            WHERE c.league=%s AND c."{col}"=%s
                            AND {join_conds}
                        )""",
                    ("ncaab", name, "ncaab", canonical),
                )
                conflict_count = cur.fetchone()[0]

            safe_count = count - conflict_count

            if conflict_count > 0:
                print(f"  {table}.{col}: '{name}' → delete {conflict_count} dupes")
                if not dry_run:
                    join_conds = " AND ".join(f'c."{c}" = d."{c}"' for c in uk_cols)
                    cur.execute(
                        f"""DELETE FROM "{table}" d
                            USING "{table}" c
                            WHERE d.league=%s AND d."{col}"=%s
                            AND c.league=%s AND c."{col}"=%s
                            AND {join_conds}""",
                        ("ncaab", name, "ncaab", canonical),
                    )
                total_deleted += conflict_count

            if safe_count > 0:
                print(f"  {table}.{col}: '{name}' → '{canonical}' ({safe_count} rows)")
                if not dry_run:
                    extra_set = ""
                    if update_extra:
                        extra_set = ", " + ", ".join(f'{k}={v}' for k, v in update_extra.items())
                    cur.execute(
                        f'UPDATE "{table}" SET "{col}"=%s{extra_set} WHERE league=%s AND "{col}"=%s',
                        (canonical, "ncaab", name),
                    )
                total_updated += safe_count

    return total_updated, total_deleted


# ---------------------------------------------------------------------------
# Phases
# ---------------------------------------------------------------------------

def phase1(cur, dry_run, name_map):
    print("\n=== Phase 1: Normalize SportsGame homeTeam/awayTeam ===")
    unique_key_cols = {
        "homeTeam": ["season", "gameDate", "awayTeam"],
        "awayTeam": ["season", "gameDate", "homeTeam"],
    }
    u, d = normalize_table(
        cur, dry_run, "SportsGame", ["homeTeam", "awayTeam"], name_map,
        unique_key_cols_for_col=unique_key_cols,
        update_extra={'"updatedAt"': "NOW()"},
    )
    print(f"  Phase 1: {u} updated, {d} dupes deleted {'(dry run)' if dry_run else ''}")
    return u + d

def phase2(cur, dry_run, name_map):
    print("\n=== Phase 2: Normalize OddsSnapshot homeTeam/awayTeam ===")
    u, d = normalize_table(cur, dry_run, "OddsSnapshot", ["homeTeam", "awayTeam"], name_map)
    print(f"  Phase 2: {u} updated, {d} dupes deleted {'(dry run)' if dry_run else ''}")
    return u + d

def phase3(cur, dry_run, name_map):
    print("\n=== Phase 3: Normalize GameOdds homeTeam/awayTeam ===")
    u, d = normalize_table(cur, dry_run, "GameOdds", ["homeTeam", "awayTeam"], name_map)
    print(f"  Phase 3: {u} updated, {d} dupes deleted {'(dry run)' if dry_run else ''}")
    return u + d

def phase4(cur, dry_run, name_map):
    print("\n=== Phase 4: Normalize LineMovement homeTeam/awayTeam ===")
    u, d = normalize_table(cur, dry_run, "LineMovement", ["homeTeam", "awayTeam"], name_map)
    print(f"  Phase 4: {u} updated, {d} dupes deleted {'(dry run)' if dry_run else ''}")
    return u + d

def phase5(cur, dry_run, name_map):
    print("\n=== Phase 5: Normalize ConsensusOdds + other tables ===")
    total = 0
    # Table-specific unique key info: {table: {col: [other_uk_cols]}}
    table_uk = {
        "TeamGameMetric": {
            "team": ["season", "gameKey", "statKey"],
        },
    }
    for table in ["ConsensusOdds", "PeriodOdds", "TeamGameMetric"]:
        # Check if table has ncaab data and homeTeam/awayTeam or team/opponent
        try:
            cur.execute(f'SELECT COUNT(*) FROM "{table}" WHERE league=%s', ("ncaab",))
            cnt = cur.fetchone()[0]
            if cnt == 0:
                continue
        except Exception:
            cur.connection.rollback()
            continue

        # Determine column names
        cur.execute(
            "SELECT column_name FROM information_schema.columns WHERE table_name=%s AND column_name IN ('homeTeam','awayTeam','team','opponent')",
            (table,),
        )
        cols = [row[0] for row in cur.fetchall()]
        if cols:
            uk = table_uk.get(table)
            u, d = normalize_table(cur, dry_run, table, cols, name_map, unique_key_cols_for_col=uk)
            print(f"  {table}: {u} updated, {d} dupes deleted")
            total += u + d

    print(f"  Phase 5 total: {total} {'(dry run)' if dry_run else ''}")
    return total

def phase6(cur, dry_run):
    print("\n=== Phase 6: Link orphaned OddsSnapshot to SportsGame ===")
    # Find orphaned snapshots that can be matched by team+date
    cur.execute("""
        SELECT os.id, g.id AS game_id
        FROM "OddsSnapshot" os
        JOIN "SportsGame" g ON g.league='ncaab'
            AND g."homeTeam" = os."homeTeam"
            AND g."awayTeam" = os."awayTeam"
            AND g."gameDate"::date = os."gameDate"::date
        WHERE os.league='ncaab' AND os."gameId" IS NULL
    """)
    rows = cur.fetchall()
    print(f"  Found {len(rows)} orphaned snapshots to link")

    if rows and not dry_run:
        for os_id, game_id in rows:
            cur.execute(
                'UPDATE "OddsSnapshot" SET "gameId"=%s WHERE id=%s',
                (game_id, os_id),
            )

    print(f"  Phase 6: {len(rows)} snapshots linked {'(dry run)' if dry_run else ''}")
    return len(rows)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    parser = argparse.ArgumentParser(description="Normalize NCAAB team names across all tables")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing")
    parser.add_argument("--phase", type=int, choices=[1, 2, 3, 4, 5, 6], help="Run only this phase")
    args = parser.parse_args()

    conn = connect_db()
    cur = conn.cursor()

    try:
        print("Building name map...")
        name_map, canonical_set = build_name_map(cur)
        print(f"  {len(name_map)} dirty names → {len(canonical_set)} canonical names")

        grand_total = 0
        phases = [args.phase] if args.phase else [1, 2, 3, 4, 5, 6]

        for p in phases:
            if p == 1:
                grand_total += phase1(cur, args.dry_run, name_map)
            elif p == 2:
                grand_total += phase2(cur, args.dry_run, name_map)
            elif p == 3:
                grand_total += phase3(cur, args.dry_run, name_map)
            elif p == 4:
                grand_total += phase4(cur, args.dry_run, name_map)
            elif p == 5:
                grand_total += phase5(cur, args.dry_run, name_map)
            elif p == 6:
                grand_total += phase6(cur, args.dry_run)

        if not args.dry_run:
            conn.commit()
            print(f"\nCommitted. Total rows affected: {grand_total}")
        else:
            conn.rollback()
            print(f"\nDry run complete. Total rows that would be affected: {grand_total}")

    except Exception as e:
        conn.rollback()
        print(f"\nERROR: {e}")
        import traceback
        traceback.print_exc()
        raise
    finally:
        cur.close()
        conn.close()


if __name__ == "__main__":
    main()
