#!/usr/bin/env npx tsx
/**
 * Full Canonical Backfill Script
 *
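 * Backfills canonical player/game IDs on records that are not linked yet.
 * PlayerGameMetric and PlayerPropLine are limited to rows dated 2023-01-01
 * or later and are read in batches; PlayerInjury has no date filter.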
 *
 * Usage:
 *   npx tsx scripts/backfill-canonical-full.ts --league=nba
 *   npx tsx scripts/backfill-canonical-full.ts --table=PlayerGameMetric
 *   npx tsx scripts/backfill-canonical-full.ts  # all leagues, all tables
 */

import { PrismaClient } from '../prisma_sports/generated/sports-client';

// Shared Prisma client used by every query in this script; main() disconnects it in its finally block.
const prisma = new PrismaClient();
// Page size passed as `take` to the batched findMany reads.
const BATCH_SIZE = 1000;
// Inclusive lower bound (`gte`) for the gameDate / createdAt filters.
// A date-only ISO string is parsed as midnight UTC.
const START_DATE = new Date('2023-01-01');

/**
 * Converts a display name into the key form used for `normalizedName` lookups.
 *
 * Steps, in order: lowercase, strip combining diacritics, trim, drop one
 * trailing generational suffix (jr, sr, ii, iii, iv, v), remove periods,
 * collapse whitespace runs to a single space.
 *
 * @param name Raw player name; empty/falsy input yields ''.
 * @returns The normalized name, or '' when nothing usable remains.
 */
function normalizePlayerName(name: string): string {
  if (!name) return '';
  return name
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    // The suffix pattern is anchored to the end of the string, so trailing
    // whitespace must be removed first or "Jackson Jr. " keeps its suffix.
    .trim()
    .replace(/\s+(jr\.?|sr\.?|iii|ii|iv|v)$/i, '')
    .replace(/\./g, '')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Splits an SGO-style player ID of the form `FIRST_LAST_<digits>_<LEAGUE>`
 * into a lowercase, space-joined name and a lowercase league code.
 *
 * @param sgoId Candidate identifier, e.g. `LEBRON_JAMES_1_NBA`.
 * @returns `{ name, league }`, or `null` when the value is not a string, has
 *   no recognised league suffix, has no numeric segment before the league, or
 *   carries no name segments at all.
 */
function parseSgoPlayerId(sgoId: string): { name: string; league: string } | null {
  if (!sgoId || typeof sgoId !== 'string') return null;

  const leagueMatch = /_(NBA|NFL|NHL|MLB|WNBA|NCAAB|NCAAF|MMA|UFC)$/i.exec(sgoId);
  const leagueCode = leagueMatch?.[1];
  if (!leagueMatch || !leagueCode) return null;

  const segments = sgoId.slice(0, -leagueMatch[0].length).split('_');

  // The segment right before the league must be the numeric sequence number.
  const sequence = segments.pop();
  if (sequence === undefined || !/^\d+$/.test(sequence)) return null;

  // Drop empty segments (from doubled or leading underscores) so the name has
  // no stray spaces, and reject IDs that carry no name at all.
  const name = segments
    .filter((segment) => segment.length > 0)
    .join(' ')
    .toLowerCase();
  if (!name) return null;

  return { name, league: leagueCode.toLowerCase() };
}

// Cache for canonical player lookups. Two kinds of keys are stored:
//   `${league}:${externalId}`              -> canonical id, or null once every
//                                             ID-based strategy has missed
//   `${league}:${externalId}\0${normName}` -> null once the name fallback has
//                                             missed for that normalized name
const playerCache = new Map<string, bigint | null>();

/**
 * Runs the lookups that depend only on the external ID, in priority order:
 * alias, SGO ID, BDL/ESPN ID, then the name embedded in an SGO-style ID.
 * Returns the canonical player id, or null when none of them match.
 */
async function lookupPlayerByExternalId(league: string, playerExternalId: string): Promise<bigint | null> {
  // NOTE(review): the alias lookup is not scoped by league — confirm aliases
  // are unique across leagues.
  const alias = await prisma.playerAlias.findFirst({
    where: { alias: playerExternalId },
    select: { playerId: true },
  });
  if (alias) return alias.playerId;

  const bySgoId = await prisma.canonicalPlayer.findFirst({
    where: { league, sgoId: playerExternalId },
    select: { id: true },
  });
  if (bySgoId) return bySgoId.id;

  const byExternalId = await prisma.canonicalPlayer.findFirst({
    where: {
      league,
      OR: [
        { bdlId: playerExternalId },
        { espnId: playerExternalId },
      ],
    },
    select: { id: true },
  });
  if (byExternalId) return byExternalId.id;

  const parsed = parseSgoPlayerId(playerExternalId);
  if (parsed) {
    // Try the raw parsed name first (previous behaviour), then the same name
    // run through normalizePlayerName so suffixes such as "jr" embedded in the
    // ID do not prevent a match. The Set removes the duplicate when both agree.
    const candidates = new Set([parsed.name, normalizePlayerName(parsed.name)]);
    for (const normalizedName of candidates) {
      if (!normalizedName) continue;
      const byParsedName = await prisma.canonicalPlayer.findUnique({
        where: { league_normalizedName: { league, normalizedName } },
        select: { id: true },
      });
      if (byParsedName) return byParsedName.id;
    }
  }

  return null;
}

/**
 * Resolves a source record's player to a canonical player id.
 *
 * ID-based strategies run first; if they all miss and `playerName` is given,
 * the normalized name is looked up as a last resort. Hits are cached per
 * league + external ID. Misses are cached separately for the ID-based
 * strategies and for each attempted name, so an earlier call made without a
 * name cannot suppress the name fallback of a later call.
 *
 * @param league League code used to scope canonical player lookups.
 * @param playerExternalId Provider-specific player identifier.
 * @param playerName Optional display name used for the name fallback.
 * @returns The canonical player id, or null when no strategy matches.
 */
async function findCanonicalPlayer(league: string, playerExternalId: string, playerName?: string | null): Promise<bigint | null> {
  const idKey = `${league}:${playerExternalId}`;
  const cachedForId = playerCache.get(idKey);

  // Explicit null/undefined checks: a bigint id of 0n is falsy but still valid.
  if (cachedForId !== undefined && cachedForId !== null) {
    return cachedForId;
  }

  // undefined = never looked up; null = ID-based strategies already missed.
  if (cachedForId === undefined) {
    const byId = await lookupPlayerByExternalId(league, playerExternalId);
    playerCache.set(idKey, byId);
    if (byId !== null) return byId;
  }

  const normalized = playerName ? normalizePlayerName(playerName) : '';
  if (!normalized) return null;

  // Only misses are stored under the name key, so presence means "known miss".
  const nameKey = `${idKey}\u0000${normalized}`;
  if (playerCache.has(nameKey)) return null;

  const byName = await prisma.canonicalPlayer.findUnique({
    where: { league_normalizedName: { league, normalizedName: normalized } },
    select: { id: true },
  });
  if (byName) {
    // Promote the hit to the ID key so later calls for this ID return at once.
    playerCache.set(idKey, byName.id);
    return byName.id;
  }

  playerCache.set(nameKey, null);
  return null;
}

// Cache for canonical game lookups, keyed by `${league}:${gameId}`.
// A stored null records a confirmed miss so it is not queried again.
const gameCache = new Map<string, bigint | null>();

/**
 * Resolves a SportsGame row to a canonical game id.
 *
 * Loads the SportsGame's `externalGameId` and matches it against
 * `canonicalGame.sgoEventId` first, then `canonicalGame.espnEventId`, both
 * scoped to `league`. There is no date/team based fallback.
 *
 * @param league League code used to scope the canonical game lookup.
 * @param gameId Primary key of the SportsGame row.
 * @returns The canonical game id, or null when the SportsGame row is missing,
 *   has no external id, or matches no canonical game.
 */
async function findCanonicalGame(league: string, gameId: bigint): Promise<bigint | null> {
  const cacheKey = `${league}:${gameId}`;

  // Compare against undefined rather than using `||`: a cached null is a valid
  // "known miss" and a bigint id of 0n is falsy.
  const cached = gameCache.get(cacheKey);
  if (cached !== undefined) {
    return cached;
  }

  // Only externalGameId is read below, so that is the only column selected.
  const sportsGame = await prisma.sportsGame.findUnique({
    where: { id: gameId },
    select: { externalGameId: true },
  });

  let canonicalGameId: bigint | null = null;
  const externalGameId = sportsGame?.externalGameId;

  if (externalGameId) {
    // `??` short-circuits: the ESPN query runs only when the SGO query misses.
    const match =
      (await prisma.canonicalGame.findFirst({
        where: { league, sgoEventId: externalGameId },
        select: { id: true },
      })) ??
      (await prisma.canonicalGame.findFirst({
        where: { league, espnEventId: externalGameId },
        select: { id: true },
      }));
    canonicalGameId = match?.id ?? null;
  }

  gameCache.set(cacheKey, canonicalGameId);
  return canonicalGameId;
}

/**
 * Links PlayerGameMetric rows (gameDate >= START_DATE, canonicalPlayerId null)
 * to canonical players, one league at a time.
 *
 * Rows are read in pages of BATCH_SIZE using keyset pagination on `id`
 * (`id > lastId`, ordered ascending). Offset pagination is unsafe here: every
 * linked row leaves the `canonicalPlayerId: null` filter, so a growing `skip`
 * would jump over rows that were never examined.
 *
 * @param leagues League codes to process, in order.
 */
async function backfillPlayerGameMetric(leagues: string[]): Promise<void> {
  console.log('\n=== Backfilling PlayerGameMetric ===\n');

  for (const league of leagues) {
    console.log(`Processing ${league.toUpperCase()}...`);

    const pending = {
      league,
      gameDate: { gte: START_DATE },
      canonicalPlayerId: null,
    };

    // Counted once up front; used only for progress output.
    const total = await prisma.playerGameMetric.count({ where: pending });

    console.log(`  ${total.toLocaleString()} records to process`);

    let processed = 0;
    let updated = 0;
    let failed = 0;
    let lastId: bigint | undefined;

    for (;;) {
      const batch = await prisma.playerGameMetric.findMany({
        where: lastId === undefined ? pending : { ...pending, id: { gt: lastId } },
        select: { id: true, playerExternalId: true, playerName: true },
        orderBy: { id: 'asc' },
        take: BATCH_SIZE,
      });

      if (batch.length === 0) break;

      const updates: { id: bigint; canonicalPlayerId: bigint }[] = [];

      for (const record of batch) {
        // Advance the cursor for every row, matched or not, so unmatched rows
        // are not fetched again during this run.
        lastId = record.id;

        const canonicalPlayerId = await findCanonicalPlayer(league, record.playerExternalId, record.playerName);
        if (canonicalPlayerId !== null) {
          updates.push({ id: record.id, canonicalPlayerId });
        } else {
          failed++;
        }
        processed++;
      }

      // Apply the whole page atomically.
      if (updates.length > 0) {
        await prisma.$transaction(
          updates.map(u =>
            prisma.playerGameMetric.update({
              where: { id: u.id },
              data: { canonicalPlayerId: u.canonicalPlayerId },
            })
          )
        );
        updated += updates.length;
      }

      // A short page means there is nothing left after it.
      const isLastBatch = batch.length < BATCH_SIZE;

      if (processed % 10000 === 0 || isLastBatch) {
        console.log(`    Progress: ${processed.toLocaleString()}/${total.toLocaleString()} (${updated.toLocaleString()} linked, ${failed.toLocaleString()} unmatched)`);
      }

      if (isLastBatch) break;
    }

    console.log(`  ${league.toUpperCase()} complete: ${updated.toLocaleString()} linked, ${failed.toLocaleString()} unmatched`);
  }
}

/**
 * Links PlayerPropLine rows (createdAt >= START_DATE, canonicalPlayerId null)
 * to canonical players and, when the row has a gameId, to canonical games.
 *
 * Rows are read in pages of BATCH_SIZE using keyset pagination on `id`
 * (`id > lastId`, ordered ascending). Offset pagination is unsafe here because
 * linked rows leave the `canonicalPlayerId: null` filter while it is being
 * paged, which makes a growing `skip` jump over unexamined rows.
 *
 * A row is updated when at least one of the two links resolves; only the
 * resolved link(s) are written, so an unresolved lookup never overwrites a
 * value already stored on the row.
 *
 * @param leagues League codes to process, in order.
 */
async function backfillPlayerPropLine(leagues: string[]): Promise<void> {
  console.log('\n=== Backfilling PlayerPropLine ===\n');

  for (const league of leagues) {
    console.log(`Processing ${league.toUpperCase()}...`);

    const pending = {
      league,
      createdAt: { gte: START_DATE },
      canonicalPlayerId: null,
    };

    // Counted once up front; used only for progress output.
    const total = await prisma.playerPropLine.count({ where: pending });

    console.log(`  ${total.toLocaleString()} records to process`);

    let processed = 0;
    let updated = 0;
    let failed = 0;
    let lastId: bigint | undefined;

    for (;;) {
      const batch = await prisma.playerPropLine.findMany({
        where: lastId === undefined ? pending : { ...pending, id: { gt: lastId } },
        select: { id: true, playerExternalId: true, gameId: true },
        orderBy: { id: 'asc' },
        take: BATCH_SIZE,
      });

      if (batch.length === 0) break;

      const updates: { id: bigint; canonicalPlayerId: bigint | null; canonicalGameId: bigint | null }[] = [];

      for (const record of batch) {
        // Advance the cursor for every row so rows that stay unlinked (or get
        // only a game link) are not fetched again during this run.
        lastId = record.id;

        const canonicalPlayerId = await findCanonicalPlayer(league, record.playerExternalId);

        // Explicit null check: a bigint id of 0n is falsy but still an id.
        const canonicalGameId =
          record.gameId != null ? await findCanonicalGame(league, record.gameId) : null;

        if (canonicalPlayerId !== null || canonicalGameId !== null) {
          updates.push({ id: record.id, canonicalPlayerId, canonicalGameId });
        } else {
          failed++;
        }
        processed++;
      }

      // Apply the whole page atomically.
      if (updates.length > 0) {
        await prisma.$transaction(
          updates.map(u =>
            prisma.playerPropLine.update({
              where: { id: u.id },
              data: {
                // `undefined` tells Prisma to leave the column untouched,
                // whereas `null` would overwrite whatever is stored.
                canonicalPlayerId: u.canonicalPlayerId ?? undefined,
                canonicalGameId: u.canonicalGameId ?? undefined,
              },
            })
          )
        );
        updated += updates.length;
      }

      // A short page means there is nothing left after it.
      const isLastBatch = batch.length < BATCH_SIZE;

      if (processed % 5000 === 0 || isLastBatch) {
        console.log(`    Progress: ${processed.toLocaleString()}/${total.toLocaleString()} (${updated.toLocaleString()} linked)`);
      }

      if (isLastBatch) break;
    }

    console.log(`  ${league.toUpperCase()} complete: ${updated.toLocaleString()} linked, ${failed.toLocaleString()} unmatched`);
  }
}

/**
 * Links PlayerInjury rows that have no canonicalPlayerId to canonical
 * players, one league at a time.
 *
 * Unlike the other backfills there is no date filter and no paging: all
 * unlinked rows for a league are loaded in one query and updated one by one.
 *
 * @param leagues League codes to process, in order.
 */
async function backfillPlayerInjury(leagues: string[]): Promise<void> {
  console.log('\n=== Backfilling PlayerInjury ===\n');

  for (const league of leagues) {
    const label = league.toUpperCase();
    console.log(`Processing ${label}...`);

    // Same filter for the count and the fetch.
    const unlinked = { league, canonicalPlayerId: null };

    const pendingCount = await prisma.playerInjury.count({ where: unlinked });
    console.log(`  ${pendingCount.toLocaleString()} records to process`);

    const tally = { linked: 0, unmatched: 0 };

    const injuries = await prisma.playerInjury.findMany({
      where: unlinked,
      select: { id: true, playerExternalId: true, playerName: true },
    });

    for (const injury of injuries) {
      const playerId = await findCanonicalPlayer(league, injury.playerExternalId, injury.playerName);

      if (!playerId) {
        tally.unmatched++;
        continue;
      }

      await prisma.playerInjury.update({
        where: { id: injury.id },
        data: { canonicalPlayerId: playerId },
      });
      tally.linked++;
    }

    console.log(`  ${label} complete: ${tally.linked.toLocaleString()} linked, ${tally.unmatched.toLocaleString()} unmatched`);
  }
}

/**
 * CLI entry point.
 *
 * Flags:
 *   --league=<code>  Process a single league (case-insensitive). Default: nba, nfl, nhl, mlb.
 *   --table=<name>   Process a single table (case-insensitive): PlayerGameMetric,
 *                    PlayerPropLine or PlayerInjury. Default: all three.
 *
 * @throws Error when --table names a table this script does not know, so a
 *   typo fails loudly instead of reporting a successful run that did nothing.
 */
async function main(): Promise<void> {
  const knownTables = ['PlayerGameMetric', 'PlayerPropLine', 'PlayerInjury'];

  try {
    const args = process.argv.slice(2);
    const readFlag = (flag: string): string | undefined => {
      const prefix = `--${flag}=`;
      const hit = args.find(a => a.startsWith(prefix));
      return hit === undefined ? undefined : hit.slice(prefix.length).trim();
    };

    // The default league codes are lowercase, so normalise user input to match.
    const leagueArg = readFlag('league')?.toLowerCase();
    const tableArg = readFlag('table');

    const leagues = leagueArg ? [leagueArg] : ['nba', 'nfl', 'nhl', 'mlb'];

    let tables = knownTables;
    if (tableArg) {
      const wanted = tableArg.toLowerCase();
      const match = knownTables.find(t => t.toLowerCase() === wanted);
      if (match === undefined) {
        throw new Error(`Unknown table "${tableArg}". Valid tables: ${knownTables.join(', ')}`);
      }
      tables = [match];
    }

    console.log('='.repeat(60));
    console.log('FULL CANONICAL BACKFILL (2023+)');
    console.log('='.repeat(60));
    console.log(`Leagues: ${leagues.join(', ').toUpperCase()}`);
    console.log(`Tables: ${tables.join(', ')}`);
    console.log(`Start Date: ${START_DATE.toISOString().slice(0, 10)}`);

    if (tables.includes('PlayerGameMetric')) {
      await backfillPlayerGameMetric(leagues);
    }
    if (tables.includes('PlayerPropLine')) {
      await backfillPlayerPropLine(leagues);
    }
    if (tables.includes('PlayerInjury')) {
      await backfillPlayerInjury(leagues);
    }

    console.log('\n' + '='.repeat(60));
    console.log('BACKFILL COMPLETE');
    console.log('='.repeat(60));

  } finally {
    // Always release the client, including when argument validation throws.
    await prisma.$disconnect();
  }
}

// Run the backfill; on any rejection, log the error and exit with status 1.
function reportFatal(error: unknown): never {
  console.error('Backfill failed:', error);
  process.exit(1);
}

main().catch(reportFatal);
