#!/usr/bin/env npx ts-node
/**
 * MLB Entity Resolution — Link CanonicalPlayer to PlayerGameModel
 *
 * MLB has 1,054 CanonicalPlayer entries but 0% external ID linkage.
 * This script matches CanonicalPlayer to PGM rows by name and populates sgoId.
 *
 * Phase 1: Name matching (exact + normalized)
 * Phase 2 (future): FanGraphs ID crosswalk via CSV
 *
 * Usage:
 *   npx ts-node src/scripts/link-mlb-players.ts
 *   npx ts-node src/scripts/link-mlb-players.ts --dry-run
 */

import { Pool } from 'pg';
import dotenv from 'dotenv';
import path from 'path';
import { buildDatabasePoolConfig } from '../db/config';

dotenv.config({ path: path.join(__dirname, '../../.env') });

const pool = new Pool(buildDatabasePoolConfig({
  max: 5,
  idleTimeoutMillis: 10_000,
}));

const DRY_RUN = process.argv.includes('--dry-run');

/** Normalize a player name for fuzzy matching */
function normalizeName(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z\s]/g, '')   // strip non-alpha
    .replace(/\s+/g, ' ')        // collapse whitespace
    .replace(/\b(jr|sr|ii|iii|iv)\b/g, '') // strip suffixes
    .trim();
}

async function main() {
  console.log('='.repeat(80));
  console.log('MLB CanonicalPlayer → PGM Linker');
  console.log(`Mode: ${DRY_RUN ? 'DRY RUN' : 'LIVE'}`);
  console.log('='.repeat(80));

  // Get MLB canonical players without sgoId
  const { rows: canonPlayers } = await pool.query(
    `SELECT id, name, "sgoId" FROM "CanonicalPlayer" WHERE league = 'mlb' ORDER BY name`
  );
  const unlinked = canonPlayers.filter((p: any) => !p.sgoId);
  console.log(`\nMLB CanonicalPlayer: ${canonPlayers.length} total, ${unlinked.length} unlinked`);

  // Get distinct PGM players for MLB
  const { rows: pgmPlayers } = await pool.query(
    `SELECT DISTINCT "playerName", "playerId", "teamName"
     FROM "PlayerGameModel"
     WHERE league = 'mlb' AND "playerName" IS NOT NULL
     ORDER BY "playerName"`
  );
  console.log(`PGM distinct MLB players: ${pgmPlayers.length}`);

  // Build normalized lookup: normalizedName -> { playerId, playerName, teamName }
  const pgmLookup = new Map<string, { playerId: string; playerName: string; teamName: string }>();
  for (const p of pgmPlayers) {
    const norm = normalizeName(p.playerName);
    if (!pgmLookup.has(norm)) {
      pgmLookup.set(norm, { playerId: p.playerId, playerName: p.playerName, teamName: p.teamName });
    }
  }

  let linked = 0;
  let aliasCreated = 0;
  let noMatch = 0;

  for (const cp of unlinked) {
    const normCanon = normalizeName(cp.name);

    // Try exact normalized match
    const match = pgmLookup.get(normCanon);
    if (!match) {
      noMatch++;
      continue;
    }

    if (DRY_RUN) {
      console.log(`  [DRY] ${cp.name} → PGM "${match.playerName}" (${match.playerId})`);
      linked++;
      continue;
    }

    try {
      // Update sgoId on CanonicalPlayer
      await pool.query(
        `UPDATE "CanonicalPlayer" SET "sgoId" = $1, "updatedAt" = NOW() WHERE id = $2`,
        [match.playerId, cp.id]
      );

      // Create PlayerAlias if names differ
      if (cp.name !== match.playerName) {
        await pool.query(
          `INSERT INTO "PlayerAlias" ("canonicalPlayerId", alias, source, "createdAt", "updatedAt")
           VALUES ($1, $2, 'pgm_link', NOW(), NOW())
           ON CONFLICT DO NOTHING`,
          [cp.id, match.playerName]
        );
        aliasCreated++;
      }
      linked++;
    } catch (err: any) {
      console.error(`  [ERR] ${cp.name}: ${err.message}`);
    }
  }

  console.log(`\nResults: ${linked} linked, ${aliasCreated} aliases created, ${noMatch} no match`);
  console.log(`Link rate: ${canonPlayers.length > 0 ? Math.round(((canonPlayers.length - unlinked.length + linked) / canonPlayers.length) * 100) : 0}%`);

  await pool.end();
}

main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});
