/**
 * backfill-trace.ts
 *
 * Retroactively links the traceability chain for the last 30 days:
 *   Graded Forecast -> Archive -> Blog Post
 *
 * Run: npx ts-node scripts/backfill-trace.ts
 *
 * Safe to run multiple times (idempotent).
 */

import { Pool } from 'pg';
import dotenv from 'dotenv';
import path from 'path';

dotenv.config({ path: path.join(__dirname, '../.env') });

const pool = new Pool({ connectionString: process.env.DATABASE_URL });

async function backfill() {
  console.log('=== Forecast Traceability Backfill ===\n');

  // Step 1: Link orphan blog posts to their archived forecasts
  console.log('[1/5] Linking orphan blog posts to archived forecasts...');
  const linkBlogToArchive = await pool.query(`
    UPDATE rm_blog_posts bp
    SET archived_forecast_id = af.id
    FROM rm_archived_forecasts af
    WHERE bp.archived_forecast_id IS NULL
      AND bp.status = 'published'
      AND bp.published_at >= NOW() - INTERVAL '30 days'
      AND af.blog_post_id IS NULL
      AND bp.sport = af.league
      AND bp.home_team = af.home_team
      AND bp.away_team = af.away_team
      AND bp.game_date = (af.starts_at AT TIME ZONE 'America/New_York')::date
    RETURNING bp.id, bp.slug
  `);
  console.log(`  Linked ${linkBlogToArchive.rows.length} blog posts -> archives`);

  // Step 2: Link archived forecasts back to their blog posts
  console.log('[2/5] Linking archived forecasts back to blog posts...');
  const linkArchiveToBlog = await pool.query(`
    UPDATE rm_archived_forecasts af
    SET blog_post_id = bp.id, updated_at = NOW()
    FROM rm_blog_posts bp
    WHERE af.blog_post_id IS NULL
      AND af.created_at >= NOW() - INTERVAL '30 days'
      AND bp.status = 'published'
      AND bp.archived_forecast_id = af.id
    RETURNING af.id, af.event_id
  `);
  console.log(`  Linked ${linkArchiveToBlog.rows.length} archives -> blog posts (by FK)`);

  // Step 2b: Also try slug-based matching for archives missing blog_post_id
  const linkArchiveToBlogSlug = await pool.query(`
    UPDATE rm_archived_forecasts af
    SET blog_post_id = bp.id, updated_at = NOW()
    FROM rm_blog_posts bp
    WHERE af.blog_post_id IS NULL
      AND af.created_at >= NOW() - INTERVAL '30 days'
      AND bp.status = 'published'
      AND bp.sport = af.league
      AND bp.home_team = af.home_team
      AND bp.away_team = af.away_team
      AND bp.game_date = (af.starts_at AT TIME ZONE 'America/New_York')::date
    RETURNING af.id, af.event_id
  `);
  console.log(`  Linked ${linkArchiveToBlogSlug.rows.length} archives -> blog posts (by match)`);

  // Step 3: Settle unsettled archives that have accuracy records
  console.log('[3/5] Settling pending archives with accuracy records...');
  const settleArchives = await pool.query(`
    UPDATE rm_archived_forecasts af
    SET
      outcome = CASE
        WHEN fa.final_grade = 'W' THEN 'win'
        WHEN fa.final_grade = 'L' THEN 'loss'
        WHEN fa.final_grade = 'P' THEN 'push'
        WHEN fa.predicted_winner = fa.actual_winner THEN 'win'
        ELSE 'loss'
      END,
      actual_winner = fa.actual_winner,
      actual_score = CASE
        WHEN fa.home_score IS NOT NULL AND fa.away_score IS NOT NULL
        THEN fa.home_score || '-' || fa.away_score
        ELSE af.actual_score
      END,
      settled_at = COALESCE(fa.resolved_at, NOW()),
      updated_at = NOW()
    FROM rm_forecast_accuracy fa
    WHERE af.outcome = 'pending'
      AND af.event_id = fa.event_id
      AND fa.actual_winner IS NOT NULL
      AND af.starts_at < NOW() - INTERVAL '4 hours'
      AND af.created_at >= NOW() - INTERVAL '30 days'
    RETURNING af.id, af.event_id, af.outcome
  `);
  console.log(`  Settled ${settleArchives.rows.length} pending archives`);

  // Step 4: Report mismatches
  console.log('[4/5] Checking for unmatched records...');

  const missingBlog = await pool.query(`
    SELECT COUNT(*) as cnt FROM rm_archived_forecasts
    WHERE blog_post_id IS NULL
      AND created_at >= NOW() - INTERVAL '30 days'
  `);
  const missingAccuracy = await pool.query(`
    SELECT COUNT(*) as cnt FROM rm_archived_forecasts af
    WHERE NOT EXISTS (SELECT 1 FROM rm_forecast_accuracy fa WHERE fa.event_id = af.event_id)
      AND af.outcome != 'pending'
      AND af.starts_at < NOW() - INTERVAL '4 hours'
      AND af.created_at >= NOW() - INTERVAL '30 days'
  `);
  const orphanBlogs = await pool.query(`
    SELECT COUNT(*) as cnt FROM rm_blog_posts
    WHERE archived_forecast_id IS NULL
      AND status = 'published'
      AND published_at >= NOW() - INTERVAL '30 days'
  `);
  const stalePending = await pool.query(`
    SELECT COUNT(*) as cnt FROM rm_archived_forecasts
    WHERE outcome = 'pending'
      AND starts_at < NOW() - INTERVAL '24 hours'
      AND created_at >= NOW() - INTERVAL '30 days'
  `);

  console.log(`  Archives without blog posts: ${missingBlog.rows[0].cnt}`);
  console.log(`  Settled archives without accuracy: ${missingAccuracy.rows[0].cnt}`);
  console.log(`  Orphan blog posts: ${orphanBlogs.rows[0].cnt}`);
  console.log(`  Stale pending (>24h): ${stalePending.rows[0].cnt}`);

  // Step 5: Summary
  console.log('\n[5/5] Summary');
  const total = await pool.query(`
    SELECT
      COUNT(*) as total,
      COUNT(*) FILTER (WHERE blog_post_id IS NOT NULL) as has_blog,
      COUNT(*) FILTER (WHERE outcome != 'pending') as settled,
      COUNT(*) FILTER (WHERE blog_post_id IS NOT NULL AND outcome != 'pending') as fully_linked
    FROM rm_archived_forecasts
    WHERE created_at >= NOW() - INTERVAL '30 days'
  `);
  const s = total.rows[0];
  console.log(`  Total archives (30d): ${s.total}`);
  console.log(`  With blog post: ${s.has_blog} (${s.total > 0 ? Math.round(s.has_blog / s.total * 100) : 0}%)`);
  console.log(`  Settled: ${s.settled} (${s.total > 0 ? Math.round(s.settled / s.total * 100) : 0}%)`);
  console.log(`  Fully linked: ${s.fully_linked} (${s.total > 0 ? Math.round(s.fully_linked / s.total * 100) : 0}%)`);

  console.log('\nBackfill complete.');
}

backfill()
  .catch((err) => {
    console.error('Backfill failed:', err);
    process.exit(1);
  })
  .finally(() => pool.end());
