#!/usr/bin/env python3
"""
Monitor Data Coverage - Alert if coverage drops
Run daily to ensure no regression in key metrics.
"""
import psycopg2
import os
from datetime import datetime, timezone

# Minimum acceptable coverage (alert if below)
# Updated 2026-01-20 to reflect current achieved levels
MIN_COVERAGE = {
    'nba': {'period_scoring': 12.0, 'referees': 35.0},
    'nfl': {'period_scoring': 2.5, 'referees': 35.0},
    'nhl': {'period_scoring': 6.0, 'referees': 25.0},
}


def load_db_url():
    env_path = '/var/www/html/eventheodds/.env'
    try:
        with open(env_path, 'r') as f:
            for line in f:
                if line.startswith('SPORTS_DATABASE_URL='):
                    return line.split('=', 1)[1].strip().split('?')[0]
    except:
        pass
    return os.environ.get('SPORTS_DATABASE_URL', '').split('?')[0]


def check_coverage():
    db_url = load_db_url()
    conn = psycopg2.connect(db_url)
    cur = conn.cursor()

    print("=" * 60)
    print("DATA COVERAGE MONITOR")
    print(f"Time: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    alerts = []

    # Check period scoring coverage
    cur.execute('''
        SELECT league,
               COUNT(*) as total,
               COUNT(*) FILTER (WHERE "homeQ1" IS NOT NULL) as with_periods
        FROM "SportsGame"
        WHERE league IN ('nba', 'nfl', 'nhl')
          AND "gameDate" < NOW()
        GROUP BY league
    ''')

    print("\nPERIOD SCORING:")
    for league, total, with_periods in cur.fetchall():
        pct = (with_periods / total * 100) if total > 0 else 0
        min_pct = MIN_COVERAGE.get(league, {}).get('period_scoring', 0)
        status = "✅" if pct >= min_pct else "⚠️ ALERT"
        print(f"  {league.upper()}: {pct:.1f}% ({with_periods:,}/{total:,}) {status}")
        if pct < min_pct:
            alerts.append(f"{league} period scoring dropped to {pct:.1f}% (min: {min_pct}%)")

    # Check referee coverage
    cur.execute('''
        SELECT g.league, COUNT(DISTINCT g.id) as total,
               COUNT(DISTINCT r."gameId") as with_refs
        FROM "SportsGame" g
        LEFT JOIN "GameReferee" r ON g.id = r."gameId"
        WHERE g.league IN ('nba', 'nfl', 'nhl')
          AND g."gameDate" < NOW()
          AND g."gameDate" > NOW() - INTERVAL '180 days'
        GROUP BY g.league
    ''')

    print("\nREFEREE COVERAGE (last 180 days):")
    for league, total, with_refs in cur.fetchall():
        pct = (with_refs / total * 100) if total > 0 else 0
        min_pct = MIN_COVERAGE.get(league, {}).get('referees', 0)
        status = "✅" if pct >= min_pct else "⚠️ ALERT"
        print(f"  {league.upper()}: {pct:.1f}% ({with_refs:,}/{total:,}) {status}")
        if pct < min_pct:
            alerts.append(f"{league} referee coverage dropped to {pct:.1f}% (min: {min_pct}%)")

    # Check odds snapshots (should have recent data)
    cur.execute('''
        SELECT COUNT(*), MAX("snapshotAt")
        FROM "OddsSnapshot"
        WHERE "snapshotAt" > NOW() - INTERVAL '24 hours'
    ''')
    snap_count, last_snap = cur.fetchone()
    print(f"\nODDS SNAPSHOTS (24h): {snap_count}")
    if snap_count < 10:
        alerts.append(f"Only {snap_count} odds snapshots in last 24h")

    conn.close()

    # Summary
    print("\n" + "=" * 60)
    if alerts:
        print("⚠️  ALERTS:")
        for alert in alerts:
            print(f"  - {alert}")
        return 1
    else:
        print("✅ All coverage metrics within acceptable range")
        return 0


if __name__ == '__main__':
    exit(check_coverage())
