#!/usr/bin/env python3
"""
Backfill Opening/Closing Flags on PlayerPropLine
==================================================
Fixes the low coverage of isOpeningLine and isClosingLine flags.

For isOpeningLine:
  - The FIRST PPL row per (playerExternalId, gameId, propType, lineValue) is the opening.
  - Uses snapshotAt ordering (earliest = opening), falls back to createdAt.

For isClosingLine:
  - The LAST PPL row per (playerExternalId, gameId, propType, lineValue) before game start
    is the closing line.
  - Joins to SportsGame for game start time.
  - Only marks closing for games that have already started.

Usage:
  python3 backfill_opening_closing_flags.py [--days 60] [--league nba] [--dry-run]
"""
import os
import sys
import time
import argparse
import psycopg2
from datetime import datetime, timezone, timedelta

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from dotenv import load_dotenv
load_dotenv('/var/www/html/eventheodds/.env')

DB_URL = os.getenv('SPORTS_DATABASE_URL',
                    'postgresql://eventheodds:eventheodds_dev_password@127.0.0.1:5433/eventheodds_sports').split('?')[0]


def fix_opening_lines(cur, conn, league_filter, cutoff_date, dry_run):
    """Mark isOpeningLine = TRUE on the earliest PPL row per prop group."""
    print("\n" + "=" * 70)
    print("OPENING LINES: Mark earliest snapshot per prop group")
    print("=" * 70)

    league_clause = "AND ppl.league = %s" if league_filter else ""
    params = [cutoff_date]
    if league_filter:
        params.append(league_filter)

    # First, reset any incorrect opening flags (optional, conservative)
    # Then set opening on the earliest row per group

    sql = f"""
        WITH earliest AS (
            SELECT DISTINCT ON ("playerExternalId", "gameId", "propType")
                id
            FROM "PlayerPropLine"
            WHERE "createdAt" >= %s
              AND "oddsAmerican" IS NOT NULL
              {"AND league = %s" if league_filter else ""}
            ORDER BY "playerExternalId", "gameId", "propType",
                     COALESCE("snapshotAt", "createdAt") ASC
        )
        UPDATE "PlayerPropLine" ppl
        SET "isOpeningLine" = TRUE
        FROM earliest e
        WHERE ppl.id = e.id
          AND (ppl."isOpeningLine" IS NULL OR ppl."isOpeningLine" = FALSE)
    """

    if dry_run:
        cur.execute(f"""
            WITH earliest AS (
                SELECT DISTINCT ON ("playerExternalId", "gameId", "propType")
                    id
                FROM "PlayerPropLine"
                WHERE "createdAt" >= %s
                  AND "oddsAmerican" IS NOT NULL
                  {"AND league = %s" if league_filter else ""}
                ORDER BY "playerExternalId", "gameId", "propType",
                         COALESCE("snapshotAt", "createdAt") ASC
            )
            SELECT COUNT(*) FROM earliest e
            JOIN "PlayerPropLine" ppl ON ppl.id = e.id
            WHERE ppl."isOpeningLine" IS NULL OR ppl."isOpeningLine" = FALSE
        """, params)
        count = cur.fetchone()[0]
        print(f"  Would mark {count:,} rows as opening (dry run)")
        return count

    cur.execute(sql, params)
    updated = cur.rowcount
    conn.commit()
    print(f"  Marked {updated:,} rows as isOpeningLine = TRUE")
    return updated


def fix_closing_lines(cur, conn, league_filter, cutoff_date, dry_run):
    """Mark isClosingLine = TRUE on the last PPL snapshot before game start."""
    print("\n" + "=" * 70)
    print("CLOSING LINES: Mark last snapshot before game start")
    print("=" * 70)

    league_clause = "AND ppl.league = %s" if league_filter else ""
    params = [cutoff_date]
    if league_filter:
        params.append(league_filter)

    # Find the latest PPL row per (player, game, propType) for games that already started
    sql = f"""
        WITH latest_before_start AS (
            SELECT DISTINCT ON (ppl."playerExternalId", ppl."gameId", ppl."propType")
                ppl.id
            FROM "PlayerPropLine" ppl
            JOIN "SportsGame" sg ON ppl."gameId" = sg.id
            WHERE ppl."createdAt" >= %s
              AND sg."gameDate" <= NOW()
              AND ppl."oddsAmerican" IS NOT NULL
              {"AND ppl.league = %s" if league_filter else ""}
            ORDER BY ppl."playerExternalId", ppl."gameId", ppl."propType",
                     COALESCE(ppl."snapshotAt", ppl."createdAt") DESC
        )
        UPDATE "PlayerPropLine" ppl
        SET "isClosingLine" = TRUE
        FROM latest_before_start lbs
        WHERE ppl.id = lbs.id
          AND (ppl."isClosingLine" IS NULL OR ppl."isClosingLine" = FALSE)
    """

    if dry_run:
        cur.execute(f"""
            WITH latest_before_start AS (
                SELECT DISTINCT ON (ppl."playerExternalId", ppl."gameId", ppl."propType")
                    ppl.id
                FROM "PlayerPropLine" ppl
                JOIN "SportsGame" sg ON ppl."gameId" = sg.id
                WHERE ppl."createdAt" >= %s
                  AND sg."gameDate" <= NOW()
                  AND ppl."oddsAmerican" IS NOT NULL
                  {"AND ppl.league = %s" if league_filter else ""}
                ORDER BY ppl."playerExternalId", ppl."gameId", ppl."propType",
                         COALESCE(ppl."snapshotAt", ppl."createdAt") DESC
            )
            SELECT COUNT(*) FROM latest_before_start lbs
            JOIN "PlayerPropLine" ppl ON ppl.id = lbs.id
            WHERE ppl."isClosingLine" IS NULL OR ppl."isClosingLine" = FALSE
        """, params)
        count = cur.fetchone()[0]
        print(f"  Would mark {count:,} rows as closing (dry run)")
        return count

    cur.execute(sql, params)
    updated = cur.rowcount
    conn.commit()
    print(f"  Marked {updated:,} rows as isClosingLine = TRUE")
    return updated


def clear_incorrect_flags(cur, conn, league_filter, cutoff_date, dry_run):
    """
    Clear opening/closing flags that are incorrectly set.
    An opening flag is incorrect if it's not the earliest row per group.
    A closing flag is incorrect if the game hasn't started yet.
    """
    print("\n" + "=" * 70)
    print("CLEANUP: Remove incorrect flags")
    print("=" * 70)

    params = [cutoff_date]
    if league_filter:
        params.append(league_filter)

    # Clear closing flags for games that haven't started yet
    sql_clear = f"""
        UPDATE "PlayerPropLine" ppl
        SET "isClosingLine" = FALSE
        FROM "SportsGame" sg
        WHERE ppl."gameId" = sg.id
          AND ppl."isClosingLine" = TRUE
          AND sg."gameDate" > NOW()
          AND ppl."createdAt" >= %s
          {"AND ppl.league = %s" if league_filter else ""}
    """

    if dry_run:
        cur.execute(f"""
            SELECT COUNT(*) FROM "PlayerPropLine" ppl
            JOIN "SportsGame" sg ON ppl."gameId" = sg.id
            WHERE ppl."isClosingLine" = TRUE
              AND sg."gameDate" > NOW()
              AND ppl."createdAt" >= %s
              {"AND ppl.league = %s" if league_filter else ""}
        """, params)
        count = cur.fetchone()[0]
        print(f"  Would clear {count:,} incorrect closing flags (dry run)")
        return count

    cur.execute(sql_clear, params)
    cleared = cur.rowcount
    conn.commit()
    print(f"  Cleared {cleared:,} incorrect closing flags (game hasn't started)")
    return cleared


def main():
    parser = argparse.ArgumentParser(description='Backfill opening/closing flags')
    parser.add_argument('--days', type=int, default=60, help='Days to backfill')
    parser.add_argument('--league', type=str, default=None, help='Filter to league')
    parser.add_argument('--dry-run', action='store_true', help='Show counts only')
    args = parser.parse_args()

    cutoff_date = datetime.now(timezone.utc) - timedelta(days=args.days)

    print("=" * 70)
    print("OPENING/CLOSING FLAGS BACKFILL")
    print(f"  Days: {args.days}, Cutoff: {cutoff_date.strftime('%Y-%m-%d')}")
    print(f"  League: {args.league or 'ALL'}")
    print(f"  Dry run: {args.dry_run}")
    print("=" * 70)

    conn = psycopg2.connect(DB_URL)
    cur = conn.cursor()

    start = time.time()
    try:
        # Clean up first
        clear_incorrect_flags(cur, conn, args.league, cutoff_date, args.dry_run)

        # Then set correct flags
        opening_count = fix_opening_lines(cur, conn, args.league, cutoff_date, args.dry_run)
        closing_count = fix_closing_lines(cur, conn, args.league, cutoff_date, args.dry_run)

    except Exception as e:
        conn.rollback()
        print(f"\nERROR: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
    finally:
        cur.close()
        conn.close()

    elapsed = time.time() - start
    print(f"\n{'=' * 70}")
    print(f"DONE — opening: {opening_count:,}, closing: {closing_count:,} in {elapsed:.1f}s")
    print(f"{'=' * 70}")


if __name__ == '__main__':
    main()
