"""
Tier 1: Official source ingestion.

Ingests roster, availability, and lineup data from the existing sportsdb tables
(PlayerInjury, SportsGame, PlayerGameMetric, etc.) which are already populated
by the SGO pipeline, CBS, ESPN, FantasyPros crons.

This module does NOT invent fake data — it reads what the existing ingestion
pipeline has already collected and normalizes it for validation.
"""

import logging
from datetime import datetime
from typing import List, Dict, Any, Optional

import psycopg2.extras

from .enums import AvailabilityStatus, LineupStatus, SourceTier
from .normalization import normalize_player_name, normalize_team_abbrev

log = logging.getLogger("pub-integrity")

# Map injury statuses from PlayerInjury table to our enums
INJURY_STATUS_MAP = {
    "out": AvailabilityStatus.OUT,
    "ir": AvailabilityStatus.INACTIVE,
    "suspension": AvailabilityStatus.SUSPENDED,
    "doubtful": AvailabilityStatus.DOUBTFUL,
    "questionable": AvailabilityStatus.QUESTIONABLE,
    "probable": AvailabilityStatus.PROBABLE,
    "day-to-day": AvailabilityStatus.QUESTIONABLE,
    "dtd": AvailabilityStatus.QUESTIONABLE,
    "active": AvailabilityStatus.AVAILABLE,
    "available": AvailabilityStatus.AVAILABLE,
}


def ingest_availability_from_injuries(conn, league: str, run_id: str) -> List[Dict[str, Any]]:
    """
    Read PlayerInjury table for confirmed availability statuses.
    Uses 2-source confirmation for OUT/IR/Suspension (matching existing logic in odds-refresh.ts).
    Single-source reports are included as QUESTIONABLE/PROBABLE.
    """
    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)

    # Get all injury reports for this league
    cur.execute("""
        SELECT pi."playerName", pi.status, pi.source, pi.team, pi."updatedAt",
               sc.source_count
        FROM "PlayerInjury" pi
        JOIN (
            SELECT LOWER("playerName") AS pn, status, COUNT(DISTINCT source) AS source_count
            FROM "PlayerInjury"
            WHERE LOWER(league) = LOWER(%s)
              AND "updatedAt" > NOW() - INTERVAL '7 days'
            GROUP BY LOWER("playerName"), status
        ) sc ON LOWER(pi."playerName") = sc.pn AND pi.status = sc.status
        WHERE LOWER(pi.league) = LOWER(%s)
          AND pi."updatedAt" > NOW() - INTERVAL '7 days'
        ORDER BY pi."updatedAt" DESC
    """, (league, league))

    rows = cur.fetchall()
    cur.close()

    # Deduplicate: take latest status per player, prefer multi-source
    player_reports: Dict[str, Dict[str, Any]] = {}
    for row in rows:
        pname = normalize_player_name(row["playerName"])
        if pname in player_reports:
            # Keep if this has more source confirmation
            existing = player_reports[pname]
            if row["source_count"] > existing.get("source_count", 0):
                player_reports[pname] = row
            continue
        player_reports[pname] = row

    results = []
    for pname, row in player_reports.items():
        raw_status = (row["status"] or "").lower().strip()
        avail = INJURY_STATUS_MAP.get(raw_status, AvailabilityStatus.UNKNOWN)

        # Require 2+ sources for hard blocking statuses
        if avail.is_blocking and row["source_count"] < 2:
            avail = AvailabilityStatus.QUESTIONABLE  # downgrade to uncertain

        results.append({
            "player_name": row["playerName"],
            "normalized_name": pname,
            "team": row.get("team", ""),
            "availability_status": avail,
            "detailed_status": raw_status,
            "source": f"PlayerInjury ({row['source_count']} sources)",
            "source_tier": SourceTier.OFFICIAL,
            "source_timestamp": row.get("updatedAt"),
            "run_id": run_id,
        })

    log.info(f"[official] Ingested {len(results)} availability reports for {league}")
    return results


def ingest_rosters_from_pgm(conn, league: str, run_id: str) -> List[Dict[str, Any]]:
    """
    Read PlayerGameMetric to determine current roster/team assignments.
    PGM tracks which players appear in games for which teams.
    """
    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)

    cur.execute("""
        SELECT DISTINCT ON (LOWER("playerName"))
            "playerName", team, "gameDate", league
        FROM "PlayerGameMetric"
        WHERE LOWER(league) = LOWER(%s)
          AND "gameDate" > NOW() - INTERVAL '30 days'
        ORDER BY LOWER("playerName"), "gameDate" DESC
    """, (league,))

    rows = cur.fetchall()
    cur.close()

    results = []
    for row in rows:
        results.append({
            "player_name": row["playerName"],
            "normalized_name": normalize_player_name(row["playerName"]),
            "team": normalize_team_abbrev(row["team"] or ""),
            "league": league,
            "source": "PlayerGameMetric",
            "source_tier": SourceTier.OFFICIAL,
            "last_game_date": row["gameDate"],
            "run_id": run_id,
        })

    log.info(f"[official] Ingested {len(results)} roster entries from PGM for {league}")
    return results


def ingest_today_games(conn, league: str) -> List[Dict[str, Any]]:
    """Read today's games from rm_events for the given league."""
    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    cur.execute("""
        SELECT event_id, league, home_team, away_team, home_short, away_short,
               starts_at, status
        FROM rm_events
        WHERE LOWER(league) = LOWER(%s)
          AND DATE(starts_at AT TIME ZONE 'America/New_York') = (NOW() AT TIME ZONE 'America/New_York')::date
          AND status != 'ended'
        ORDER BY starts_at ASC
    """, (league,))

    rows = cur.fetchall()
    cur.close()

    log.info(f"[official] Found {len(rows)} today's games for {league}")
    return [dict(r) for r in rows]


def ingest_forecast_props(conn, league: str) -> List[Dict[str, Any]]:
    """
    Read player prop highlights from today's forecasts in rm_forecast_cache.
    These are the props that would be published — the validation targets.
    """
    cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    cur.execute("""
        SELECT fc.event_id, fc.league, fc.home_team, fc.away_team,
               fc.forecast_data, fc.confidence_score
        FROM rm_forecast_cache fc
        WHERE LOWER(fc.league) = LOWER(%s)
          AND (fc.starts_at AT TIME ZONE 'America/New_York')::date
              = (NOW() AT TIME ZONE 'America/New_York')::date
          AND fc.forecast_data IS NOT NULL
    """, (league,))

    rows = cur.fetchall()
    cur.close()

    props = []
    for row in rows:
        fd = row["forecast_data"] or {}
        prop_highlights = fd.get("prop_highlights", [])
        for prop in prop_highlights:
            player_name = prop.get("player", "")
            if not player_name:
                continue
            props.append({
                "event_id": row["event_id"],
                "league": league,
                "home_team": row["home_team"],
                "away_team": row["away_team"],
                "player_name": player_name,
                "normalized_name": normalize_player_name(player_name),
                "prop_type": prop.get("prop", ""),
                "recommendation": prop.get("recommendation", ""),
                "reasoning": prop.get("reasoning", ""),
            })

    log.info(f"[official] Found {len(props)} player props across {len(rows)} forecasts for {league}")
    return props