"""
Link Discovery Node
Discovers white-hat link building opportunities for human approval
"""

import json
import os
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional

import httpx

from state.campaign_state import (
    SEOCampaignState,
    CampaignPhase,
    transition_phase,
    add_error,
    add_warning,
    LinkOpportunity,
    LinkOpportunityStatus,
    OpportunityType,
)


# Base URLs for the Next.js API and the MCP tool server.
# Overridable via environment variables; defaults target local development.
NEXT_API_URL = os.getenv("NEXT_API_URL", "http://127.0.0.1:3000")
MCP_URL = os.getenv("MCP_URL", "http://127.0.0.1:3001")


# Quality thresholds for link opportunities
# An opportunity must clear ALL numeric minimums and match NO blacklist
# pattern to be surfaced for human approval (see is_quality_opportunity).
LINK_QUALITY_THRESHOLDS = {
    "min_domain_authority": 40,  # minimum domain authority score
    "min_monthly_traffic": 5000,  # minimum estimated monthly visits
    "min_relevance_score": 0.8,  # minimum topical relevance (0..1 scale)
    # URL substrings that signal link-selling / pay-for-placement pages;
    # any match disqualifies the opportunity outright.
    "blacklist_patterns": [
        "write-for-us",
        "guest-post-guidelines",
        "sponsored-post",
        "link-exchange",
        "buy-links",
        "paid-post",
        "advertise-with-us",
    ],
}


async def discover_haro_opportunities() -> List[dict]:
    """
    Discover HARO/Connectively opportunities.

    Placeholder: a production build would poll the HARO/Connectively API or
    a monitoring service here. Until that integration exists, no
    opportunities are produced.
    """
    # TODO: Integrate with HARO/Connectively API
    found: List[dict] = []
    return found


async def discover_unlinked_mentions(site_domain: str) -> List[dict]:
    """
    Find unlinked brand mentions across the web.

    Calls the MCP ``link_opportunity_scorer`` tool over JSON-RPC. Discovery
    is best-effort: any failure (server down, non-200 status, malformed
    payload) is logged and an empty list is returned rather than raising.

    Args:
        site_domain: The domain whose brand mentions should be located.

    Returns:
        A list of mention dicts (empty when the tool is unavailable).
    """
    mentions: List[dict] = []
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                f"{MCP_URL}/rpc",
                json={
                    "jsonrpc": "2.0",
                    "id": 1,
                    "method": "tools/call",
                    "params": {
                        "name": "link_opportunity_scorer",
                        "arguments": {
                            "type": "unlinked_mentions",
                            "domain": site_domain
                        }
                    }
                }
            )
            if response.status_code == 200:
                result = response.json().get("result", {})
                content = result.get("content", [])
                if content:
                    # Tool results arrive as JSON text inside the first
                    # content item; an empty object is the safe fallback.
                    data = json.loads(content[0].get("text", "{}"))
                    mentions = data.get("mentions", [])
    except Exception as e:
        # Best-effort discovery: log and return what we have (possibly []).
        print(f"[LinkDiscoveryNode] Unlinked mentions error: {e}")
    return mentions


async def discover_broken_links(competitors: List[dict]) -> List[dict]:
    """
    Find broken link opportunities from competitor backlinks.

    Looks for 404 pages that competitors link to, which we could recreate.
    Queries the MCP ``link_opportunity_scorer`` tool once per competitor
    (top 3 only, to bound latency). Best-effort: any failure is logged and
    whatever was collected so far is returned instead of raising.

    Args:
        competitors: Competitor dicts; only the ``domain`` key is used.

    Returns:
        Broken-link opportunity dicts aggregated across competitors.
    """
    broken_links: List[dict] = []
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            for comp in competitors[:3]:  # Check top 3 competitors
                response = await client.post(
                    f"{MCP_URL}/rpc",
                    json={
                        "jsonrpc": "2.0",
                        "id": 1,
                        "method": "tools/call",
                        "params": {
                            "name": "link_opportunity_scorer",
                            "arguments": {
                                "type": "broken_links",
                                "competitor_domain": comp.get("domain", "")
                            }
                        }
                    }
                )
                if response.status_code == 200:
                    result = response.json().get("result", {})
                    content = result.get("content", [])
                    if content:
                        # Tool results arrive as JSON text in the first
                        # content item; default to an empty object.
                        data = json.loads(content[0].get("text", "{}"))
                        broken_links.extend(data.get("broken_links", []))
    except Exception as e:
        # Best-effort: keep whatever was collected before the failure.
        print(f"[LinkDiscoveryNode] Broken links error: {e}")
    return broken_links


def is_quality_opportunity(opp: dict, thresholds: Optional[Dict[str, Any]] = None) -> bool:
    """
    Check whether a link opportunity meets the quality thresholds.

    Args:
        opp: Opportunity dict; reads ``domain_authority``,
            ``monthly_traffic``, ``relevance_score`` and ``url``. Missing
            metrics default to 0 and therefore fail the minimum checks.
        thresholds: Optional override mapping with the same keys as
            LINK_QUALITY_THRESHOLDS; defaults to the module-level constant
            (backward-compatible with the original single-argument form).

    Returns:
        True only when every numeric minimum is met and the URL contains
        none of the blacklisted substrings.
    """
    if thresholds is None:
        thresholds = LINK_QUALITY_THRESHOLDS

    da = opp.get("domain_authority", 0)
    traffic = opp.get("monthly_traffic", 0)
    relevance = opp.get("relevance_score", 0)
    url = opp.get("url", "").lower()

    # Check domain authority
    if da < thresholds["min_domain_authority"]:
        return False

    # Check traffic
    if traffic < thresholds["min_monthly_traffic"]:
        return False

    # Check relevance
    if relevance < thresholds["min_relevance_score"]:
        return False

    # Reject pages that advertise paid/exchanged links.
    return not any(pattern in url for pattern in thresholds["blacklist_patterns"])


async def generate_pitch(opp: dict, site_info: dict) -> str:
    """
    Generate a personalized outreach pitch for a link opportunity.

    Asks Grok (via the MCP ``ask_grok`` tool) to draft a research-based,
    personalized email. If the tool call fails or returns nothing usable,
    a generic fallback pitch built from ``site_info`` is returned instead.
    """
    fallback_pitch = f"""Hi,

I noticed you mentioned {site_info.get('topic', 'sports betting analytics')} on your site. I wanted to reach out because we have some complementary content at {site_info.get('domain', 'EventheOdds')} that your readers might find valuable.

{site_info.get('value_prop', 'We provide free sports analytics tools and data-driven betting insights.')}

Would you be interested in checking it out?

Best,
The EventheOdds Team"""

    prompt = f"""Write a brief, personalized outreach email for link building.

Target site: {opp.get('source_domain', 'Unknown')}
Their content topic: {opp.get('content_topic', 'sports')}
Our site: {site_info.get('domain', 'EventheOdds')}
Our value: {site_info.get('value_prop', 'Sports analytics and betting data')}
Opportunity type: {opp.get('opportunity_type', 'mention')}

Requirements:
- Be conversational and genuine, NOT template-y
- Reference something specific about their content
- Clearly explain mutual benefit
- Keep it under 150 words
- Do NOT include subject line

Return ONLY the email body."""

    rpc_payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": "ask_grok",
            "arguments": {"prompt": prompt},
        },
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(f"{MCP_URL}/rpc", json=rpc_payload)
            if response.status_code == 200:
                content = response.json().get("result", {}).get("content", [])
                if content and content[0].get("text"):
                    return content[0]["text"]
    except Exception as e:
        print(f"[LinkDiscoveryNode] Pitch generation error: {e}")

    # Tool unavailable or returned nothing usable: fall back to the template.
    return fallback_pitch


async def _make_opportunity(
    opp: dict,
    site_info: dict,
    opp_type: str,
    target_url: str,
    notes: str,
    discovered_at: str,
) -> LinkOpportunity:
    """
    Build a pending LinkOpportunity from one raw discovery result.

    Drafts a personalized pitch via generate_pitch(); every approval,
    execution and outcome field starts empty/None because ALL outreach
    requires explicit human approval first.

    Args:
        opp: Raw opportunity dict from a discovery helper.
        site_info: Our site's domain/topic/value-prop, used for the pitch.
        opp_type: OpportunityType value string for this source.
        target_url: URL this opportunity targets.
        notes: Source-specific context string for human reviewers.
        discovered_at: ISO timestamp shared by the whole discovery run.
    """
    pitch = await generate_pitch(opp, site_info)
    return LinkOpportunity(
        id=str(uuid.uuid4()),
        target_url=target_url,
        source_domain=opp.get("domain", ""),
        domain_authority=opp.get("domain_authority", 0),
        monthly_traffic=opp.get("monthly_traffic"),
        relevance_score=opp.get("relevance_score", 0.0),
        opportunity_type=opp_type,
        status=LinkOpportunityStatus.PENDING.value,
        pitch_draft=pitch,
        pitch_approved=None,
        contact_email=opp.get("contact_email"),
        contact_name=opp.get("contact_name"),
        approved_by=None,
        approved_at=None,
        rejection_reason=None,
        executed_at=None,
        link_acquired=False,
        acquired_url=None,
        discovered_at=discovered_at,
        notes=notes,
    )


async def link_discovery_phase(state: SEOCampaignState) -> SEOCampaignState:
    """
    Link Discovery phase node

    Discovers white-hat link building opportunities:
    - HARO/Connectively monitoring
    - Unlinked brand mentions
    - Broken link building (DA 40+ only)
    - Guest post prospects (strict filtering)

    ALL opportunities require human approval before any outreach.

    Args:
        state: Campaign state; reads ``site_id`` and ``competitor_data``,
            writes ``link_opportunities``, ``pending_approvals``,
            ``requires_human_review``, warnings and the phase transition.

    Returns:
        The updated state, transitioned to the REPORT phase.
    """
    print(f"[LinkDiscoveryNode] Starting link discovery for site {state['site_id']}")

    # NOTE(review): datetime.utcnow() is deprecated in Python 3.12+; kept
    # here to preserve the existing naive-UTC timestamp format that state
    # consumers may expect — confirm before switching to now(timezone.utc).
    now = datetime.utcnow().isoformat()

    opportunities: List[LinkOpportunity] = []
    # Hard-coded site profile used to personalize outreach pitches.
    site_info = {
        "domain": "eventheodds.ai",
        "topic": "sports betting analytics",
        "value_prop": "Free sports analytics tools with data from 50,000+ games analyzed",
    }

    try:
        # 1. Discover HARO opportunities
        try:
            for opp in await discover_haro_opportunities():
                if is_quality_opportunity(opp):
                    opportunities.append(await _make_opportunity(
                        opp,
                        site_info,
                        OpportunityType.HARO.value,
                        opp.get("url", ""),
                        f"HARO query: {opp.get('query', '')}",
                        now,
                    ))
        except Exception as e:
            print(f"[LinkDiscoveryNode] HARO discovery failed: {e}")

        # 2. Discover unlinked mentions
        try:
            for mention in await discover_unlinked_mentions(site_info["domain"]):
                if is_quality_opportunity(mention):
                    opportunities.append(await _make_opportunity(
                        mention,
                        site_info,
                        OpportunityType.UNLINKED_MENTION.value,
                        mention.get("url", ""),
                        f"Mention context: {mention.get('context', '')[:200]}",
                        now,
                    ))
        except Exception as e:
            print(f"[LinkDiscoveryNode] Unlinked mentions discovery failed: {e}")

        # 3. Discover broken link opportunities
        try:
            for brk in await discover_broken_links(state.get("competitor_data", [])):
                if is_quality_opportunity(brk):
                    opportunities.append(await _make_opportunity(
                        brk,
                        site_info,
                        OpportunityType.BROKEN_LINK.value,
                        brk.get("linking_page_url", ""),
                        f"Broken URL: {brk.get('broken_url', '')}",
                        now,
                    ))
        except Exception as e:
            print(f"[LinkDiscoveryNode] Broken links discovery failed: {e}")

        print(f"[LinkDiscoveryNode] Discovered {len(opportunities)} quality opportunities")

    except Exception as e:
        print(f"[LinkDiscoveryNode] Error during link discovery: {e}")
        state = add_error(state, "link_discovery", f"Link discovery failed: {str(e)}")

    # Update state. pending_approvals gets a copy so later mutation of one
    # key cannot silently alter the other (they previously aliased one list).
    state["link_opportunities"] = opportunities
    state["pending_approvals"] = list(opportunities)  # All need approval

    # Always proceed to report phase - don't pause for human review in demo mode
    # In production with real link opportunities, this would pause for approval
    if opportunities:
        state["requires_human_review"] = True
        state = add_warning(state, "link_discovery",
            f"Found {len(opportunities)} link opportunities - would require approval in production")
    else:
        state = add_warning(state, "link_discovery",
            "No quality link opportunities found - external APIs not configured")

    # Transition to report phase (in production with real opportunities, this would go to HUMAN_REVIEW)
    state = transition_phase(state, CampaignPhase.REPORT)
    state["updated_at"] = now

    return state
