"""
Entity Extractor for conversation analysis and user profiling
Extracts user information from conversations to build memory profiles
"""

import json
import re
from typing import Dict, List, Any, Optional
import logging

logger = logging.getLogger(__name__)


class EntityExtractor:
    """Extracts entities from user conversations for memory and personalization"""

    def __init__(self, grok_client=None):
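        # Optional LLM client; when provided it is expected to expose
        # chat_complete(prompt) -> str returning a JSON-formatted response
        # (see _extract_with_llm below)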
        self.grok_client = grok_client

        # Patterns for basic entity extraction
        self.patterns = {
            'age': re.compile(r'\b(\d{1,3})\s*(?:years?\s*old|yo)\b', re.IGNORECASE),
            # The optional article is matched as a whole word so "I'm an engineer"
            # captures "engineer" rather than "n engineer"
            'profession': re.compile(r'\b(?:i\'?m|am|i\s+work\s+as|my\s+job\s+is)\s+(?:an?\s+)?([a-zA-Z\s]+?)(?:\s*(?:and|,|\.|$))', re.IGNORECASE),
            # [A-Z] anchors the location to a capitalized word, so this pattern must not
            # use re.IGNORECASE; the leading cue words carry their own case variants instead
            'location': re.compile(r'\b(?:[Ii]\'?m|[Ll]ive|[Ff]rom|[Ii]n)\s+([A-Z][a-zA-Z\s,]+?)(?:\s*(?:and|,|\.|$))'),
            'experience_years': re.compile(r'\b(\d+)(?:\s*(?:years?|yrs?))\s*(?:of\s*)?(?:experience|exp)\b', re.IGNORECASE),
        }
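        # Illustrative phrases these patterns are meant to catch, e.g.:
        #   age:              "I'm 29 years old", "29 yo"
        #   profession:       "I work as a nurse", "I'm an accountant"
        #   location:         "I live in Boston", "I'm from Portland"
        #   experience_years: "5 years of experience", "10 yrs exp"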

        # Expertise level keywords
        self.expertise_keywords = {
            'expert': ['expert', 'professional', 'experienced', 'advanced', 'specialist', 'senior', 'lead'],
            'intermediate': ['intermediate', 'some experience', 'moderate', 'mid-level', 'knowledgeable'],
            'beginner': ['beginner', 'new', 'starting', 'novice', 'learning', 'first time']
        }

        # Interest categories
        self.interest_categories = {
            'business': ['business', 'entrepreneurship', 'startup', 'company', 'revenue', 'profit', 'market'],
            'finance': ['finance', 'investment', 'funding', 'capital', 'money', 'budget', 'roi'],
            'health': ['health', 'medical', 'wellness', 'therapy', 'treatment', 'patient'],
            'legal': ['legal', 'law', 'compliance', 'regulation', 'license', 'permit'],
            'psychology': ['psychology', 'mental health', 'therapy', 'counseling', 'anxiety', 'depression'],
            'history': ['history', 'historical', 'civilization', 'ancient', 'timeline'],
            'hydroponics': ['hydroponics', 'hydro', 'soilless', 'dwc', 'nft', 'aeroponic'],
            'cannabis': ['cannabis', 'marijuana', 'hemp', 'cbd', 'thc', 'strain', 'cultivation']
        }

    def extract_entities(self, user_message: str, assistant_response: str = "") -> Dict[str, Any]:
        """Extract entities from a conversation turn"""
        try:
            # First try LLM-based extraction if available
            if self.grok_client:
                return self._extract_with_llm(user_message, assistant_response)
            else:
                return self._extract_with_patterns(user_message, assistant_response)
        except Exception as e:
            logger.error(f"Entity extraction failed: {e}")
            return {}

    def _extract_with_llm(self, user_message: str, assistant_response: str) -> Dict[str, Any]:
        """Use LLM for sophisticated entity extraction"""
        conversation = f"User: {user_message}\nAssistant: {assistant_response}"

        prompt = f"""
        Extract explicit user attributes from this conversation. Only extract information that is CLEARLY and EXPLICITLY stated:

        {conversation}

        Extract ONLY these categories if explicitly mentioned:
        - demographics: age, profession/role, location, experience level
        - interests: specific topics or areas they express interest in
        - survey_preferences: preferences about survey format (multiple choice, open-ended, etc.)
        - expertise_level: their self-described experience level (beginner/intermediate/expert)
        - personality_traits: communication style, preferences about interaction

        Return as valid JSON with these exact keys. If nothing is explicitly stated, use empty objects/arrays.
        Be very conservative - only extract what's clearly stated.
        """

        try:
            response = self.grok_client.chat_complete(prompt)
            entities = json.loads(response)

            # Validate and clean the response
            return self._validate_entities(entities)

        except Exception as e:
            # Any failure (malformed JSON, client/network error) falls back to pattern extraction
            logger.warning(f"LLM entity extraction failed: {e}")
            return self._extract_with_patterns(user_message, assistant_response)

    def _extract_with_patterns(self, user_message: str, assistant_response: str) -> Dict[str, Any]:
        """Fallback pattern-based entity extraction"""
        entities = {
            'demographics': {},
            'interests': [],
            'survey_preferences': {},
            'expertise_level': None,
            'personality_traits': {}
        }

        text = f"{user_message} {assistant_response}"

        # Extract demographics with patterns
        age_match = self.patterns['age'].search(text)
        if age_match:
            entities['demographics']['age'] = int(age_match.group(1))

        profession_match = self.patterns['profession'].search(text)
        if profession_match:
            entities['demographics']['profession'] = profession_match.group(1).strip()

        location_match = self.patterns['location'].search(text)
        if location_match:
            entities['demographics']['location'] = location_match.group(1).strip()

        exp_match = self.patterns['experience_years'].search(text)
        if exp_match:
            entities['demographics']['experience_years'] = int(exp_match.group(1))

        # Extract interests
        entities['interests'] = self._extract_interests(text)

        # Extract expertise level
        entities['expertise_level'] = self._extract_expertise_level(text)

        # Extract survey preferences
        entities['survey_preferences'] = self._extract_survey_preferences(text)

        return entities

    def _extract_interests(self, text: str) -> List[str]:
        """Extract user interests from text"""
        interests = []
        text_lower = text.lower()

        for category, keywords in self.interest_categories.items():
            # Match whole words/phrases so short keywords (e.g. "roi", "thc") do not
            # fire inside unrelated words such as "android"
            if any(re.search(rf'\b{re.escape(keyword)}\b', text_lower) for keyword in keywords):
                interests.append(category)

        return list(set(interests))  # Remove duplicates

    def _extract_expertise_level(self, text: str) -> Optional[str]:
        """Extract expertise level from text"""
        text_lower = text.lower()

        for level, keywords in self.expertise_keywords.items():
            # Whole-word matching avoids e.g. "new" triggering on "news" or "renewable"
            if any(re.search(rf'\b{re.escape(keyword)}\b', text_lower) for keyword in keywords):
                return level

        return None

    def _extract_survey_preferences(self, text: str) -> Dict[str, Any]:
        """Extract survey format preferences"""
        preferences = {}
        text_lower = text.lower()

        # Question type preferences
        if 'multiple choice' in text_lower or 'multiple-choice' in text_lower:
            preferences['question_type'] = 'multiple_choice'
        elif 'open ended' in text_lower or 'open-ended' in text_lower:
            preferences['question_type'] = 'open_ended'

        # Length preferences
        if 'short' in text_lower and 'answer' in text_lower:
            preferences['answer_length'] = 'short'
        elif 'detailed' in text_lower or 'long' in text_lower:
            preferences['answer_length'] = 'detailed'

        return preferences

    def _validate_entities(self, entities: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and clean extracted entities"""
        validated = {
            'demographics': entities.get('demographics', {}),
            'interests': entities.get('interests', []),
            'survey_preferences': entities.get('survey_preferences', {}),
            'expertise_level': entities.get('expertise_level'),
            'personality_traits': entities.get('personality_traits', {})
        }

        # Ensure dict-valued fields are actually dicts (the LLM may return strings or nulls)
        for key in ('demographics', 'survey_preferences', 'personality_traits'):
            if not isinstance(validated[key], dict):
                validated[key] = {}

        # Ensure interests is a list
        if not isinstance(validated['interests'], list):
            validated['interests'] = []

        # Validate expertise level
        if validated['expertise_level'] not in ('beginner', 'intermediate', 'expert', None):
            validated['expertise_level'] = None

        return validated

    def process_conversation_history(self, conversation_history: List[Dict]) -> Dict[str, Any]:
        """Process entire conversation history for comprehensive profile"""
        all_entities = []

        for turn in conversation_history:
            user_msg = turn.get('user_message', '')
            assistant_msg = turn.get('assistant_response', '')
            entities = self.extract_entities(user_msg, assistant_msg)
            all_entities.append(entities)

        # Merge all entities
        return self._merge_entities(all_entities)

    def _merge_entities(self, entities_list: List[Dict]) -> Dict[str, Any]:
        """Merge multiple entity extractions into a comprehensive profile"""
        merged = {
            'demographics': {},
            'interests': [],
            'survey_preferences': {},
            'expertise_level': None,
            'personality_traits': {},
            'confidence_scores': {}
        }

        for entities in entities_list:
            # Merge demographics (prefer most recent)
            merged['demographics'].update(entities.get('demographics', {}))

            # Merge interests (accumulate unique)
            merged['interests'].extend(entities.get('interests', []))
            merged['interests'] = list(set(merged['interests']))

            # Merge survey preferences (prefer most recent)
            merged['survey_preferences'].update(entities.get('survey_preferences', {}))

            # Set expertise level (prefer higher levels)
            current_level = merged['expertise_level']
            new_level = entities.get('expertise_level')

            if new_level and (not current_level or self._level_hierarchy(new_level) > self._level_hierarchy(current_level)):
                merged['expertise_level'] = new_level

            # Merge personality traits
            merged['personality_traits'].update(entities.get('personality_traits', {}))

        return merged

    def _level_hierarchy(self, level: str) -> int:
        """Convert expertise level to hierarchy number"""
        hierarchy = {'beginner': 1, 'intermediate': 2, 'expert': 3}
        return hierarchy.get(level, 0)

    def get_entity_confidence(self, entity_type: str, entity_value: str, context: str) -> float:
        """Calculate confidence score for an entity extraction"""
        # Simple confidence calculation based on context
        confidence = 0.5  # Base confidence

        # Increase confidence for explicit statements
        if any(word in context.lower() for word in ['i am', 'i\'m', 'my', 'i have']):
            confidence += 0.3

        # Increase confidence for repeated mentions
        if context.lower().count(entity_value.lower()) > 1:
            confidence += 0.2

        return min(confidence, 1.0)
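

if __name__ == "__main__":
    # Minimal usage sketch. With no LLM client supplied, extraction falls back to the
    # regex/keyword heuristics above; a real deployment would pass a grok_client whose
    # chat_complete(prompt) returns a JSON string. The messages below are illustrative.
    logging.basicConfig(level=logging.INFO)

    extractor = EntityExtractor()
    profile = extractor.process_conversation_history([
        {
            'user_message': "I'm 34 years old and I work as a nurse. I have 10 years of experience.",
            'assistant_response': "Thanks for sharing your background.",
        },
        {
            'user_message': "Hydroponics is new to me, and multiple choice questions would be my preference.",
            'assistant_response': "",
        },
    ])
    # Expect roughly: age/profession/experience_years demographics, a 'hydroponics'
    # interest, a 'beginner' expertise level, and a multiple-choice survey preference
    print(json.dumps(profile, indent=2))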
