import torch
import re
import json
import threading
from pathlib import Path
from datetime import datetime
# Removed local LLM imports - now using Grok API
# from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from collections import OrderedDict
from itertools import combinations
from concurrent.futures import ThreadPoolExecutor, as_completed
from airagagent.config import (
    LANGUAGE_MODEL,
    MAX_NEW_TOKENS,
    TEMPERATURE,
    DO_SAMPLE,
    NOTES_DIR,
    KNOWLEDGE_CARD_LLM_BUDGET,
    DIGESTS_DIR,
    CAPSULES_DIR,
    DOCUMENTS_DIR,
)
from typing import List, Dict, Any, Tuple, Optional, Set, Iterable
from difflib import SequenceMatcher
from rapidfuzz import fuzz
from airagagent.grok_api import call_grok_api, generate_answer_with_grok

class RAGAgent:
    """An intelligent agent that guides RAG responses with reasoning"""

    GENERIC_FALLBACK_MESSAGE = (
        "Based on the available documents, I can provide information about financial markets, "
        "trading strategies, and backtesting analysis. Please ask a more specific question "
        "about trading, investing, or market analysis."
    )

    COMMON_STOP_WORDS = {
        'the', 'and', 'for', 'with', 'from', 'that', 'this', 'those', 'these', 'there',
        'their', 'about', 'into', 'were', 'have', 'has', 'had', 'which', 'what', 'when',
        'where', 'why', 'how', 'who', 'whose', 'whom', 'shall', 'will', 'would', 'could',
        'should', 'can', 'may', 'might', 'a', 'an', 'of', 'in', 'on', 'at', 'as', 'is',
        'are', 'was', 'be', 'been', 'being', 'it', 'its', 'by', 'or', 'to', 'do', 'did',
        'does', 'done', 'so', 'if', 'but', 'not', 'no', 'yes', 'each', 'any', 'all', 'per',
        'such', 'than', 'among', 'between', 'within', 'without', 'over', 'under', 'more',
        'most', 'some', 'other', 'also', 'available', 'according', 'document', 'documents'
    }

    def __init__(self):
        # Removed local LLM - now using Grok API
        self.model = None
        self.tokenizer = None
        self.pipeline = None
        self.knowledge_card_llm_budget = max(0, KNOWLEDGE_CARD_LLM_BUDGET)
        self.knowledge_card_llm_used = 0
        self.knowledge_card_llm_enabled = True  # Always enabled with API
        self.short_term_memory: List[str] = []
        self.memory_limit = 4
        self.document_digests: Dict[str, Dict[str, Any]] = {}
        self.theme_capsules: Dict[str, Dict[str, Any]] = {}
        self.entity_graph: Dict[str, Dict[str, Any]] = {}
        self.load_distilled_resources()
        self.entity_graph = self.build_entity_graph_index()
        self._chunk_cache: Dict[str, Optional[Dict[str, Any]]] = {}
        # No longer loading local model - using Grok API instead
        # self.load_model()

    def load_distilled_resources(self):
        """Load previously distilled document digests and theme capsules."""
        try:
            self.document_digests = self.load_document_digests()
        except Exception as e:
            print(f"Warning: failed to load document digests: {e}")
            self.document_digests = {}

        try:
            self.theme_capsules = self.load_theme_capsules()
        except Exception as e:
            print(f"Warning: failed to load theme capsules: {e}")
            self.theme_capsules = {}

    def load_model(self):
        """Load language model - DEPRECATED: Now using Grok API"""
        # No longer loading local model - using Grok Fast API instead
        print("Using Grok Fast API - no local model loading required")
        pass
    
    def _call_grok_for_generation(self, prompt: str, max_tokens: int = 150, temperature: float = 0.3) -> str:
        """Helper method to call Grok API for text generation (replaces pipeline calls)"""
        response = call_grok_api(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temperature
        )
        if response:
            # Remove the prompt from the response if it's included
            if prompt in response:
                return response[len(prompt):].strip()
            return response.strip()
        return ""

    def correct_spelling(self, text: str) -> str:
        """Basic spelling correction for common OCR/misspelling issues"""
        corrections = {
            "amerca": "america",
            "columbis": "columbus",
            "befor": "before",
            "culures": "cultures",
            "culturs": "cultures",
            "befor": "before",
            "clumbus": "columbus",
            "destiny": "destiny",  # Keep as is
            "secrect": "secret",
            "philosopy": "philosophy",
            "mystical": "mystical",
            "tradition": "tradition"
        }
        for wrong, right in corrections.items():
            text = text.replace(wrong, right)
        return text

    def clean_ocr_text(self, text: str) -> str:
        """Clean common OCR artifacts while preserving legitimate content"""
        if text is None:
            return ""

        original_text = text

        # Normalize newlines and unicode dashes early
        text = text.replace('—', '-').replace('¬', ' ')

        # Remove OCR artifacts and headers more aggressively
        text = re.sub(r'\\<\^\^?', '', text)  # Remove angle bracket artifacts
        text = re.sub(r'\^\^', '', text)  # Remove double carets
        text = re.sub(r'K\d{2,}', '', text)  # Remove K followed by numbers
        text = re.sub(r'\d{12,}', '', text)  # Remove extremely long numbers
        text = re.sub(r'\b\d{2}\s+\d{2}\s+\d{2}\s+\d{2}\s+\d{2}\b', '', text)  # Remove catalog numbers

        # Remove specific OCR patterns seen in scanned books
        text = re.sub(r'org/details/[^\s]+', '', text, flags=re.IGNORECASE)  # Remove archive.org links
        text = re.sub(r'Christopher\s+Columbus\s+and\s+the\s+participati[^\n]*', '', text, flags=re.IGNORECASE)
        text = re.sub(r'Expulsion of the Jews\.*', '', text, flags=re.IGNORECASE)
        text = re.sub(r'\bSAGE\s+ENDOWMENT\s+FUND[^\n]*', '', text, flags=re.IGNORECASE)
        text = re.sub(r'\bSAGE\s+ENDOWMENT[^\n]*', '', text, flags=re.IGNORECASE)
        text = re.sub(r'\bTHE\s+GIFT\s+OF[^\n]*', '', text, flags=re.IGNORECASE)
        text = re.sub(r'ipmumyi;[^ ]*', '', text, flags=re.IGNORECASE)

        # Remove known boilerplate blocks
        text = re.sub(r'The original ofthisbook.*?(?=\w)', '', text, flags=re.IGNORECASE | re.DOTALL)
        text = re.sub(r'bought with the income from the sage endowment fund.*?(?=\w)', '', text, flags=re.IGNORECASE | re.DOTALL)

        # Clean up common OCR errors
        text = re.sub(r'\bAS\b', '', text)  # Remove standalone "AS"
        text = re.sub(r'\bbook\s*:\s*', '', text, flags=re.IGNORECASE)  # Remove "book:" prefixes
        text = re.sub(r'\bstated earlier\s*,?', '', text, flags=re.IGNORECASE)
        text = re.sub(r'\bb^\b', '', text, flags=re.IGNORECASE)

        # Fix common OCR spacing issues like "Christia n"
        text = re.sub(r'([A-Za-z])\s+([A-Za-z])\s+([A-Za-z])', lambda m: m.group(1) + m.group(2) + ' ' + m.group(3) if m.group(2).islower() else m.group(0), text)
        text = re.sub(r'(\w)\s+(\w)\.', r'\1\2.', text)

        # Remove leftover page artifacts or repeated hyphen blocks
        text = re.sub(r'[•%\^]+', ' ', text)
        text = re.sub(r'_{2,}', ' ', text)

        # Remove lines that are mostly digits or punctuation
        text = re.sub(r'\b\d{4,}\b', '', text)

        # Clean up spacing and punctuation
        text = re.sub(r'\s+', ' ', text)  # Normalize whitespace
        text = re.sub(r'\n+', ' ', text)  # Replace multiple newlines with spaces
        text = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', text)  # Fix spacing after punctuation
        text = text.strip()

        if not text:
            return ""

        # Only return cleaned text if it's not too different (preserve content)
        cleaned_length = len(text)
        original_length = len(original_text.strip())
        if original_length > 0 and cleaned_length < original_length * 0.25:
            return original_text.strip()

        return text

    def score_content_quality(self, text: str) -> float:
        """Score the quality of text content (higher is better)"""
        if not text.strip():
            return 0.0

        # Quality indicators
        quality_score = 0.0

        # Length bonus (meaningful content tends to be longer)
        word_count = len(text.split())
        if word_count > 10:
            quality_score += min(word_count / 50, 2.0)  # Cap at 2.0

        # Penalty for OCR artifacts
        ocr_indicators = ['book:', 'k23', 'cornell', 'library', 'original ofthisbook', 'dr.',
                         'christopher columbus andtheparticipati', 'ipmumyi', 'bought withtheincome']
        text_lower = text.lower()
        for indicator in ocr_indicators:
            if indicator in text_lower:
                quality_score -= 2.0  # Stronger penalty

        # Penalty for excessive numbers
        number_count = len(re.findall(r'\d+', text))
        quality_score -= number_count * 0.1

        # Penalty for broken words (split words like "Christia n" -> "Christia" "n")
        words = text.split()
        short_word_penalty = 0
        # Count very short words that might be word fragments
        for word in words:
            if len(word) <= 2 and word.isalpha():
                short_word_penalty += 0.2
        quality_score -= min(short_word_penalty, 1.0)  # Cap penalty

        # Penalty for double spaces
        double_spaces = text.count('  ')
        quality_score -= double_spaces * 0.1

        # Penalty for gibberish patterns (consecutive consonants without vowels)
        gibberish_patterns = len(re.findall(r'[bcdfghjklmnpqrstvwxyz]{4,}', text_lower))
        quality_score -= gibberish_patterns * 0.3

        # Bonus for complete sentences
        sentences = re.split(r'[.!?]+', text)
        complete_sentences = sum(1 for s in sentences if len(s.strip().split()) > 5)
        quality_score += complete_sentences * 0.1

        return max(0.0, quality_score)  # Don't go below 0

    def filter_context_quality(self, context: str) -> str:
        """Filter and clean context chunks, prioritizing higher quality content"""
        chunks = context.split('\n---\n')
        scored_chunks = []

        for chunk in chunks:
            if not chunk.strip():
                continue

            # Extract content (skip metadata lines)
            lines = chunk.split('\n')
            content_lines = []
            for line in lines:
                if line.strip() and not line.startswith('[Source:') and not line.startswith('Relevance:'):
                    # Clean the line
                    cleaned_line = self.clean_ocr_text(line)
                    if cleaned_line and len(cleaned_line.split()) > 3:  # Require minimum meaningful content
                        content_lines.append(cleaned_line)

            if content_lines:
                content_text = ' '.join(content_lines)
                quality_score = self.score_content_quality(content_text)

                # Reconstruct chunk with metadata
                metadata_lines = [line for line in lines if line.startswith('[Source:') or line.startswith('Relevance:')]
                filtered_chunk = '\n'.join(metadata_lines + content_lines)
                scored_chunks.append((filtered_chunk, quality_score))

        # Sort by quality score (highest first) and take top chunks
        scored_chunks.sort(key=lambda x: x[1], reverse=True)
        filtered_chunks = [chunk for chunk, score in scored_chunks[:4]]  # Take top 4 highest quality

        return '\n---\n'.join(filtered_chunks)

    def analyze_question(self, question: str) -> str:
        """Analyze the question to understand what kind of answer is needed"""
        question_lower = question.lower()

        # Detect source lookup / document listing questions
        source_lookup_phrases = [
            'which pdf', 'which document', 'which file',
            'what other pdf', 'what other document', 'what other file',
            'what sources', 'which sources', 'list the pdfs', 'list the documents'
        ]
        if any(phrase in question_lower for phrase in source_lookup_phrases):
            return "source_lookup"
        if 'mention' in question_lower and any(term in question_lower for term in ['pdf', 'document', 'file', 'source', 'book']):
            return "source_lookup"
        if question_lower.startswith('list') and any(term in question_lower for term in ['pdf', 'document', 'file', 'source']):
            return "source_lookup"

        # Enhanced: Detect technical questions first
        if self._detect_technical_query(question):
            # Technical questions are usually factual but need precise answers
            return "factual"

        # Classify question type
        if any(word in question_lower for word in ['who', 'what', 'where', 'when', 'how', 'why']):
            return "factual"
        elif any(word in question_lower for word in ['explain', 'describe', 'define']):
            return "explanatory"
        elif any(word in question_lower for word in ['compare', 'difference', 'similar']):
            return "comparative"
        else:
            return "general"

    def fuzzy_match_score(self, word1: str, word2: str) -> float:
        """Calculate fuzzy similarity score between two words using rapidfuzz"""
        if not word1 or not word2:
            return 0.0
        return fuzz.ratio(word1.lower(), word2.lower()) / 100.0

    def _split_into_sentences(self, text: str) -> List[str]:
        """Split text into sentences with basic cleanup"""
        if not text:
            return []
        # Normalize hyphenated headings or transitions into sentence boundaries
        text = re.sub(r'(?<=\w)-(?=[A-Z])', '. ', text)
        sentences = re.split(r'(?<=[.!?])\s+|\n', text)
        cleaned = []
        for sentence in sentences:
            sentence = self.clean_ocr_text(sentence.strip())
            if len(sentence) > 20:
                cleaned.append(sentence)
        return cleaned

    def create_preview_text(self, text: str, max_sentences: int = 2, max_length: int = 220) -> str:
        """Create a short, clean preview snippet for UI display"""
        sentences = self._split_into_sentences(text)
        if not sentences:
            return ""

        preview = ' '.join(sentences[:max_sentences]).strip()
        if len(preview) > max_length:
            preview = preview[:max_length].rsplit(' ', 1)[0] + '...'
        return preview

    def consolidate_context_snippet(self, context: str, limit_sentences: int = 4) -> str:
        """Build a concise snippet from the cleaned context for summarisation or fallback"""
        sentences = self._split_into_sentences(context)
        snippet = ' '.join(sentences[:limit_sentences]).strip()
        return snippet[:800]

    def prepare_source_summaries(self, search_results, max_sources: int = 5) -> List[Dict[str, Any]]:
        """Aggregate search results by source and produce cleaned previews"""
        aggregated: Dict[str, Dict[str, Any]] = {}

        for result in search_results:
            source = result['metadata'].get('source', 'Unknown')
            score = float(result.get('score', 0.0))
            keyword_score = float(result.get('keyword_score', 0.0))
            summary = result['metadata'].get('summary', '')
            themes = result['metadata'].get('themes', []) or []
            key_points = result['metadata'].get('key_points')
            document_title = result['metadata'].get('document_title')
            keyword_snippet = result.get('keyword_snippet')

            digest = self.document_digests.get(source)
            if digest:
                summary = digest.get('overview') or summary
                if digest.get('key_points'):
                    key_points = digest['key_points']
                if digest.get('themes'):
                    themes = digest['themes']
                if digest.get('title'):
                    document_title = digest['title']

            digest = self.document_digests.get(source)
            if digest:
                summary = digest.get('overview') or summary
                digest_points = digest.get('key_points', [])
                if digest_points:
                    key_points = digest_points
                digest_themes = digest.get('themes', []) or []
                if digest_themes:
                    themes = digest_themes

            if key_points is None:
                key_points = []

            if keyword_snippet:
                preview = self.create_preview_text(keyword_snippet, max_sentences=2)
            elif summary:
                preview = summary
            else:
                cleaned_content = self.clean_ocr_text(result.get('content', ''))
                preview = self.create_preview_text(cleaned_content)

            if key_points and not summary:
                preview = ' '.join(key_points[:2])

            if source in aggregated:
                aggregated[source]['score'] = max(aggregated[source]['score'], score)
                aggregated[source]['keyword_score'] = max(aggregated[source]['keyword_score'], keyword_score)
                if preview and len(preview) > len(aggregated[source]['preview']):
                    aggregated[source]['preview'] = preview
                    if keyword_snippet:
                        aggregated[source]['keyword_snippet'] = keyword_snippet
                if key_points:
                    existing_points = aggregated[source].setdefault('key_points', [])
                    if not existing_points:
                        aggregated[source]['key_points'] = key_points[:3]
                if themes:
                    existing_themes = aggregated[source].setdefault('themes', [])
                    if not existing_themes:
                        aggregated[source]['themes'] = themes[:5]
                if summary and not aggregated[source].get('summary'):
                    aggregated[source]['summary'] = summary
                if document_title and not aggregated[source].get('title'):
                    aggregated[source]['title'] = document_title
            else:
                aggregated[source] = {
                    'source': source,
                    'score': score,
                    'keyword_score': keyword_score,
                    'preview': preview,
                    'summary': summary,
                    'key_points': key_points[:3] if key_points else [],
                    'themes': themes[:5],
                    'title': document_title,
                    'keyword_snippet': keyword_snippet
                }

        summaries = sorted(aggregated.values(), key=lambda x: x['score'], reverse=True)
        return summaries[:max_sources]

    def collect_distilled_insights(
        self,
        search_results: Optional[List[Dict[str, Any]]],
        limit: int = 4
    ) -> List[Dict[str, Any]]:
        """Collect distilled document digests relevant to the question."""
        if not search_results:
            return []

        insights: List[Dict[str, Any]] = []
        seen: Set[str] = set()

        for result in search_results:
            metadata = result.get('metadata', {})
            source = metadata.get('source')
            if not source or source in seen:
                continue

            digest = self.document_digests.get(source)
            if digest:
                insight = {
                    'source': source,
                    'title': self._humanize_title(source, digest.get('title') or metadata.get('document_title')),
                    'overview': digest.get('overview') or metadata.get('summary') or "",
                    'key_points': digest.get('key_points') or metadata.get('key_points') or [],
                    'themes': digest.get('themes') or metadata.get('themes') or [],
                    'score': float(result.get('score', 0.0)),
                    'keyword_score': float(result.get('keyword_score', 0.0))
                }
            else:
                overview = metadata.get('summary') or self.clean_ocr_text(result.get('content', '')[:400])
                insight = {
                    'source': source,
                    'title': self._humanize_title(source, metadata.get('document_title')),
                    'overview': overview,
                    'key_points': metadata.get('key_points') or [],
                    'themes': metadata.get('themes') or [],
                    'score': float(result.get('score', 0.0)),
                    'keyword_score': float(result.get('keyword_score', 0.0))
                }

            insights.append(insight)
            seen.add(source)

        insights.sort(key=lambda x: (x['score'], x['keyword_score']), reverse=True)
        return insights[:limit]

    def answer_from_distilled_insights(
        self,
        question: str,
        insights: List[Dict[str, Any]],
        question_type: str
    ) -> str:
        """Generate a synthesized answer directly from document digests."""
        if not insights:
            return ""

        question_words_raw = set(re.findall(r'\b\w+\b', question.lower()))
        question_words = self._filter_stop_words(question_words_raw)
        if not question_words:
            question_words = question_words_raw
        scored: List[Tuple[float, Dict[str, Any]]] = []

        for insight in insights:
            theme_words = set()
            for theme in insight.get('themes', []):
                theme_tokens = re.findall(r'\b\w+\b', theme.lower())
                theme_words.update(self._filter_stop_words(theme_tokens))

            key_point_words = set()
            for point in insight.get('key_points', []):
                key_tokens = re.findall(r'\b\w+\b', point.lower())
                key_point_words.update(self._filter_stop_words(key_tokens))

            overlap = question_words.intersection(theme_words | key_point_words)
            relevance = len(overlap) / max(1, len(question_words))

            combined_score = (
                insight['score'] * 0.7 +
                insight.get('keyword_score', 0.0) * 0.3 +
                relevance
            )
            scored.append((combined_score, insight))

        scored.sort(key=lambda x: x[0], reverse=True)
        selected = [insight for _, insight in scored[:3]] or insights[:2]

        # Collect all sentences from all insights with deduplication
        all_sentences = []
        seen_sentences = set()
        source_titles = []
        
        for insight in selected:
            title = insight.get('title') or Path(insight['source']).stem.replace('_', ' ')
            source_titles.append(title)
            sentences = self._extract_relevant_sentences(question_words, insight)
            
            for sentence in sentences:
                if not sentence or len(sentence.strip()) < 20:
                    continue
                
                # Clean the sentence first
                cleaned = self._clean_sentence(sentence)
                if not cleaned:
                    continue
                
                # Normalize for comparison (more aggressive)
                normalized = re.sub(r'[^\w\s]', '', cleaned.lower().strip())
                normalized = re.sub(r'\s+', ' ', normalized)
                
                # Extract key words (remove common words)
                words = [w for w in normalized.split() if len(w) > 3]
                if len(words) < 5:  # Skip very short sentences
                    continue
                
                # Check for duplicates using word overlap
                is_duplicate = False
                for seen_norm in seen_sentences:
                    seen_words = set([w for w in seen_norm.split() if len(w) > 3])
                    current_words = set(words)
                    
                    if len(current_words) == 0 or len(seen_words) == 0:
                        continue
                    
                    overlap = len(current_words.intersection(seen_words))
                    total_unique = len(current_words.union(seen_words))
                    
                    if total_unique > 0 and overlap / total_unique > 0.7:  # 70% word overlap
                        is_duplicate = True
                        break
                
                if not is_duplicate:
                    all_sentences.append(cleaned)
                    seen_sentences.add(' '.join(words))

        if not all_sentences:
            return ""

        # Build formatted answer
        intro = self._build_intro_from_insights(question, selected, question_type)
        
        # Format answer with proper structure
        answer_parts = []
        
        if intro:
            # Clean up intro - remove redundant theme lists if they appear
            intro_clean = self._clean_intro(intro)
            answer_parts.append(intro_clean)
            answer_parts.append("")  # Blank line for spacing

        # Add key points/sentences with proper formatting
        if all_sentences:
            # Group sentences into paragraphs (max 2 sentences per paragraph)
            current_para = []
            for i, sentence in enumerate(all_sentences[:8]):  # Limit to top 8 sentences
                current_para.append(sentence)
                
                # Start new paragraph every 2 sentences
                if len(current_para) >= 2:
                    para_text = " ".join(current_para)
                    answer_parts.append(para_text)
                    answer_parts.append("")  # Blank line
                    current_para = []
            
            # Add remaining sentences
            if current_para:
                answer_parts.append(" ".join(current_para))

        # Remove empty lines and join
        answer = "\n".join(line for line in answer_parts if line.strip()).strip()
        
        # Apply final formatting cleanup to remove any remaining duplicates
        answer = self._format_final_answer(answer)
        
        if len(answer) < 60:
            return ""
        if self.is_generic_output(answer):
            return ""

        # Add sources in clean format
        unique_sources = self._dedupe_preserve(source_titles)
        if unique_sources:
            sources_line = "\n\nSources: " + ", ".join(unique_sources[:3])
            answer = answer + sources_line
        
        return answer

    def _build_intro_from_insights(self, question: str, insights: List[Dict[str, Any]], question_type: str) -> str:
        """Compose an introductory sentence summarizing the combined insights."""
        if not insights:
            return ""

        if question_type == "factual":
            lead = "According to the distilled documents, "
        elif question_type == "explanatory":
            lead = "The documents explain that "
        else:
            lead = "The sources indicate that "

        theme_terms = []
        for insight in insights:
            for theme in insight.get('themes', []):
                theme_terms.append(theme.title())
        theme_terms = self._dedupe_preserve(theme_terms)

        if theme_terms:
            joined_entities = ', '.join(theme_terms[:4])
            return f"{lead}{joined_entities} feature prominently in discussions related to “{question}”."
        else:
            entities = [insight.get('title') for insight in insights if insight.get('title')]
            entities = self._dedupe_preserve(entities)
            joined_entities = ', '.join(entities[:3]) if entities else 'the referenced sources'
            return f"{lead}{joined_entities} provide accounts relevant to “{question}”."

    def create_source_lookup_answer(
        self,
        question: str,
        context: str,
        search_results: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Generate answer listing which documents mention the queried topic."""
        question_lower = question.lower()

        stop_words = {
            'what', 'which', 'other', 'another', 'pdf', 'pdfs', 'document', 'documents',
            'book', 'books', 'file', 'files', 'mention', 'mentions', 'mentioned',
            'talk', 'talks', 'discuss', 'discusses', 'reference', 'references',
            'cover', 'covers', 'include', 'includes', 'show', 'shows', 'list', 'lists',
            'any', 'do', 'does', 'about', 'topic', 'sources', 'source', 'else'
        }

        keywords = [
            word for word in re.findall(r'\b\w+\b', question_lower)
            if len(word) > 2 and word not in stop_words
        ]

        if not keywords:
            return ""

        # Expand keywords with simple variations (singular/plural roots)
        expanded_keywords = set()
        for word in keywords:
            expanded_keywords.add(word)
            if word.endswith('s'):
                expanded_keywords.add(word[:-1])
            else:
                expanded_keywords.add(word + 's')

        matches: List[Dict[str, Any]] = []
        seen_sources = set()

        def add_match(source: str, score: float, snippet: str):
            if source in seen_sources:
                return
            seen_sources.add(source)
            matches.append({
                'source': source,
                'score': score,
                'snippet': snippet.strip() if snippet else ""
            })

        # First use structured search results if available
        if search_results:
            for result in search_results:
                source = result.get('metadata', {}).get('source', 'Unknown')
                score = float(result.get('score', 0.0))
                content = self.clean_ocr_text(result.get('content', '') or '')
                content_lower = content.lower()

                if any(kw in content_lower for kw in expanded_keywords):
                    snippet = self.create_preview_text(content, max_sentences=2, max_length=240)
                    add_match(source, score, snippet)

        # Fall back to parsed context chunks if search results didn't produce matches
        if not matches and context:
            chunks = context.split('\n---\n')
            for chunk in chunks:
                if '[Source:' not in chunk:
                    continue
                lines = chunk.split('\n')
                source_line = next((line for line in lines if line.startswith('[Source:')), '')
                source = source_line.replace('[Source:', '').replace(']', '').strip() or 'Unknown'
                try:
                    score_line = next((line for line in lines if line.startswith('Relevance:')), 'Relevance: 0')
                    score = float(score_line.split(':', 1)[1].strip())
                except Exception:
                    score = 0.0
                content = '\n'.join(line for line in lines if line and not line.startswith('[') and not line.startswith('Relevance:'))
                content = self.clean_ocr_text(content)
                content_lower = content.lower()

                if any(kw in content_lower for kw in expanded_keywords):
                    snippet = self.create_preview_text(content, max_sentences=2, max_length=240)
                    add_match(source, score, snippet)

        if not matches:
            return ""

        matches.sort(key=lambda x: x['score'], reverse=True)

        topic_phrase = ', '.join(sorted(set(keywords)))
        lines = [
            f"The following documents mention {topic_phrase}:"
        ]

        for idx, match in enumerate(matches, start=1):
            snippet_part = f" — {match['snippet']}" if match['snippet'] else ""
            lines.append(f"{idx}. {match['source']}{snippet_part}")

        return '\n'.join(lines)

    def extract_key_info(self, context, question):
        """Extract the most relevant information from context with improved keyword matching"""
        # Extract and expand keywords from question
        question_lower = question.lower()
        raw_question_words = set(re.findall(r'\b\w+\b', question_lower))
        raw_question_words = {word for word in raw_question_words if len(word) > 2}

        stop_words = {
            'the', 'and', 'who', 'what', 'when', 'where', 'which', 'that', 'this',
            'with', 'from', 'into', 'about', 'does', 'did', 'will', 'would',
            'could', 'should', 'shall', 'were', 'have', 'has', 'had', 'been',
            'being', 'before', 'after', 'into', 'onto', 'upon', 'their', 'there',
            'according', 'available', 'documents', 'document', 'please', 'other'
        }

        question_words = {word for word in raw_question_words if word not in stop_words}
        if not question_words:
            question_words = {word for word in raw_question_words if len(word) > 4}
        if not question_words:
            question_words = raw_question_words

        # Create expanded keyword set with variations
        expanded_words = set(question_words)
        synonym_map = {
            'america': ['continent', 'territory', 'new', 'world'],
            'americas': ['continent', 'territory', 'new', 'world'],
            'culture': ['civilization', 'people', 'tribe', 'nation'],
            'peoples': ['people', 'tribes', 'nations', 'ancient'],
            'people': ['tribes', 'nations', 'cultures', 'ancient'],
            'naga': ['serpent', 'serpents', 'snake', 'snakes'],
            'nagas': ['serpent', 'serpents', 'snake', 'snakes', 'wisdom'],
            'explorers': ['voyagers', 'navigators', 'expeditions'],
            'funding': ['finance', 'financing', 'capital', 'backing'],
            'trade': ['commerce', 'exchange', 'network', 'routes']
        }
        for word in question_words:
            if len(word) > 3:
                # Add partial matches (first 4+ characters)
                expanded_words.add(word[:4])
                # Add common variations
                if word.endswith('s'):
                    expanded_words.add(word[:-1])  # singular
                else:
                    expanded_words.add(word + 's')  # plural

                # Add common synonyms for key historical terms
                if word in ['america', 'americas']:
                    expanded_words.update(['land', 'territory', 'new', 'world'])
                elif word in ['culture', 'cultures']:
                    expanded_words.update(['civilization', 'society', 'people', 'tribe', 'nation'])
                elif word in ['before', 'columbus', 'pre']:
                    expanded_words.update(['prior', 'ancient', 'early', 'indigenous', 'native'])

                if word in synonym_map:
                    expanded_words.update(synonym_map[word])

        # Split context into chunks by source
        chunks = context.split('\n---\n')
        scored_sentences = []

        for chunk in chunks:
            if not chunk.strip():
                continue

            # Extract relevance score from chunk header
            base_score = 0.0
            if '[Source:' in chunk and 'Relevance:' in chunk:
                try:
                    score_part = chunk.split('Relevance:')[1].split(']')[0].strip()
                    base_score = float(score_part)
                except (IndexError, ValueError, AttributeError) as e:
                    # Score parsing failed, use default
                    base_score = 0.0

            # Extract sentences from this chunk
            lines = chunk.split('\n')
            content_start = 0
            for i, line in enumerate(lines):
                if line.strip() and not line.startswith('[Source:'):
                    content_start = i
                    break

            content = '\n'.join(lines[content_start:])
            sentences = re.split(r'[.!?]+', content)

            for sentence in sentences:
                sentence_clean = sentence.strip()
                if len(sentence_clean) < 15:  # Skip very short fragments
                    continue

                # Pre-clean sentence for OCR artifacts
                sentence_clean = self.clean_ocr_text(sentence_clean)
                if len(sentence_clean) < 15:  # Skip if cleaning removed too much
                    continue

                sentence_lower = sentence_clean.lower()
                sentence_words = set(re.findall(r'\b\w+\b', sentence_lower))

                relevance_score = 0
                matched_terms = []

                # Check for exact matches (highest priority)
                exact_matches = question_words.intersection(sentence_words)
                if exact_matches:
                    relevance_score += len(exact_matches) * 15  # Higher weight
                    matched_terms.extend(exact_matches)

                # Check for expanded matches
                for q_word in question_words:
                    for s_word in sentence_words:
                        if len(q_word) > 3 and len(s_word) > 3:
                            similarity = self.fuzzy_match_score(q_word, s_word)
                            if similarity > 0.8:  # Very close match
                                relevance_score += similarity * 10
                                matched_terms.append(s_word)
                            elif similarity > 0.6:  # Reasonable match
                                relevance_score += similarity * 5
                                matched_terms.append(s_word)

                # Check for synonym matches
                synonym_matches = expanded_words.intersection(sentence_words)
                if synonym_matches:
                    relevance_score += len(synonym_matches) * 8
                    matched_terms.extend(synonym_matches)

                # Factor in source relevance score
                relevance_score += base_score * 3

                # Bonus for sentences with multiple matches
                unique_matches = len(set(matched_terms))
                if unique_matches > 1:
                    relevance_score += unique_matches * 2

                if relevance_score > 2:  # Minimum threshold
                    scored_sentences.append((sentence_clean, relevance_score, base_score))

        # Sort by combined relevance score and take top sentences
        scored_sentences.sort(key=lambda x: x[1], reverse=True)
        top_sentences = [sentence for sentence, _, _ in scored_sentences[:6]]  # Take top 6

        # If no relevant sentences found, use fallback strategy
        if not top_sentences:
            chunks_with_scores = []
            for chunk in chunks:
                score = 0.0
                if 'Relevance:' in chunk:
                    try:
                        score_part = chunk.split('Relevance:')[1].split(']')[0].strip()
                        score = float(score_part)
                    except:
                        score = 0.0
                chunks_with_scores.append((chunk, score))

            chunks_with_scores.sort(key=lambda x: x[1], reverse=True)
            for chunk, _ in chunks_with_scores[:2]:  # Top 2 highest-scoring chunks
                lines = chunk.split('\n')
                content_start = 0
                for i, line in enumerate(lines):
                    if line.strip() and not line.startswith('[Source:'):
                        content_start = i
                        break
                content = '\n'.join(lines[content_start:])
                sentences = re.split(r'[.!?]+', content)
                # Take first 2 substantial sentences from each chunk
                chunk_sentences = [s.strip() for s in sentences if len(s.strip()) > 20][:2]
                top_sentences.extend(chunk_sentences)

        combined = ' '.join(top_sentences[:5])  # Return top 5 sentences max
        return self.clean_ocr_text(combined)

    def format_extracted_answer(self, extracted_info, question_type):
        """Format extracted information into a coherent answer"""

        # Clean up the extracted information
        sentences = re.split(r'[.!?]+', extracted_info)
        meaningful_sentences = [s.strip() for s in sentences if len(s.strip()) > 10][:3]

        if not meaningful_sentences:
            return ""

        # Join sentences and clean up
        answer = '. '.join(meaningful_sentences)
        if not answer.endswith('.'):
            answer += '.'

        # Capitalize first letter
        answer = answer[0].upper() + answer[1:] if answer else ""

        return answer

    def get_fallback_answer(self, question):
        """Provide fallback answers for common questions"""

        question_lower = question.lower()

        # Special cases for book titles and specific topics
        if 'secret destiny of america' in question_lower:
            return "The Secret Destiny of America is a book by Manly P. Hall that explores the esoteric and philosophical foundations of American democracy, suggesting that America has a special spiritual destiny in world affairs, rooted in hermetic and mystical traditions."

        if 'naga' in question_lower:
            return "The Nagas were semi-divine beings in Hindu and Buddhist mythology, often depicted as serpents or having human upper bodies with serpent lower bodies. They were considered wise beings associated with water, wisdom, and sometimes royalty."

        elif 'manly' in question_lower or 'hall' in question_lower or 'p hall' in question_lower:
            return "Manly P. Hall was an American mystic, author, and philosopher known for his extensive research into esoteric traditions, alchemy, and mysticism. He authored numerous books including 'The Secret Destiny of America' and was a prominent figure in the study of hermetic philosophy."

        elif 'secret doctrine' in question_lower or 'blavatsky' in question_lower:
            return "The Secret Doctrine is a book by Helena Petrovna Blavatsky, published in 1888. It outlines her teachings on the origins of humanity, the nature of the universe, and esoteric philosophy, drawing from various religious and mystical traditions."

        else:
            return self.GENERIC_FALLBACK_MESSAGE

    def get_context_aware_fallback(self, context: str, question: str) -> str:
        """Create intelligent, agentic fallback using available context information"""
        if not context:
            return self.get_fallback_answer(question)

        # Extract source information and content snippets
        chunks = context.split('\n---\n')
        sources = []
        top_snippets = []

        for chunk in chunks[:3]:  # Check top 3 chunks
            if '[Source:' in chunk:
                source_line = chunk.split('\n')[0]
                source_name = source_line.split(']')[0].replace('[Source: ', '')
                sources.append(source_name)

                # Extract a meaningful snippet from this chunk
                lines = chunk.split('\n')
                content_lines = [self.clean_ocr_text(line) for line in lines if line.strip() and not line.startswith('[')]
                if content_lines:
                    preview = self.create_preview_text(' '.join(content_lines), max_sentences=2, max_length=180)
                    if preview:
                        top_snippets.append(f"{source_name}: {preview}")

        # Analyze question to provide targeted fallback
        question_lower = question.lower()
        question_type = self.analyze_question(question)

        # Topic-specific fallbacks with evidence
        if any(term in question_lower for term in ['culture', 'people', 'civilization', 'native', 'indigenous']):
            if any('america' in s.lower() or 'settler' in s.lower() or 'fell' in s.lower() for s in sources):
                return "I found relevant information about pre-Columbian cultures in the Americas. From '" + (sources[0] if sources else 'the documents') + "': " + (top_snippets[0] if top_snippets else "The documents discuss ancient settlements and civilizations in the Americas before European contact.")

        if any(term in question_lower for term in ['columbus', 'christopher', 'voyage', 'discovery', 'spanish', 'portuguese']):
            if any('columbus' in s.lower() for s in sources):
                return "The documents contain information about Christopher Columbus and his expeditions. From '" + (sources[0] if sources else 'the documents') + "': " + (top_snippets[0] if top_snippets else "Columbus's voyages and the historical context of Spanish exploration are discussed.")

        if any(term in question_lower for term in ['jew', 'jewish', 'hebrew', 'israel']):
            if any('columbus' in s.lower() for s in sources):
                return "The documents explore the participation of Jewish individuals and communities in Columbus's expeditions. From '" + (sources[0] if sources else 'the documents') + "': " + (top_snippets[0] if top_snippets else "Jewish involvement in Spanish and Portuguese maritime discoveries is examined.")

        if any(term in question_lower for term in ['secret', 'destiny', 'manly', 'hall', 'esoteric', 'mystical']):
            if any('hall' in s.lower() or 'destiny' in s.lower() for s in sources):
                return "The documents include esoteric interpretations of American history. From '" + (sources[0] if sources else 'the documents') + "': " + (top_snippets[0] if top_snippets else "Manly P. Hall's work on the mystical foundations of America is featured.")

        # General contextual fallback
        if sources and top_snippets:
            return "While I couldn't extract a direct answer to your specific question, the documents provide relevant context. From '" + sources[0] + "': " + top_snippets[0] + " This information may help address aspects of your query about historical and esoteric topics."

        # Ultimate fallback
        return "I analyzed the available documents but couldn't find a definitive answer to your question about '" + question + "'. The sources appear to focus on historical and esoteric topics. You might try rephrasing your question or asking about specific aspects covered in the documents."

    def reason_step_by_step(
        self,
        question: str,
        context: str,
        question_type: str,
        specific_fallback: Optional[str] = None,
        distilled_insights: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Multi-step reasoning process for generating answers"""

        # Step 1: Spelling correction
        corrected_question = self.correct_spelling(question)

        # Step 2: Clean context for quality
        cleaned_context = self.filter_context_quality(context)

        # Determine if we have a specific fallback we should preserve for later
        if specific_fallback is None:
            specific_fallback = self.get_fallback_answer(corrected_question)
        use_specific_fallback = (
            specific_fallback is not None and specific_fallback != self.GENERIC_FALLBACK_MESSAGE
        )

        # Step 3: Extract key information from cleaned context
        key_info = self.extract_key_info(cleaned_context, corrected_question)
        key_info_clean = self.clean_ocr_text(key_info) if key_info else ""

        # Step 4: If there is no usable context and we have a high-confidence fallback (like FAQ answers), use it
        if use_specific_fallback and not cleaned_context.strip():
            return specific_fallback

        # Step 5: If we have sufficient extracted info, format it
        if key_info_clean and len(key_info_clean.strip()) > 50:
            quality_score = self.score_content_quality(key_info_clean)
            if quality_score > 0.6:
                formatted_answer = self.create_answer_from_extraction(corrected_question, key_info_clean, question_type)
                if formatted_answer:
                    return formatted_answer

        # Step 6: Try summarising the best snippets before falling back
        synthesized = self.summarize_from_context(corrected_question, key_info_clean, cleaned_context, question_type)
        if synthesized:
            return synthesized

        # Step 7: Generate with model using structured prompt and cleaned context
        generated = self.generate_with_reflection(
            corrected_question,
            key_info_clean,
            cleaned_context,
            question_type,
            context,
            distilled_insights or []
        )
        if generated and generated != self.GENERIC_FALLBACK_MESSAGE:
            return generated

        # Step 8: As a last resort, use context-aware fallback, or specific fallback if that still yielded a generic response
        contextual = self.get_context_aware_fallback(context, corrected_question)
        if contextual == self.GENERIC_FALLBACK_MESSAGE and use_specific_fallback:
            return specific_fallback
        return contextual

    def generate_with_model(self, corrected_question: str, key_info: str, full_context: str, question_type: str, original_context: str) -> str:
        """Generate answer using the language model with improved prompting"""

        # Create a more structured prompt that encourages agentic behavior
        if question_type == "source_lookup":
            prompt_template = """You are an archivist assisting with document retrieval. Use only the provided context to identify which documents mention the requested topic. Present the results as a concise numbered list with the document title followed by a short supporting snippet.

QUESTION: {question}

KEY CONTEXT: {key_info}

FULL CONTEXT: {context}

INSTRUCTIONS:
- Only include documents that clearly mention the topic
- Quote or summarise the relevant mention in one short phrase
- If the topic is not mentioned, state that it is not found

RESULTS:"""
        elif question_type == "factual":
            prompt_template = """You are an intelligent research assistant analyzing documents. Answer this factual question directly and precisely, using only information from the provided context.

QUESTION: {question}

KEY CONTEXT: {key_info}

FULL CONTEXT: {context}

INSTRUCTIONS:
- Answer factually and concisely
- Use evidence from the documents
- If information is unclear, say so rather than speculate
- Structure your answer clearly
- For technical questions (pH, temperature, measurements), extract specific numbers/ranges from the context
- Do NOT provide generic information if the specific answer isn't in the context

ANSWER:"""
        elif question_type == "explanatory":
            prompt_template = """You are a knowledgeable analyst explaining historical and esoteric topics from primary sources. Provide a clear, evidence-based explanation.

QUESTION: {question}

KEY CONTEXT: {key_info}

FULL CONTEXT: {context}

INSTRUCTIONS:
- Explain concepts using document evidence
- Break down complex ideas step by step
- Maintain historical accuracy
- Be comprehensive but concise

EXPLANATION:"""
        else:
            prompt_template = """You are a thoughtful researcher synthesizing information from historical documents to address complex questions.

QUESTION: {question}

KEY CONTEXT: {key_info}

FULL CONTEXT: {context}

INSTRUCTIONS:
- Synthesize information from multiple sources when relevant
- Provide balanced, evidence-based insights
- Acknowledge uncertainties in the historical record
- Connect ideas meaningfully

RESPONSE:"""

        # Prepare and limit context to avoid token limits
        context_summary = full_context[:1500] if len(full_context) > 1500 else full_context
        key_info_summary = key_info[:800] if key_info else "No specific key information extracted from the documents."

        # Format prompt
        prompt = prompt_template.format(
            question=corrected_question,
            key_info=key_info_summary,
            context=context_summary
        )

        try:
            # Generate response using Grok API
            generated_text = self._call_grok_for_generation(
                prompt,
                max_tokens=min(MAX_NEW_TOKENS, 150),
                temperature=max(TEMPERATURE, 0.3)
            )
            if not generated_text:
                generated_text = prompt  # Fallback

            # Extract answer (remove prompt) - improved extraction
            answer = None
            
            # Try multiple extraction strategies
            if "Answer:" in generated_text:
                answer = generated_text.split("Answer:")[-1].strip()
            elif "ANSWER:" in generated_text:
                answer = generated_text.split("ANSWER:")[-1].strip()
            elif "EXPLANATION:" in generated_text:
                answer = generated_text.split("EXPLANATION:")[-1].strip()
            elif "RESPONSE:" in generated_text:
                answer = generated_text.split("RESPONSE:")[-1].strip()
            else:
                # Try to find answer after prompt
                prompt_len = len(prompt)
                if len(generated_text) > prompt_len:
                    answer = generated_text[prompt_len:].strip()
                else:
                    answer = generated_text.strip()
            
            # Remove prompt if it leaked through
            if prompt in answer:
                answer = answer.replace(prompt, "").strip()
            
            # Check for repetition (common issue with LLMs)
            sentences = answer.split('.')
            if len(sentences) > 2:
                # Check if first 2 sentences are identical
                if sentences[0].strip() == sentences[1].strip():
                    answer = '. '.join([sentences[0]] + sentences[2:])
            
            # Post-process
            answer = self.post_process_answer(answer, question_type) if answer else None

            # Final validation - check answer quality
            if not answer or len(answer.strip()) < 20:
                return self.get_context_aware_fallback(original_context, corrected_question)
            
            # Check for repetitive or broken answers
            answer_lower = answer.lower()
            if (answer_lower.startswith("based on the available documents") or
                answer_lower.count(answer_lower[:50]) > 2 or  # Same text repeated 3+ times
                self.score_content_quality(answer) < 0.5):  # Poor quality generated answer
                return self.get_context_aware_fallback(original_context, corrected_question)

            return answer

        except Exception as e:
            print(f"Error in model generation: {e}")
            return self.get_context_aware_fallback(full_context, corrected_question)

    def generate_with_reflection(
        self,
        corrected_question: str,
        key_info: str,
        cleaned_context: str,
        question_type: str,
        original_context: str,
        distilled_insights: List[Dict[str, Any]]
    ) -> str:
        """Two-stage reflective generation to simulate internal reasoning."""
        insight_section = self._format_insight_section(distilled_insights)
        memory_section = '\n'.join(f"- {note}" for note in self.short_term_memory[-self.memory_limit:]) or "None recorded."
        context_summary = cleaned_context[:1200]
        key_info_segment = key_info[:600] if key_info else "No specific key facts extracted yet."
        entity_hint_section = self._format_entity_graph_hints(corrected_question)

        analysis_prompt = f"""You are preparing internal research notes. Think carefully before answering the user.

QUESTION: {corrected_question}

DISTILLED INSIGHTS:
{insight_section if insight_section else 'None provided.'}

KEY INFO:
{key_info_segment}

SHARED MEMORY:
{memory_section}

CONTEXT SNAPSHOT:
{context_summary}

ENTITY GRAPH HINTS:
{entity_hint_section if entity_hint_section else 'No entity relationships surfaced for this query.'}

Write concise bullet-style analysis notes capturing the most relevant facts, names, places, and time periods. Do not answer the question yet. Focus on evidence and uncertainty."""

        try:
            # Use Grok API for analysis
            analysis_text = self._call_grok_for_generation(
                analysis_prompt,
                max_tokens=140,
                temperature=max(TEMPERATURE, 0.3)
            )
        except Exception as e:
            print(f"Warning: reflection analysis failed: {e}")
            return self.generate_with_model(corrected_question, key_info, cleaned_context, question_type, original_context)

        if analysis_text:
            self.short_term_memory.append(analysis_text)
            if len(self.short_term_memory) > self.memory_limit:
                self.short_term_memory = self.short_term_memory[-self.memory_limit:]

        final_prompt = self._build_reflective_prompt(
            corrected_question,
            analysis_text,
            distilled_insights,
            key_info_segment,
            original_context,
            question_type,
            entity_hint_section
        )

        try:
            # Use Grok API for final answer generation
            react_output = self._call_grok_for_generation(
                final_prompt,
                max_tokens=min(MAX_NEW_TOKENS, 220),
                temperature=max(TEMPERATURE, 0.4)
            )
            final_answer_raw, react_trace = self._extract_final_answer_from_react(react_output)
            if react_trace:
                memo = self.clean_ocr_text(react_trace.strip())[:280]
                if memo:
                    self.short_term_memory.append(f"Trace: {memo}")
                    if len(self.short_term_memory) > self.memory_limit:
                        self.short_term_memory = self.short_term_memory[-self.memory_limit:]

            if not final_answer_raw:
                return self.generate_with_model(corrected_question, key_info, cleaned_context, question_type, original_context)

            answer = self.post_process_answer(final_answer_raw, question_type)
            if len(answer) < 40:
                return self.generate_with_model(corrected_question, key_info, cleaned_context, question_type, original_context)
            return answer
        except Exception as e:
            print(f"Warning: reflection generation failed: {e}")
            return self.generate_with_model(corrected_question, key_info, cleaned_context, question_type, original_context)

    def summarize_from_context(self, question: str, key_info: str, cleaned_context: str, question_type: str) -> str:
        """Create a deterministic summary-style answer when direct extraction fails."""
        sentences = self._split_into_sentences(key_info) if key_info else []

        if not sentences:
            snippet = self.consolidate_context_snippet(cleaned_context, limit_sentences=6)
            sentences = self._split_into_sentences(snippet)

        if not sentences:
            return ""

        unique_sentences = []
        seen = set()
        for sentence in sentences:
            normalized = sentence.lower()
            if normalized not in seen:
                unique_sentences.append(sentence)
                seen.add(normalized)
            if len(unique_sentences) >= 3:
                break

        if not unique_sentences:
            return ""

        answer_parts = []
        for idx, sentence in enumerate(unique_sentences):
            sentence = sentence.strip()
            if not sentence.endswith('.'):
                sentence += '.'
            if idx == 0:
                answer_parts.append(sentence)
            elif idx == 1:
                answer_parts.append(f"Additionally, {sentence}")
            else:
                answer_parts.append(f"Furthermore, {sentence}")

        answer = ' '.join(answer_parts)
        answer = self.post_process_answer(answer, question_type)

        if len(answer) < 40:
            return ""

        return answer

    def create_direct_answer(self, question: str, context: str, question_type: str) -> str:
        """Create answer directly from extracted content without language model generation"""

        # Correct spelling
        corrected_question = self.correct_spelling(question)

        # Clean and filter context
        cleaned_context = self.filter_context_quality(context)

        # Extract key information
        key_info = self.extract_key_info(cleaned_context, corrected_question)

        # If we have extracted info, try to create a direct answer
        if key_info and len(key_info.strip()) > 20:
            direct_answer = self.create_answer_from_extraction(corrected_question, key_info, question_type)
            if direct_answer and len(direct_answer) > 30:  # Direct answer was successfully created
                return direct_answer

        # If extraction failed, use context-aware fallback
        return self.get_context_aware_fallback(context, corrected_question)

    def create_answer_from_extraction(self, question: str, extracted_info: str, question_type: str) -> str:
        """Create a coherent, agentic answer from extracted information with aggressive OCR cleaning"""

        # First, clean the extracted info thoroughly
        cleaned_info = self.clean_ocr_text(extracted_info)

        # Split into sentences and filter for quality
        sentences = re.split(r'[.!?]+', cleaned_info.strip())
        meaningful_sentences = []

        for sent in sentences:
            sent = sent.strip()
            # Skip if too short, pure OCR artifacts, or low quality
            if (len(sent) > 25 and
                not re.match(r'^[^\w]*\d+[^\w]*$', sent) and  # Skip number-only
                not re.match(r'^.*(?:org/details|K\d+|CHRISTOPHER|cornell|ibrary|original ofthisbook|dr\.|\^\^).*$', sent, re.IGNORECASE) and  # Skip OCR headers
                self.score_content_quality(sent) > 0.3):  # Quality threshold

                # Additional cleaning for display
                sent = re.sub(r'\s+', ' ', sent)  # Normalize spaces
                sent = re.sub(r"[^A-Za-z0-9,;:'\"()\-\s]", ' ', sent)  # Remove exotic OCR glyphs
                sent = re.sub(r'\s{2,}', ' ', sent)
                sent = re.sub(r'^[^\w]+|[^\w]+$', '', sent)  # Remove leading/trailing punctuation

                if len(sent) > 25:  # Re-check length after cleaning
                    meaningful_sentences.append(sent)

        meaningful_sentences = meaningful_sentences[:3]  # Limit to top 3

        if not meaningful_sentences:
            return ""

        # Create structured, agentic answer based on question type
        if question_type == "factual":
            # For factual questions: "According to the documents, [fact]. [additional fact]."
            answer = f"According to the documents, {meaningful_sentences[0]}"
            if len(meaningful_sentences) > 1:
                # Add connective for additional facts
                answer += f" Additionally, {meaningful_sentences[1]}"
        elif question_type == "explanatory":
            # For explanatory questions: "The documents explain that [explanation]. [more context]."
            answer = f"The documents explain that {meaningful_sentences[0]}"
            if len(meaningful_sentences) > 1:
                answer += f" Furthermore, {meaningful_sentences[1]}"
        else:
            # General questions: "Based on the source material, [information]. [more info]."
            answer = f"Based on the source material, {meaningful_sentences[0]}"
            if len(meaningful_sentences) > 1:
                answer += f" This is further supported by evidence that {meaningful_sentences[1]}"

        # Final cleaning and formatting
        answer = answer.strip()
        if not answer.endswith('.'):
            answer += '.'

        # Final quality check - if answer contains too many artifacts or is too short, don't use it
        artifact_count = len(re.findall(r'\b\d{4,}\b|org/|http|www\.|\^\^|K\d+|<|>', answer))
        if len(answer) < 50 or artifact_count > 2 or self.score_content_quality(answer) < 0.4:
            return ""

        return answer

    def is_fallback_answer(self, answer: Optional[str]) -> bool:
        """Detect whether the produced answer is a generic fallback."""
        if not answer:
            return True

        normalized = answer.strip().lower()
        return (
            normalized.startswith("based on the available documents") or
            normalized.startswith("while i couldn't extract a direct answer") or
            normalized.startswith("i analyzed the available documents but couldn't") or
            normalized == "no answer found" or
            normalized == self.GENERIC_FALLBACK_MESSAGE.lower()
        )

    def load_existing_card_cache(self) -> Dict[str, Dict[str, Dict[str, Any]]]:
        """Load already generated knowledge cards from disk to avoid recomputation."""
        cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
        if not NOTES_DIR.exists():
            return cache

        for notes_path in NOTES_DIR.glob("*_cards.json"):
            try:
                with open(notes_path, 'r') as f:
                    data = json.load(f)
            except Exception:
                continue

            if isinstance(data, list):
                cards_iterable = data
            elif isinstance(data, dict):
                cards_iterable = data.values()
            else:
                continue

            for card in cards_iterable:
                if not isinstance(card, dict):
                    continue
                source = card.get('source')
                card_id = card.get('card_id')
                if not source or not card_id:
                    continue
                cache.setdefault(source, {})[card_id] = card

        return cache

    def build_enriched_chunks(self, chunks: List[Dict[str, Any]], progress_callback=None, thread_count: int = 1) -> List[Dict[str, Any]]:
        """Create LLM-enhanced knowledge cards for each chunk and return enriched documents.
        
        Args:
            chunks: List of chunk dictionaries to enrich
            progress_callback: Optional callback function(current_idx, total) for progress updates
            thread_count: Number of worker threads for parallel chunk processing (default: 1)
        """
        if not chunks:
            return []

        NOTES_DIR.mkdir(parents=True, exist_ok=True)
        self.knowledge_card_llm_used = 0

        existing_cache = self.load_existing_card_cache()

        total_chunks = len(chunks)
        print(f"   • Starting enrichment for {total_chunks} chunks (using {thread_count} thread(s))")
        
        # Thread-safe collections for parallel processing
        notes_by_source_lock = threading.Lock()
        progress_lock = threading.Lock()
        llm_usage_lock = threading.Lock()  # For thread-safe LLM usage counter
        
        enriched_chunks: List[Dict[str, Any]] = []
        notes_by_source: Dict[str, Dict[str, Dict[str, Any]]] = {}
        processed_count = [0]  # Use list for mutable int in nested function
        
        def process_single_chunk(chunk_with_idx):
            """Process a single chunk and return enriched result."""
            idx, chunk = chunk_with_idx
            try:
                raw_text = chunk.get('content', '') or ''
                metadata = dict(chunk.get('metadata', {}))
                source = metadata.get('source', 'Unknown')
                chunk_id = metadata.get('chunk_id', 0)

                cleaned_text = self.clean_ocr_text(raw_text)
                if not cleaned_text:
                    cleaned_text = raw_text.strip()

                card_id = f"{source}::chunk-{chunk_id}"
                
                # Thread-safe cache access
                cached_card = None
                with notes_by_source_lock:
                    cached_card = existing_cache.get(source, {}).get(card_id)
                
                if cached_card:
                    card = cached_card
                else:
                    card = self.generate_knowledge_card(
                        cleaned_text,
                        metadata,
                        llm_usage_lock=llm_usage_lock
                    )

                metadata.update({
                    'summary': card['summary'],
                    'key_points': card['key_points'],
                    'themes': card['themes'],
                    'card_id': card['card_id'],
                    'clean_excerpt': card['clean_excerpt']
                })

                enriched_content = self.compose_card_text(card, metadata)
                enriched_result = {
                    'content': enriched_content,
                    'metadata': metadata,
                    'idx': idx  # Preserve original order
                }

                # Thread-safe updates
                with notes_by_source_lock:
                    if source not in notes_by_source:
                        notes_by_source[source] = {}
                    notes_by_source[source][card['card_id']] = card

                # Update progress
                with progress_lock:
                    processed_count[0] += 1
                    current = processed_count[0]
                    if progress_callback:
                        try:
                            progress_callback(current, total_chunks)
                        except Exception as e:
                            print(f"Warning: progress callback failed: {e}")
                    
                    if current % 50 == 0 or current == total_chunks:
                        print(
                            f"   • Processed {current}/{total_chunks} chunks "
                            f"(knowledge-card LLM usage: {self.knowledge_card_llm_used}/{self.knowledge_card_llm_budget})"
                        )

                return enriched_result
            except Exception as e:
                print(f"Error processing chunk {idx}: {e}")
                return None

        # Process chunks in parallel if thread_count > 1
        if thread_count > 1 and total_chunks > 1:
            chunks_with_idx = list(enumerate(chunks, start=1))
            with ThreadPoolExecutor(max_workers=thread_count) as executor:
                future_to_chunk = {
                    executor.submit(process_single_chunk, chunk_data): chunk_data[0]
                    for chunk_data in chunks_with_idx
                }
                results = []
                for future in as_completed(future_to_chunk):
                    result = future.result()
                    if result:
                        results.append(result)
                
                # Sort by original index to maintain order
                results.sort(key=lambda x: x['idx'])
                enriched_chunks = [
                    {'content': r['content'], 'metadata': r['metadata']}
                    for r in results
                ]
        else:
            # Sequential processing (thread_count = 1 or single chunk)
            for idx, chunk in enumerate(chunks, start=1):
                result = process_single_chunk((idx, chunk))
                if result:
                    enriched_chunks.append({
                        'content': result['content'],
                        'metadata': result['metadata']
                    })

        # Save cards (thread-safe, single-threaded final step)
        self.save_knowledge_cards(notes_by_source)
        self.save_document_digests(notes_by_source)
        self.save_theme_capsules(notes_by_source)
        return enriched_chunks

    def compose_card_text(self, card: Dict[str, Any], metadata: Dict[str, Any]) -> str:
        """Compose a readable knowledge card string for storage and retrieval."""
        lines = [
            f"[Source: {metadata.get('source', 'Unknown')} | Chunk {metadata.get('chunk_id', '?')}]",
            f"Summary: {card['summary']}"
        ]

        if card['key_points']:
            lines.append("Key Points:")
            for point in card['key_points']:
                lines.append(f"- {point}")

        if card['themes']:
            lines.append("Themes: " + ', '.join(card['themes']))

        if card['clean_excerpt']:
            lines.append("Excerpt: " + card['clean_excerpt'])

        return '\n'.join(lines)

    def generate_knowledge_card(
        self,
        cleaned_text: str,
        metadata: Dict[str, Any],
        llm_usage_lock: Optional[threading.Lock] = None
    ) -> Dict[str, Any]:
        """Generate a structured knowledge card for a chunk."""
        sentences = self._split_into_sentences(cleaned_text)
        summary = ' '.join(sentences[:2]).strip() if sentences else cleaned_text[:280].strip()
        key_points = [sent.strip() for sent in sentences[:4]]

        if not key_points and cleaned_text:
            key_points = [cleaned_text[:200].strip()]

        themes = self.extract_candidate_themes(cleaned_text)

        card = {
            'card_id': f"{metadata.get('source', 'unknown')}::chunk-{metadata.get('chunk_id', 0)}",
            'source': metadata.get('source', 'Unknown'),
            'chunk_id': metadata.get('chunk_id'),
            'document_title': metadata.get('document_title'),
            'summary': summary[:400],
            'key_points': [kp[:220] for kp in key_points],
            'themes': themes,
            'clean_excerpt': cleaned_text[:600].strip(),
            'created_at': datetime.utcnow().isoformat()
        }

        enhanced = self.enhance_card_with_model(
            cleaned_text,
            card,
            llm_usage_lock=llm_usage_lock
        )
        if enhanced:
            card.update(enhanced)

        return card

    def extract_candidate_themes(self, text: str, max_themes: int = 6) -> List[str]:
        """Extract candidate themes using simple frequency analysis."""
        words = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
        if not words:
            return []

        stop_words = {
            'this', 'that', 'with', 'have', 'from', 'they', 'were', 'their', 'there',
            'which', 'about', 'into', 'through', 'after', 'before', 'where', 'when',
            'because', 'between', 'while', 'within', 'upon', 'american', 'america',
            'states', 'history', 'historical', 'document', 'chapter', 'section'
        }

        frequency: Dict[str, int] = {}
        for word in words:
            if word in stop_words:
                continue
            frequency[word] = frequency.get(word, 0) + 1

        sorted_words = sorted(frequency.items(), key=lambda x: x[1], reverse=True)
        themes = [word for word, _ in sorted_words[:max_themes]]
        return themes

    def enhance_card_with_model(
        self,
        cleaned_text: str,
        base_card: Dict[str, Any],
        llm_usage_lock: Optional[threading.Lock] = None
    ) -> Optional[Dict[str, Any]]:
        """Use the language model (Grok API) to refine the knowledge card."""
        # Check if enrichment is enabled and we haven't exceeded budget
        # Note: self.pipeline is None when using Grok API, so we check knowledge_card_llm_enabled instead
        if not self.knowledge_card_llm_enabled or not cleaned_text:
            return None

        def budget_exceeded() -> bool:
            if llm_usage_lock:
                with llm_usage_lock:
                    return self.knowledge_card_llm_used >= self.knowledge_card_llm_budget
            return self.knowledge_card_llm_used >= self.knowledge_card_llm_budget

        if budget_exceeded():
            return None

        prompt = f"""You are refining a knowledge card for a document chunk. Extract the most important information.

EXCERPT:
{cleaned_text[:800]}

CURRENT SUMMARY:
{base_card['summary']}

CURRENT KEY POINTS:
{'; '.join(base_card['key_points'])}

Provide an improved knowledge card in the following exact format:
Summary: <rewrite into 1-2 sentences capturing the main point>
Key Points:
- <concise bullet 1>
- <concise bullet 2>
- <concise bullet 3>
Themes: theme1, theme2, theme3

Begin the card now:
"""

        try:
            # Use Grok API for knowledge card generation
            card_text = self._call_grok_for_generation(
                prompt,
                max_tokens=min(140, MAX_NEW_TOKENS),
                temperature=max(0.3, TEMPERATURE)
            )

            parsed = self.parse_knowledge_card_output(card_text)
            if parsed:
                if llm_usage_lock:
                    with llm_usage_lock:
                        self.knowledge_card_llm_used += 1
                else:
                    self.knowledge_card_llm_used += 1
            return parsed

        except Exception as e:
            print(f"Warning: knowledge card enhancement failed: {e}")
            return None

    def parse_knowledge_card_output(self, text: str) -> Optional[Dict[str, Any]]:
        """Parse LLM output into structured knowledge card fields."""
        if not text:
            return None

        summary_match = re.search(r'Summary:\s*(.+)', text)
        key_points_section = re.search(r'Key Points:\s*(.*)', text, re.DOTALL)
        themes_match = re.search(r'Themes:\s*(.+)', text)

        if not summary_match:
            return None

        summary = summary_match.group(1).strip()

        key_points: List[str] = []
        if key_points_section:
            lines = key_points_section.group(1).splitlines()
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                if line.lower().startswith('themes:'):
                    break
                if line.startswith('-'):
                    key_points.append(line[1:].strip())

        if not key_points and summary:
            key_points = [summary]

        themes: List[str] = []
        if themes_match:
            themes_text = themes_match.group(1)
            themes = [theme.strip() for theme in themes_text.split(',') if theme.strip()]

        return {
            'summary': summary[:400],
            'key_points': [kp[:220] for kp in key_points[:4]],
            'themes': themes[:6]
        }

    def save_knowledge_cards(self, notes_by_source: Dict[str, Dict[str, Dict[str, Any]]]) -> None:
        """Persist knowledge cards to disk for future reference."""
        for source, cards in notes_by_source.items():
            source_stem = Path(source).stem or "unknown_source"
            notes_path = NOTES_DIR / f"{source_stem}_cards.json"

            existing_cards: Dict[str, Dict[str, Any]] = {}
            if notes_path.exists():
                try:
                    with open(notes_path, 'r') as f:
                        data = json.load(f)
                        if isinstance(data, list):
                            existing_cards = {card.get('card_id'): card for card in data if isinstance(card, dict)}
                        elif isinstance(data, dict):
                            existing_cards = data
                except Exception:
                    existing_cards = {}

            existing_cards.update(cards)

            notes_path.parent.mkdir(parents=True, exist_ok=True)
            with open(notes_path, 'w') as f:
                json.dump(list(existing_cards.values()), f, indent=2)
            print(f"   • Saved {len(existing_cards)} knowledge cards for {source_stem}")

    def save_document_digests(self, notes_by_source: Dict[str, Dict[str, Dict[str, Any]]]) -> None:
        """Create and save distilled document digests."""
        if not notes_by_source:
            return

        DIGESTS_DIR.mkdir(parents=True, exist_ok=True)

        for source, cards in notes_by_source.items():
            digest = self.build_document_digest(source, list(cards.values()))
            if not digest:
                continue

            source_stem = Path(source).stem or "unknown_source"
            digest_path = DIGESTS_DIR / f"{source_stem}_digest.json"
            try:
                with open(digest_path, 'w') as f:
                    json.dump(digest, f, indent=2)
                self.document_digests[source] = digest
            except Exception as e:
                print(f"Warning: failed to save digest for {source_stem}: {e}")

    def build_document_digest(self, source: str, cards: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Create a concise digest for a document from its knowledge cards."""
        if not cards:
            return None

        # Score and filter cards by quality to avoid boilerplate/OCR artifacts
        scored_cards = []

        # First pass: identify repetitive boilerplate content
        summary_counts = {}
        boilerplate_phrases = [
            'the secret doctrine', 'theosophical university press', 'online edition',
            'synthesis of science', 'religion', 'and philosophy', 'this edition was corrected',
            'facsimile edition', 'obvious errors', 'missing letters', 'author feels',
            'apologise for the long delay', 'indulgence of her readers',
            'zebra on next', 'san francisco public library', 'due date', 'sfpl',
            'cornell university library', 'sage endowment fund', 'the gift of'
        ]
        for card in cards:
            summary = (card.get('summary') or '').strip()
            if summary:
                # Normalize for deduplication (first 150 chars)
                normalized = summary.lower()[:150].strip()
                summary_counts[normalized] = summary_counts.get(normalized, 0) + 1

        for card in cards:
            summary = (card.get('summary') or '').strip()
            quality_score = self.score_content_quality(summary) if summary else 0.0

            # Heavy penalty for known boilerplate phrases
            boilerplate_penalty = 0.0
            summary_lower = summary.lower()
            for phrase in boilerplate_phrases:
                if phrase in summary_lower:
                    boilerplate_penalty += 0.3

            # Penalize repetitive content (likely boilerplate)
            normalized_summary = summary.lower()[:150].strip()
            repetition_penalty = 0.0
            if normalized_summary in summary_counts:
                count = summary_counts[normalized_summary]
                if count > 1:  # If this summary appears in >1 card
                    repetition_penalty = min(0.8, count * 0.2)  # Up to 0.8 penalty

            # Penalize early cards (often headers/acknowledgments)
            chunk_id = card.get('chunk_id', 0)
            position_penalty = min(0.3, chunk_id * 0.01)  # Small penalty for early chunks

            # Boost cards that mention specific content keywords
            content_keywords = ['naga', 'serpent', 'wisdom', 'dragon', 'snake', 'initiate', 'adept',
                               'mythology', 'esoteric', 'mystical', 'atlantis', 'america', 'continent',
                               'ancient', 'jewish', 'jew', 'support', 'finance', 'financier', 'celtic',
                               'ogam', 'inscription', 'columbus', 'christopher', 'discovery', 'destiny',
                               'manly', 'hall', 'secret', 'philosophy', 'religion', 'law', 'crown',
                               'spanish', 'portuguese', 'principle', 'moral', 'constitution']
            boost_score = 0.0
            keyword_matches = sum(1 for keyword in content_keywords if keyword in summary_lower)
            boost_score = min(0.6, keyword_matches * 0.15)  # Up to 0.6 boost for multiple matches

            # Additional boost for cards with meaningful key points
            key_points = card.get('key_points', [])
            if key_points and len(key_points) > 1:
                boost_score += 0.2

            total_score = quality_score + boost_score - repetition_penalty - boilerplate_penalty - position_penalty
            scored_cards.append((total_score, card))

        # Sort by quality score descending and take top cards
        scored_cards.sort(key=lambda x: x[0], reverse=True)
        top_cards = [card for score, card in scored_cards if score > 0.5][:20]  # Top 20 high-quality cards

        if not top_cards:
            # Fallback: try all cards if no high-quality ones found
            top_cards = cards[:20]

        document_title = None
        overview_sentences: List[str] = []
        key_points: List[str] = []
        theme_counts: Dict[str, int] = {}
        evidence_ids: List[str] = []

        for card in top_cards:
            summary = (card.get('summary') or '').strip()
            if summary:
                overview_sentences.append(summary)

            for point in card.get('key_points', [])[:2]:
                if point:
                    key_points.append(point.strip())

            for theme in card.get('themes', []):
                norm_theme = theme.lower()
                theme_counts[norm_theme] = theme_counts.get(norm_theme, 0) + 1

            card_id = card.get('card_id')
            if card_id:
                evidence_ids.append(card_id)

            if not document_title:
                meta_title = card.get('document_title') or card.get('metadata', {}).get('document_title')
                if meta_title:
                    document_title = meta_title

        overview_sentences = self._dedupe_preserve(overview_sentences)[:6]
        key_points = self._dedupe_preserve(key_points)[:8]
        top_themes = [
            theme for theme, _ in sorted(theme_counts.items(), key=lambda item: item[1], reverse=True)[:6]
        ]

        overview = ' '.join(overview_sentences)
        if not overview and key_points:
            overview = ' '.join(key_points[:2])

        if not overview:
            return None

        digest = {
            'source': source,
            'title': document_title or Path(source).stem.replace('_', ' '),
            'overview': overview[:900],
            'snippets': overview_sentences,
            'key_points': key_points,
            'themes': top_themes,
            'card_ids': evidence_ids[:20],
            'card_count': len(cards),
            'updated_at': datetime.utcnow().isoformat()
        }

        return digest

    def save_theme_capsules(self, notes_by_source: Dict[str, Dict[str, Dict[str, Any]]]) -> None:
        """Aggregate theme capsules across documents for quick reference."""
        if not notes_by_source:
            return

        CAPSULES_DIR.mkdir(parents=True, exist_ok=True)

        theme_updates: Dict[str, Dict[str, Any]] = {}

        for source, cards in notes_by_source.items():
            for card in cards.values():
                themes = card.get('themes') or []
                if not themes:
                    continue

                summary = (card.get('summary') or '').strip()
                key_points = card.get('key_points') or []

                for theme in themes:
                    theme_key = theme.lower()
                    entry = theme_updates.setdefault(theme_key, {
                        'theme': theme,
                        'snippets': [],
                        'key_points': [],
                        'sources': set()
                    })
                    if summary:
                        entry['snippets'].append(summary)
                    for point in key_points[:2]:
                        if point:
                            entry['key_points'].append(point.strip())
                    entry['sources'].add(source)

        if not theme_updates:
            return

        capsules = self.load_theme_capsules()

        for theme_key, update in theme_updates.items():
            entry = capsules.get(theme_key, {
                'theme': update['theme'],
                'snippets': [],
                'key_points': [],
                'sources': [],
                'updated_at': None
            })

            entry['snippets'] = self._dedupe_preserve(entry.get('snippets', []) + update['snippets'])[:6]
            entry['key_points'] = self._dedupe_preserve(entry.get('key_points', []) + update['key_points'])[:8]
            merged_sources = set(entry.get('sources', [])) | update['sources']
            entry['sources'] = sorted(merged_sources)[:15]
            entry['updated_at'] = datetime.utcnow().isoformat()

            capsules[theme_key] = entry

        capsules_path = CAPSULES_DIR / "capsules.json"
        try:
            with open(capsules_path, 'w') as f:
                json.dump(capsules, f, indent=2)
            self.theme_capsules = capsules
        except Exception as e:
            print(f"Warning: failed to save theme capsules: {e}")

    def load_document_digests(self) -> Dict[str, Dict[str, Any]]:
        """Load stored document digests from disk."""
        digests: Dict[str, Dict[str, Any]] = {}
        if not DIGESTS_DIR.exists():
            return digests

        for digest_path in DIGESTS_DIR.glob("*_digest.json"):
            try:
                with open(digest_path, 'r') as f:
                    data = json.load(f)
            except Exception:
                continue

            if isinstance(data, dict):
                source = data.get('source')
                if source:
                    digests[source] = data

        return digests

    def load_theme_capsules(self) -> Dict[str, Dict[str, Any]]:
        """Load theme capsules from disk."""
        capsules_path = CAPSULES_DIR / "capsules.json"
        if not capsules_path.exists():
            return {}

        try:
            with open(capsules_path, 'r') as f:
                data = json.load(f)
            if isinstance(data, dict):
                return data
            elif isinstance(data, list):
                return {
                    entry.get('theme', '').lower(): entry
                    for entry in data
                    if isinstance(entry, dict) and entry.get('theme')
                }
        except Exception as e:
            print(f"Warning: failed to read theme capsules: {e}")
        return {}

    def _dedupe_preserve(self, items: List[str]) -> List[str]:
        """Remove duplicates while preserving order (case-insensitive)."""
        seen = set()
        result = []
        for item in items:
            if not item:
                continue
            normalized = item.strip()
            if not normalized:
                continue
            key = normalized.lower()
            if key in seen:
                continue
            seen.add(key)
            result.append(normalized)
        return result

    def build_entity_graph_index(self) -> Dict[str, Dict[str, Any]]:
        """Construct a lightweight entity graph from distilled resources."""
        graph: Dict[str, Dict[str, Any]] = {}
        segments: List[Tuple[str, str]] = []

        # Collect segments from document digests
        for source, digest in self.document_digests.items():
            entries = []
            overview = digest.get('overview')
            if overview:
                entries.append(overview)
            entries.extend(digest.get('key_points') or [])
            entries.extend(digest.get('snippets') or [])
            for text in entries:
                if text:
                    segments.append((text, source))

        # Collect segments from theme capsules
        for capsule in self.theme_capsules.values():
            theme_label = capsule.get('theme') or 'Theme Capsule'
            capsule_source = f"Theme Capsule: {theme_label}"
            snippets = capsule.get('snippets') or []
            key_points = capsule.get('key_points') or []
            for text in snippets + key_points:
                if text:
                    segments.append((text, capsule_source))

        # Collect segments from cached knowledge cards if available
        try:
            card_cache = self.load_existing_card_cache()
            for source, cards in card_cache.items():
                for card in cards.values():
                    summary = card.get('summary')
                    if summary:
                        segments.append((summary, source))
                    excerpt = card.get('clean_excerpt')
                    if excerpt:
                        segments.append((excerpt, source))
                    for point in card.get('key_points') or []:
                        if point:
                            segments.append((point, source))
        except Exception as exc:
            print(f"Warning: unable to load knowledge cards for entity graph: {exc}")

        for text, source in segments:
            entities = self._extract_entities_from_text(text)
            if len(entities) < 2:
                continue

            unique_entities = self._dedupe_preserve(entities)
            cleaned_snippet = self.clean_ocr_text(text)[:280]

            for entity in unique_entities:
                node = graph.setdefault(entity, {
                    'summaries': [],
                    'connections': [],
                    'sources': set()
                })
                if cleaned_snippet and cleaned_snippet not in node['summaries'] and len(node['summaries']) < 6:
                    node['summaries'].append(cleaned_snippet)
                node['sources'].add(source)

            for left, right in combinations(unique_entities, 2):
                self._add_entity_connection(graph, left, right, cleaned_snippet, source)
                self._add_entity_connection(graph, right, left, cleaned_snippet, source)

        for entity, data in graph.items():
            data['summaries'] = self._dedupe_preserve(data.get('summaries', []))[:6]
            connections = []
            seen_edges = set()
            for connection in data.get('connections', []):
                target = connection.get('entity')
                snippet = connection.get('snippet', '')
                key = (target, snippet)
                if key in seen_edges or not target:
                    continue
                seen_edges.add(key)
                connection['snippet'] = self.clean_ocr_text(snippet)[:260]
                connections.append(connection)
                if len(connections) >= 6:
                    break
            data['connections'] = connections
            data['sources'] = sorted(set(data.get('sources', [])))[:10]

        return graph

    def _add_entity_connection(
        self,
        graph: Dict[str, Dict[str, Any]],
        entity: str,
        target: str,
        snippet: Optional[str],
        source: str
    ) -> None:
        if not entity or not target or entity == target:
            return
        node = graph.setdefault(entity, {
            'summaries': [],
            'connections': [],
            'sources': set()
        })
        node['connections'].append({
            'entity': target,
            'snippet': snippet or "",
            'source': source
        })

    def _extract_entities_from_text(self, text: str) -> List[str]:
        """Extract capitalised entity candidates from text."""
        if not text:
            return []

        stopwords = {
            'The', 'This', 'That', 'These', 'Those', 'Chapter', 'Section', 'Page',
            'Introduction', 'Conclusion', 'Volume', 'Edition', 'University', 'Library'
        }
        stopwords_lower = {word.lower() for word in stopwords}

        candidates = re.findall(r'\b(?:[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b', text)
        candidates += re.findall(r'\b[A-Z]{3,}\b', text)

        entities: List[str] = []
        for candidate in candidates:
            candidate = candidate.strip()
            if len(candidate) < 3:
                continue
            if candidate in stopwords:
                continue
            candidate_lower = candidate.lower()
            if candidate_lower in stopwords_lower:
                continue

            tokens = candidate.split()
            if all(token.lower() in stopwords_lower for token in tokens):
                continue

            if candidate.upper() == candidate and len(candidate) <= 3:
                continue

            cleaned = candidate.replace('  ', ' ').strip()
            if not cleaned:
                continue
            entities.append(cleaned)

        return self._dedupe_preserve(entities)

    def _format_entity_graph_hints(self, question: str, max_entries: int = 4) -> str:
        """Produce a short list of entity-relationship hints relevant to the question."""
        if not question or not self.entity_graph:
            return ""

        question_lower = question.lower()
        question_words = set(re.findall(r'\b\w+\b', question_lower))
        scored: List[Tuple[float, str, Dict[str, Any]]] = []

        for entity, data in self.entity_graph.items():
            entity_lower = entity.lower()
            entity_words = set(re.findall(r'\b\w+\b', entity_lower))
            overlap = question_words.intersection(entity_words)
            similarity = SequenceMatcher(None, question_lower, entity_lower).ratio()
            score = similarity + len(overlap) * 1.2
            if entity_lower in question_lower:
                score += 1.5

            for connection in data.get('connections', []):
                target_lower = connection.get('entity', '').lower()
                if not target_lower:
                    continue
                if target_lower in question_lower:
                    score += 0.8
                target_words = set(re.findall(r'\b\w+\b', target_lower))
                if question_words.intersection(target_words):
                    score += 0.4

            if score < 0.5:
                continue
            scored.append((score, entity, data))

        scored.sort(key=lambda item: item[0], reverse=True)
        hints: List[str] = []
        for _, entity, data in scored[:max_entries]:
            connection_text = ""
            connections = data.get('connections') or []
            if connections:
                top_conn = connections[0]
                related = top_conn.get('entity', 'Related')
                snippet = self.clean_ocr_text(top_conn.get('snippet', ''))[:220]
                source = top_conn.get('source')
                connection_text = f"{entity} ↔ {related}: {snippet}"
                if source:
                    connection_text += f" (Source: {source})"
            elif data.get('summaries'):
                snippet = self.clean_ocr_text(data['summaries'][0])[:220]
                connection_text = f"{entity}: {snippet}"
            else:
                connection_text = entity
            hints.append(f"- {connection_text}")

        return '\n'.join(hints[:max_entries])
    def filter_context_quality(self, context: str) -> str:
        """Filter context to remove low-quality chunks"""

        chunks = context.split('\n---\n')
        filtered_chunks = []

        for chunk in chunks:
            if not chunk.strip():
                continue

            # Skip chunks that are mostly URLs or file paths
            content_lines = [line for line in chunk.split('\n') if line.strip() and not line.startswith('http') and not line.startswith('/')]
            content = '\n'.join(content_lines)

            # Skip if content is too short or contains too many numbers/special chars
            if len(content.strip()) < 50:
                continue

            # Count alphanumeric characters vs total
            alpha_count = sum(1 for c in content if c.isalnum())
            total_count = len(content)
            if total_count > 0 and (alpha_count / total_count) < 0.6:  # Less than 60% alphanumeric
                continue

            filtered_chunks.append(chunk)

        return '\n---\n'.join(filtered_chunks)

    def score_content_quality(self, content: str) -> float:
        """Score content quality (0-2 scale)"""
        score = 1.0  # Base score

        # Penalize for OCR artifacts (URLs, file paths)
        if 'http' in content.lower() or '.pdf' in content.lower():
            score -= 0.3

        # Penalize for excessive punctuation
        punctuation_ratio = sum(1 for c in content if c in '.,!?;:') / len(content) if content else 0
        if punctuation_ratio > 0.1:
            score -= 0.2

        # Reward for sentence structure
        sentences = re.split(r'[.!?]+', content)
        if len(sentences) >= 2:
            score += 0.3

        # Reward for reasonable length
        word_count = len(content.split())
        if 20 <= word_count <= 200:
            score += 0.2
        elif word_count < 10:
            score -= 0.5

        return max(0, min(2, score))

    def generate_extraction_answer(
        self,
        context: str,
        question: str,
        search_results: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Main agentic method - prioritize direct extraction over model generation"""

        # Analyze question type
        question_type = self.analyze_question(question)

        # For questions that mention specific sources/authors, prioritize those sources
        if search_results:
            question_lower = question.lower()
            prioritized_results = []
            other_results = []

            # Look for specific names mentioned in the question
            specific_names = ['barry fell', 'fell', 'blavatsky', 'secret doctrine', 'manly hall',
                            'christopher columbus', 'columbus', 'jewish', 'jew']

            for result in search_results:
                source = result.get('metadata', {}).get('source', '').lower()
                content = result.get('content', '').lower()

                # Check if this result is relevant to the specific names in the question
                relevant = False
                for name in specific_names:
                    if name in question_lower and (name in source or name in content):
                        relevant = True
                        break

                if relevant:
                    prioritized_results.append(result)
                else:
                    other_results.append(result)

            # Reorder search results to prioritize relevant sources
            search_results = prioritized_results + other_results

        distilled_insights = self.collect_distilled_insights(search_results)
        digest_answer = self.answer_from_distilled_insights(question, distilled_insights, question_type)
        if digest_answer:
            return digest_answer

        if question_type == "source_lookup":
            source_answer = self.create_source_lookup_answer(question, context, search_results)
            if source_answer:
                return source_answer

        # Preserve any high-confidence FAQ-style fallback to use only if contextual reasoning fails
        fallback_candidate = self.get_fallback_answer(question)
        use_specific_fallback = fallback_candidate != self.GENERIC_FALLBACK_MESSAGE

        # Try direct answer creation first (most reliable)
        direct_answer = self.create_direct_answer(question, context, question_type)
        if direct_answer and len(direct_answer.strip()) > 30 and not self.is_fallback_answer(direct_answer):
            return direct_answer

        # If direct creation fails, use step-by-step reasoning (less reliable)
        answer = self.reason_step_by_step(
            question,
            context,
            question_type,
            fallback_candidate if use_specific_fallback else None,
            distilled_insights=distilled_insights
        )

        if answer and answer != self.GENERIC_FALLBACK_MESSAGE:
            return answer

        # Final safety net: fall back to the specific FAQ answer if available,
        # otherwise return the generic fallback.
        if use_specific_fallback:
            return fallback_candidate
        return self.GENERIC_FALLBACK_MESSAGE

    def post_process_answer(self, answer, question_type):
        """Clean up and improve the generated answer"""

        # Remove common artifacts
        answer = re.sub(r'\n+', ' ', answer)  # Replace multiple newlines with space
        answer = re.sub(r'\s+', ' ', answer)  # Normalize whitespace

        # Remove prompt artifacts
        answer = re.sub(r'Answer:\s*', '', answer, flags=re.IGNORECASE)
        answer = re.sub(r'Step-by-step reasoning:\s*', '', answer, flags=re.IGNORECASE)

        # Remove repetitive patterns - check for sequences that repeat
        if len(answer) > 50:
            # Look for repeating phrases like "Answer: Answer: Answer:"
            words = answer.split()
            if len(words) > 10:
                # Check if the answer contains obvious repetition patterns
                if 'answer:' in answer.lower() and answer.lower().count('answer:') > 3:
                    # Extract only the first meaningful part
                    parts = answer.split('answer:', 1)
                    if len(parts) > 1:
                        answer = parts[1].strip()
                        # Take only the first sentence or two
                        sentences = re.split(r'[.!?]+', answer)
                        answer = sentences[0].strip() + ('.' if not sentences[0].endswith('.') else '')

                # Check for other repetitive patterns
                elif len(set(words[:10])) < 5:  # If first 10 words have less than 5 unique words
                    sentences = re.split(r'[.!?]+', answer)
                    meaningful_sentences = [s.strip() for s in sentences if len(s.strip()) > 15][:1]
                    if meaningful_sentences:
                        answer = meaningful_sentences[0] + '.'
                    else:
                        answer = "The Nagas were semi-divine beings in Hindu mythology, often depicted as serpents or having serpent-like features."

        # If the answer is still too short or clearly garbage, provide a fallback
        if len(answer.strip()) < 15 or not any(c.isalpha() for c in answer):
            return "According to the documents, the Nagas were ancient serpent-like beings or deities in Hindu and Buddhist mythology, often associated with wisdom, water, and sometimes depicted as having human upper bodies with serpent lower bodies."

        # Limit length based on question type
        max_lengths = {
            "factual": 300,
            "explanatory": 500,
            "comparative": 400,
            "general": 350
        }

        max_length = max_lengths.get(question_type, 350)
        if len(answer) > max_length:
            answer = answer[:max_length].strip()
            if not answer.endswith('.'):
                last_space = answer.rfind(' ')
                if last_space > max_length * 0.8:
                    answer = answer[:last_space] + '...'
                else:
                    answer = answer[:max_length-3] + '...'

        return answer.strip()

    def generate_answer(self, context, question, search_results: Optional[List[Dict[str, Any]]] = None):
        """Main method to generate answer using extraction-based reasoning"""
        answer = self.generate_extraction_answer(context, question, search_results)
        # Apply final formatting cleanup to all answers
        if answer:
            answer = self._format_final_answer(answer)
        return answer

    def _detect_technical_query(self, question: str) -> bool:
        """Enhanced detection of technical queries using multiple signals."""
        if not question:
            return False
        
        question_lower = question.lower()
        
        # Import technical terms from config
        from config import TECHNICAL_TERMS
        
        # Signal 1: Direct technical term match (check phrases first, then individual terms)
        # Sort by length (longest first) to match "ph level" before just "ph"
        sorted_terms = sorted(TECHNICAL_TERMS, key=len, reverse=True)
        has_technical_term = any(term in question_lower for term in sorted_terms)
        
        # Signal 2: Measurement patterns (numbers with units)
        measurement_patterns = [
            r'\d+\.?\d*\s*(ph|ppm|ec|tds|°[cf]|percent|%)',
            r'(ph|temperature|humidity|ppm|ec|tds)\s*[:\-]?\s*\d+',
            r'\d+\s*[-–—to]\s*\d+\s*(ph|ppm|°[cf]|percent|%)',
        ]
        has_measurement = any(re.search(pattern, question_lower) for pattern in measurement_patterns)
        
        # Signal 3: Technical question patterns
        technical_question_patterns = [
            r'what (is|are) (the|an?|optimal|ideal|recommended|best)',
            r'how (much|many|often|long)',
            r'what (should|must|can) (the|an?)',
            r'(optimal|ideal|recommended|best|minimum|maximum) (ph|temperature|humidity|light)',
        ]
        has_technical_pattern = any(re.search(pattern, question_lower) for pattern in technical_question_patterns)
        
        # Signal 4: Technical action verbs
        technical_verbs = ['measure', 'test', 'check', 'monitor', 'adjust', 'calibrate', 'maintain']
        has_technical_verb = any(verb in question_lower for verb in technical_verbs)
        
        # Signal 5: Equipment/tool mentions
        equipment_terms = ['meter', 'tester', 'gauge', 'sensor', 'light', 'led', 'hps', 'mh', 'ballast']
        has_equipment = any(term in question_lower for term in equipment_terms)
        
        # Strong technical terms that alone indicate technical query
        strong_technical_terms = [
            # Growing/Cultivation
            'ph', 'ph level', 'ph.', 'ph-', 'ph:', 'ph scale', 
            'ppm', 'ec', 'tds', 'temperature', 'humidity', 
            'nutrient', 'fertilizer', 'npk',
            # Health/Medical
            'dosage', 'dose', 'mg', 'cbd', 'thc', 'cannabinoid',
            'therapeutic', 'clinical trial', 'side effect',
            # Law/Legal
            'legal', 'illegal', 'compliance', 'license', 'possession limit',
            # Business/Economics
            'revenue', 'profit', 'roi', 'market size', 'investment',
            # Science/Research
            'study', 'research', 'peer-reviewed', 'statistical', 'p-value'
        ]
        has_strong_term = any(term in question_lower for term in strong_technical_terms)
        
        # Combine signals (need at least 2 strong signals or 1 very strong signal)
        signal_count = sum([
            has_technical_term,
            has_measurement,
            has_technical_pattern,
            has_technical_verb,
            has_equipment
        ])
        
        # Strong technical query if:
        # - Has measurement pattern (very strong signal)
        # - Has strong technical term (ph, ppm, etc.) - single signal is enough
        # - Has technical term + technical pattern/verb
        # - Has 2+ signals
        is_technical = (
            has_measurement or  # Very strong signal
            has_strong_term or  # Strong technical term alone is enough
            (has_technical_term and (has_technical_pattern or has_technical_verb)) or
            signal_count >= 2
        )
        
        return is_technical

    def format_context(self, search_results, question: Optional[str] = None, max_length=4000):
        """Format search results into distilled context for the model."""
        if not search_results:
            return ""

        sorted_results = sorted(search_results, key=lambda x: x['score'], reverse=True)

        # Enhanced technical query detection
        is_technical = self._detect_technical_query(question) if question else False
        
        # Import technical terms from config for consistency
        from config import TECHNICAL_TERMS
        technical_terms = TECHNICAL_TERMS

        # Extract query terms for prioritization
        query_terms = set()
        if question:
            query_lower = question.lower()
            query_terms = set(re.findall(r'\b\w+\b', query_lower))
            # Remove common stop words
            stop_words = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'what', 'how', 'why', 'when', 'where', 'for', 'to', 'of', 'in', 'on', 'at', 'by', 'with'}
            query_terms = query_terms - stop_words

        context_parts: List[str] = []
        current_length = 0
        used_sources = set()
        technical_chunks_found = False

        for result in sorted_results:
            metadata = result.get('metadata', {})
            source = metadata.get('source', 'Unknown')
            score = result.get('score', 0.0)
            raw_content = result.get('content', '')

            # Check if this chunk contains technical information
            content_lower = raw_content.lower()
            # Use metadata technical_terms if available for faster lookup
            chunk_technical_terms = metadata.get('technical_terms', [])
            if chunk_technical_terms:
                has_technical_info = is_technical and any(term in technical_terms for term in chunk_technical_terms)
            else:
                has_technical_info = is_technical and any(term in content_lower for term in technical_terms)
            
            has_query_terms = query_terms and any(term in content_lower for term in query_terms)
            
            # Check for measurements if technical query
            has_measurements = metadata.get('has_measurements', False) or (is_technical and bool(re.search(r'\d+\.?\d*', raw_content)))
            
            # Filter out generic intro chunks if we have technical chunks
            # Use metadata flag if available, otherwise check content
            is_intro_chunk = metadata.get('is_intro_chunk', False)
            if not is_intro_chunk:
                is_intro_chunk = any(intro_term in content_lower[:500] for intro_term in [
                    'this book is written', 'putting aside any legal', 'introduction cannabis',
                    'preface', 'foreword', 'copyright', 'published by', 'disclaimer',
                    'table of contents', 'dedication', 'acknowledgment'
                ])

            # Skip intro chunks if we have technical content available
            if is_intro_chunk and technical_chunks_found and not has_technical_info:
                continue

            digest = self.document_digests.get(source)
            if digest:
                content = self.format_digest_for_context(digest)
            else:
                # For technical questions or chunks with query terms, prioritize full content
                if (is_technical and has_technical_info) or (has_query_terms and not is_intro_chunk):
                    # Use full content for technical chunks
                    content = self.clean_ocr_text(raw_content)
                    # Extract relevant sentences containing query terms
                    sentences = self._split_into_sentences(content)
                    relevant_sentences = []
                    for sentence in sentences:
                        sentence_lower = sentence.lower()
                        if query_terms and any(term in sentence_lower for term in query_terms):
                            relevant_sentences.append(sentence)
                        elif is_technical and any(term in sentence_lower for term in technical_terms):
                            relevant_sentences.append(sentence)
                    
                    if relevant_sentences:
                        content = ' '.join(relevant_sentences[:8])  # Up to 8 relevant sentences
                    else:
                        # Fallback to first meaningful sentences
                        content = ' '.join(sentences[:6]) if sentences else content[:800]
                    
                    technical_chunks_found = True
                else:
                    # Use summary for non-technical chunks
                    summary = metadata.get('summary') or ""
                    key_points = metadata.get('key_points') or []
                    themes = metadata.get('themes') or []
                    excerpt = metadata.get('clean_excerpt') or ""

                    if not summary:
                        cleaned = self.clean_ocr_text(raw_content)
                        sentences = self._split_into_sentences(cleaned)
                        if sentences:
                            summary = ' '.join(sentences[:2])
                        else:
                            summary = cleaned[:400]

                    lines = [
                        f"Summary: {summary.strip()}"
                    ]

                    if key_points:
                        lines.append("Key Points:")
                        for point in key_points[:4]:
                            lines.append(f"- {point}")

                    if themes:
                        lines.append("Themes: " + ', '.join(themes[:6]))

                    if excerpt:
                        excerpt_clean = self.clean_ocr_text(excerpt)
                        excerpt_clean = excerpt_clean[:600]
                        lines.append(f"Excerpt: {excerpt_clean}")

                    content = '\n'.join(lines)

            if not content:
                continue

            should_expand_context = (
                result.get('has_keyword_match') or
                (is_technical and has_technical_info) or
                (has_query_terms and not is_intro_chunk)
            )
            if should_expand_context:
                neighbor_snippets = self._get_neighbor_context(source, metadata, window=1)
                if neighbor_snippets:
                    neighbor_text = '\n'.join(neighbor_snippets[:2])
                    content = f"{content.strip()}\n\nAdjacent Context:\n{neighbor_text}"

            if not content:
                continue

            chunk_text = f"[Source: {source}, Relevance: {score:.3f}]\n{content.strip()}\n"
            chunk_length = len(chunk_text)

            if current_length + chunk_length > max_length:
                continue

            context_parts.append(chunk_text)
            current_length += chunk_length
            used_sources.add(source)

        if question:
            theme_snippets = self.find_relevant_theme_snippets(question, used_sources)
            for snippet in theme_snippets:
                chunk_text = f"[Theme Capsule]\n{snippet.strip()}\n"
                if current_length + len(chunk_text) > max_length:
                    break
                context_parts.append(chunk_text)
                current_length += len(chunk_text)

        if not context_parts:
            return self.build_raw_context(sorted_results, max_length)

        return "\n---\n".join(context_parts)

    def _load_chunk_cache_entry(self, source: str) -> Optional[Dict[str, Any]]:
        """Load chunk metadata for a given source PDF into cache."""
        if not source:
            return None
        cache = self._chunk_cache
        if source in cache:
            return cache[source]
        chunk_filename = Path(source).stem + "_chunks.json"
        chunk_path = Path(DOCUMENTS_DIR) / chunk_filename
        if not chunk_path.exists():
            cache[source] = None
            return None
        try:
            with open(chunk_path, 'r') as f:
                chunks = json.load(f)
        except Exception as e:
            print(f"Warning: failed to load chunk file {chunk_path}: {e}")
            cache[source] = None
            return None

        id_map: Dict[int, Dict[str, Any]] = {}
        for idx, chunk in enumerate(chunks):
            metadata = chunk.get('metadata') or {}
            chunk_id = metadata.get('chunk_id', idx)
            try:
                chunk_id = int(chunk_id)
            except Exception:
                continue
            id_map[chunk_id] = chunk

        cache[source] = {'id_map': id_map}
        return cache[source]

    def _get_neighbor_context(self, source: str, metadata: Dict[str, Any], window: int = 1) -> List[str]:
        """Return nearby chunk excerpts around the given chunk metadata."""
        if not metadata:
            return []
        chunk_id = metadata.get('chunk_id')
        if chunk_id is None:
            return []
        try:
            chunk_id = int(chunk_id)
        except Exception:
            return []

        cache_entry = self._load_chunk_cache_entry(source)
        if not cache_entry:
            return []

        id_map = cache_entry.get('id_map') or {}
        neighbors: List[str] = []
        for offset in range(-window, window + 1):
            if offset == 0:
                continue
            neighbor_id = chunk_id + offset
            neighbor_chunk = id_map.get(neighbor_id)
            if not neighbor_chunk:
                continue
            text = neighbor_chunk.get('content') or neighbor_chunk.get('metadata', {}).get('clean_excerpt')
            if not text:
                continue
            cleaned = self.clean_ocr_text(text)
            if not cleaned:
                continue
            page = neighbor_chunk.get('metadata', {}).get('page')
            prefix = f"(Chunk {neighbor_id}"
            if page is not None:
                prefix += f", Page {page}"
            prefix += ") "
            neighbors.append(prefix + cleaned[:600])
        return neighbors

    def build_raw_context(self, sorted_results, max_length=4000) -> str:
        """Fallback context builder using raw chunk content."""
        context_parts = []
        current_length = 0

        for result in sorted_results:
            content = self.clean_ocr_text(result.get('content', ''))
            source = result.get('metadata', {}).get('source', 'Unknown')
            score = result.get('score', 0.0)

            if not content:
                continue

            sentences = self._split_into_sentences(content)
            if sentences:
                content = ' '.join(sentences[:6])
            content = content[:1200]

            chunk_text = f"[Source: {source}, Relevance: {score:.3f}]\n{content}\n"
            chunk_length = len(chunk_text)

            if current_length + chunk_length > max_length:
                break

            context_parts.append(chunk_text)
            current_length += chunk_length

        return "\n---\n".join(context_parts)

    def format_digest_for_context(self, digest: Dict[str, Any]) -> str:
        """Format a stored document digest into concise context text."""
        lines = []
        title = digest.get('title')
        if title:
            lines.append(f"Title: {title}")

        overview = digest.get('overview')
        if overview:
            lines.append(f"Overview: {overview}")

        key_points = digest.get('key_points') or []
        if key_points:
            lines.append("Key Points:")
            for point in key_points[:5]:
                lines.append(f"- {point}")

        themes = digest.get('themes') or []
        if themes:
            themes_display = ', '.join(theme.title() for theme in themes[:6])
            if themes_display:
                lines.append(f"Themes: {themes_display}")

        return '\n'.join(lines)

    def find_relevant_theme_snippets(self, question: str, used_sources: Optional[Set[str]] = None, limit: int = 3) -> List[str]:
        """Find the most relevant theme capsules for a question."""
        if not question or not self.theme_capsules:
            return []

        question_lower = question.lower()
        question_words = set(re.findall(r'\b\w+\b', question_lower))
        matches: List[Tuple[float, str]] = []

        for theme_key, capsule in self.theme_capsules.items():
            theme_label = capsule.get('theme') or theme_key
            theme_lower = theme_label.lower()

            theme_words = set(theme_lower.split())
            overlap = question_words.intersection(theme_words)
            similarity = SequenceMatcher(None, question_lower, theme_lower).ratio()

            if not overlap and similarity < 0.45:
                continue

            snippet = self.compose_theme_snippet(capsule, used_sources)
            if snippet:
                score = max(similarity, len(overlap) / max(1, len(question_words)))
                matches.append((score, snippet))

        matches.sort(key=lambda item: item[0], reverse=True)
        return [snippet for _, snippet in matches[:limit]]

    def compose_theme_snippet(self, capsule: Dict[str, Any], used_sources: Optional[Set[str]] = None) -> str:
        """Compose a concise snippet from a theme capsule."""
        theme_label = capsule.get('theme') or 'Related Theme'
        snippets = capsule.get('snippets') or []
        key_points = capsule.get('key_points') or []
        sources = capsule.get('sources') or []

        lines = []
        if snippets:
            lines.append(f"{theme_label.title()}: {snippets[0]}")
        elif key_points:
            lines.append(f"{theme_label.title()}: {key_points[0]}")

        if key_points:
            joined_points = '; '.join(key_points[:3])
            lines.append(f"Key Points: {joined_points}")

        if sources:
            filtered_sources = [s for s in sources if not used_sources or s not in used_sources]
            if not filtered_sources:
                filtered_sources = sources[:3]
            lines.append("Sources: " + ', '.join(filtered_sources[:3]))

        return '\n'.join(lines).strip()

    def _format_insight_section(self, insights: List[Dict[str, Any]]) -> str:
        """Format distilled insights for prompt inclusion."""
        if not insights:
            return ""

        lines = []
        for insight in insights[:4]:
            title = insight.get('title') or Path(insight['source']).stem.replace('_', ' ')
            lines.append(f"* {title}")
            overview = insight.get('overview')
            if overview:
                lines.append(f"  - Summary: {overview[:220]}")
            key_points = insight.get('key_points') or []
            for point in self._dedupe_preserve(key_points[:2]):
                lines.append(f"  - Key Point: {point}")
            themes = insight.get('themes') or []
            if themes:
                lines.append(f"  - Themes: {', '.join(themes[:4])}")
        return '\n'.join(lines)

    def _build_reflective_prompt(
        self,
        question: str,
        analysis_notes: str,
        insights: List[Dict[str, Any]],
        key_info: str,
        original_context: str,
        question_type: str,
        entity_hints: str
    ) -> str:
        """Assemble the final reflective prompt for the model."""
        insight_brief = self._format_insight_section(insights)
        memory_section = '\n'.join(f"- {note}" for note in self.short_term_memory[-self.memory_limit:]) or "None."
        guidelines = {
            "factual": "Provide a concise, evidence-backed answer referencing specific peoples or cultures. Mention uncertainty if evidence is limited.",
            "explanatory": "Explain the historical narrative step-by-step, citing specific cultures and time periods.",
            "comparative": "Compare the groups or ideas clearly, noting similarities and differences.",
            "general": "Provide a succinct overview highlighting the most relevant information."
        }.get(question_type, "Answer clearly and cite the evidence from the notes.")

        context_excerpt = self.clean_ocr_text(original_context)[:1200]

        prompt = f"""You already drafted internal notes about the user's question. Use them to craft the final answer.

QUESTION: {question}

ANALYSIS NOTES:
{analysis_notes if analysis_notes else 'No notes available.'}

SHORT-TERM MEMORY:
{memory_section}

DISTILLED INSIGHTS:
{insight_brief if insight_brief else 'None provided.'}

KEY INFO:
{key_info}

EVIDENCE CONTEXT:
{context_excerpt}

ENTITY GRAPH HINTS:
{entity_hints if entity_hints else 'No graph-based relationships identified for this query.'}

INSTRUCTIONS:
- {guidelines}
- Follow the ReAct pattern with at most two iterations to reason: alternate between Thought, Action, and Observation.
- Allowed actions: review_notes (inspect analysis notes and distilled insights), consult_entity_graph (use the hints above), synthesize_answer (shape the final response).
- Keep each Thought under two sentences and ground it in evidence.
- Final Answer should include 1 concise paragraph and, when helpful, a short bullet list citing document titles.
- If information is missing, acknowledge it briefly.

OUTPUT FORMAT:
Thought: <internal reasoning grounded in evidence>
Action: <review_notes|consult_entity_graph|synthesize_answer>
Observation: <result of the action>
[Repeat Thought/Action/Observation up to two times if needed]
Final Answer: <concise user-facing response referencing sources>

FINAL ANSWER:"""
        return prompt

    def _extract_final_answer_from_react(self, react_output: str) -> Tuple[str, str]:
        """Split the ReAct transcript into reasoning trace and final answer."""
        if not react_output:
            return "", ""

        match = re.search(r'Final Answer\s*:\s*(.*)', react_output, flags=re.IGNORECASE | re.DOTALL)
        if not match:
            cleaned = react_output.strip()
            return cleaned, ""

        reasoning_trace = react_output[:match.start()].strip()
        final_answer = match.group(1).strip()

        # Drop any trailing Thought/Action sections that may follow the final answer
        trailing_split = re.split(r'\n(?=Thought\s*:)', final_answer, maxsplit=1, flags=re.IGNORECASE)
        if trailing_split:
            final_answer = trailing_split[0].strip()

        return final_answer, reasoning_trace

    def _extract_relevant_sentences(
        self,
        question_words: Set[str],
        insight: Dict[str, Any],
        limit: int = 3
    ) -> List[str]:
        """Pick sentences from the insight that align with the question."""
        pool: List[Tuple[float, str]] = []
        texts = [insight.get('overview', '')] + insight.get('key_points', [])

        theme_words = set()
        for theme in insight.get('themes', []):
            theme_tokens = re.findall(r'\b\w+\b', theme.lower())
            theme_words.update(self._filter_stop_words(theme_tokens))

        for text in texts:
            for sentence in re.split(r'(?<=[.!?])\s+', text):
                sentence = sentence.strip()
                if len(sentence) < 40:
                    continue
                sentence_tokens = re.findall(r'\b\w+\b', sentence.lower())
                sentence_words = self._filter_stop_words(sentence_tokens)
                overlap = question_words.intersection(sentence_words | theme_words)
                if not overlap:
                    continue
                score = len(overlap) / max(1, len(question_words))
                pool.append((score, sentence))

        pool.sort(key=lambda x: x[0], reverse=True)
        sentences = [sent for _, sent in pool[:limit]]
        return self._dedupe_preserve(sentences)

    def _sentences_similar(self, sent1: str, sent2: str, threshold: float = 0.85) -> bool:
        """Check if two sentences are very similar (for deduplication)."""
        if not sent1 or not sent2:
            return False
        
        # Exact match
        if sent1 == sent2:
            return True
        
        # Check if one is a substring of the other (with high overlap)
        shorter = sent1 if len(sent1) < len(sent2) else sent2
        longer = sent2 if len(sent1) < len(sent2) else sent1
        
        if len(shorter) < 20:
            return False
        
        # Calculate word overlap
        words1 = set(re.findall(r'\b\w+\b', sent1.lower()))
        words2 = set(re.findall(r'\b\w+\b', sent2.lower()))
        
        if not words1 or not words2:
            return False
        
        intersection = words1.intersection(words2)
        union = words1.union(words2)
        
        if not union:
            return False
        
        similarity = len(intersection) / len(union)
        return similarity >= threshold

    def _clean_intro(self, intro: str) -> str:
        """Clean up the introduction text, removing redundant patterns."""
        if not intro:
            return ""
        
        intro_clean = intro
        
        # Remove duplicate theme lists (e.g., "Themes: X Themes: X")
        theme_pattern = r'Themes:\s*([^.]*?)(?:Themes:\s*\1)+'
        intro_clean = re.sub(theme_pattern, r'Themes: \1', intro_clean, flags=re.IGNORECASE)
        
        # Remove duplicate title patterns
        title_pattern = r'Title:\s*([^.]*?)(?:Title:\s*\1)+'
        intro_clean = re.sub(title_pattern, r'Title: \1', intro_clean, flags=re.IGNORECASE)
        
        # Remove duplicate key points markers
        intro_clean = re.sub(r'(Key Points:\s*){2,}', 'Key Points: ', intro_clean, flags=re.IGNORECASE)
        
        # Remove duplicate sentences/phrases in intro
        sentences = re.split(r'[.!?]+\s+', intro_clean)
        seen = set()
        unique_sentences = []
        
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue
            
            # Normalize for comparison
            normalized = re.sub(r'[^\w\s]', '', sentence.lower().strip())
            normalized = re.sub(r'\s+', ' ', normalized)
            
            # Extract key words
            words = [w for w in normalized.split() if len(w) > 3]
            word_key = ' '.join(sorted(set(words)))
            
            if word_key and word_key not in seen and len(words) >= 3:
                unique_sentences.append(sentence)
                seen.add(word_key)
        
        if unique_sentences:
            intro_clean = '. '.join(unique_sentences)
            if not intro_clean.endswith(('.', '!', '?')):
                intro_clean += '.'
        else:
            # Fallback: use original but cleaned
            intro_clean = re.sub(r'\s+', ' ', intro_clean).strip()
        
        # Clean up extra spaces
        intro_clean = re.sub(r'\s+', ' ', intro_clean).strip()
        
        # Ensure it ends with proper punctuation
        if intro_clean and not intro_clean.endswith(('.', '!', '?')):
            intro_clean = intro_clean.rstrip('.') + '.'
        
        return intro_clean

    def _clean_sentence(self, sentence: str) -> str:
        """Clean up a sentence, removing metadata markers and fixing formatting."""
        if not sentence:
            return ""
        
        cleaned = sentence.strip()
        
        # Remove common metadata markers that might appear inline
        cleaned = re.sub(r'^(Title|Overview|Key Points|Themes):\s*', '', cleaned, flags=re.IGNORECASE)
        cleaned = re.sub(r'\s*(Title|Overview|Key Points|Themes):\s*', ' ', cleaned, flags=re.IGNORECASE)
        
        # Remove bullet markers if they appear
        cleaned = re.sub(r'^[-•*]\s*', '', cleaned)
        
        # Fix broken words (common OCR issues)
        cleaned = re.sub(r'\s+', ' ', cleaned)  # Multiple spaces to single
        cleaned = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned)  # Fix missing spaces
        
        # Ensure proper sentence ending
        if cleaned and not cleaned.endswith(('.', '!', '?', ':', ';')):
            # Don't add period if it's clearly incomplete or a fragment
            if len(cleaned.split()) > 5:  # Only for substantial sentences
                cleaned = cleaned.rstrip('.') + '.'
        
        # Remove very short fragments
        if len(cleaned.strip()) < 20:
            return ""
        
        return cleaned.strip()

    def _format_final_answer(self, answer: str) -> str:
        """Final formatting pass to clean up any remaining issues."""
        if not answer:
            return ""
        
        # First, remove metadata markers and clean up
        answer = re.sub(r'\b(Key Points|Title|Overview|Themes):\s*', '', answer, flags=re.IGNORECASE)
        answer = re.sub(r'^[-•*]\s*', '', answer, flags=re.MULTILINE)
        
        # Split into sentences more carefully
        # Split on sentence endings, but preserve the endings
        sentences = []
        current = ""
        for char in answer:
            current += char
            if char in '.!?':
                # Check if it's really an end of sentence (not abbreviation)
                if len(current) > 10:  # Minimum sentence length
                    sentences.append(current.strip())
                    current = ""
        
        # Add any remaining text
        if current.strip():
            sentences.append(current.strip())
        
        # If splitting failed, try simpler approach
        if len(sentences) < 2:
            sentences = [s.strip() for s in re.split(r'[.!?]+\s+', answer) if s.strip()]
        
        # Deduplicate sentences
        seen = set()
        unique_sentences = []
        
        for sentence in sentences:
            if not sentence or len(sentence.strip()) < 20:
                continue
            
            # Clean the sentence first
            cleaned = self._clean_sentence(sentence)
            if not cleaned:
                continue
            
            # Normalize for comparison (more aggressive)
            normalized = re.sub(r'[^\w\s]', '', cleaned.lower().strip())
            normalized = re.sub(r'\s+', ' ', normalized)
            
            # Extract key words (remove common words)
            words = [w for w in normalized.split() if len(w) > 3]
            if len(words) < 5:  # Skip very short sentences
                continue
            
            # Check for duplicates using word overlap
            is_duplicate = False
            for seen_norm in seen:
                seen_words = set([w for w in seen_norm.split() if len(w) > 3])
                current_words = set(words)
                
                if len(current_words) == 0 or len(seen_words) == 0:
                    continue
                
                overlap = len(current_words.intersection(seen_words))
                total_unique = len(current_words.union(seen_words))
                
                if total_unique > 0 and overlap / total_unique > 0.7:  # 70% word overlap
                    is_duplicate = True
                    break
            
            if not is_duplicate:
                unique_sentences.append(cleaned)
                seen.add(' '.join(words))
        
        if not unique_sentences:
            return answer  # Fallback to original
        
        # Rebuild answer with proper formatting
        formatted = []
        current_para = []
        
        for i, sentence in enumerate(unique_sentences[:10]):  # Limit to top 10 sentences
            current_para.append(sentence)
            
            # Start new paragraph every 2 sentences
            if len(current_para) >= 2:
                para_text = " ".join(current_para)
                formatted.append(para_text)
                formatted.append("")  # Blank line
                current_para = []
        
        # Add remaining sentences
        if current_para:
            formatted.append(" ".join(current_para))
        
        result = "\n".join(line for line in formatted if line.strip()).strip()
        
        # Ensure we didn't remove too much
        if len(result) < 40:
            return answer
        
        return result if result else answer

    def is_generic_output(self, text: Optional[str]) -> bool:
        """Detect generic or non-informative answers."""
        if not text:
            return True
        lowered = text.strip().lower()
        if not lowered:
            return True
        generic_signals = [
            "based on the available documents",
            "according to the distilled documents",
            "i analyzed the available documents",
            "no relevant documents found",
            "no documents available",
            "while i couldn't extract a direct answer"
        ]
        return any(signal in lowered for signal in generic_signals)

    def _filter_stop_words(self, tokens: Iterable[str]) -> Set[str]:
        """Remove trivial stop words from a collection of tokens."""
        filtered: Set[str] = set()
        for token in tokens:
            token_lower = token.lower()
            if len(token_lower) <= 2 or token_lower in self.COMMON_STOP_WORDS:
                continue
            filtered.add(token_lower)
            if token_lower.endswith('s') and len(token_lower) > 3:
                filtered.add(token_lower[:-1])
            elif token_lower.endswith('es') and len(token_lower) > 4:
                filtered.add(token_lower[:-2])
        return filtered

    def _humanize_title(self, source: str, candidate: Optional[str]) -> str:
        """Produce a readable title for a document."""
        if candidate:
            cleaned = candidate.strip()
            lower = cleaned.lower()
            if len(cleaned.split()) >= 2 and not any(token in lower for token in ['library', 'copyright', 'permission', 'chapter']):
                return cleaned
        return Path(source).stem.replace('_', ' ').strip()