#!/usr/bin/env python3
"""
Advanced Sports Betting RAG Service
Incorporates structure-aware PDF processing, FAISS vector search, BM25 hybrid scoring,
Grok-based chunk enrichment, and DeepSeek/Tesseract OCR fallback.

Port: 5001 (configurable via RAG_PORT)
"""

import os
import sys
import json
import time
import hashlib
import uuid
import re
import math
import threading
import queue
from datetime import datetime
from pathlib import Path
from functools import wraps
from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Tuple

from flask import Flask, request, jsonify, send_from_directory
from werkzeug.utils import secure_filename

# ============================================================================
# Configuration
# ============================================================================
@dataclass
class RAGConfig:
    """RAG Service Configuration"""
    # Paths
    base_dir: Path = Path(__file__).parent.parent
    data_dir: Optional[Path] = None
    cache_dir: Optional[Path] = None
    pdf_dir: Optional[Path] = None
    
    # Chunking parameters
    chunk_size: int = 1000
    chunk_min_size: int = 400
    chunk_max_size: int = 1600
    sentence_overlap: int = 2
    min_sentences: int = 3
    
    # Search parameters
    hybrid_vector_weight: float = 0.50
    hybrid_keyword_weight: float = 0.50
    hybrid_bm25_weight: float = 0.25
    max_chunks_per_source: int = 2
    default_k: int = 10
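    # The three hybrid_* weights are combined in VectorStore.search() as:
    #   score = hybrid_vector_weight * vector_sim
    #         + hybrid_keyword_weight * keyword_score
    #         + hybrid_bm25_weight * bm25_score
    # They need not sum to 1.0; only their relative magnitudes affect ranking.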
    
    # OCR fallback URLs
    deepseek_ocr_url: str = "http://127.0.0.1:5003/ocr"
    tesseract_ocr_url: str = "http://127.0.0.1:5002/ocr"
    
    def __post_init__(self):
        if self.data_dir is None:
            self.data_dir = Path(os.environ.get('DATA_DIR', self.base_dir / 'data'))
        if self.cache_dir is None:
            self.cache_dir = self.data_dir / 'rag_cache'
        if self.pdf_dir is None:
            self.pdf_dir = self.data_dir / 'csv'  # Default upload dir
        
        # Ensure directories exist
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.pdf_dir.mkdir(parents=True, exist_ok=True)

# Document type configurations
DOCUMENT_TYPE_CONFIGS = {
    'technical': {'chunk_size': 800, 'chunk_min_size': 400, 'chunk_max_size': 1200, 'sentence_overlap': 1, 'min_sentences': 2},
    'research': {'chunk_size': 1000, 'chunk_min_size': 500, 'chunk_max_size': 1500, 'sentence_overlap': 2, 'min_sentences': 3},
    'legal': {'chunk_size': 900, 'chunk_min_size': 500, 'chunk_max_size': 1400, 'sentence_overlap': 2, 'min_sentences': 3},
    'sports': {'chunk_size': 1000, 'chunk_min_size': 400, 'chunk_max_size': 1500, 'sentence_overlap': 2, 'min_sentences': 3},
    'default': {'chunk_size': 1000, 'chunk_min_size': 400, 'chunk_max_size': 1600, 'sentence_overlap': 2, 'min_sentences': 3}
}

# Sports betting terms for enhanced search
SPORTS_TERMS = [
    'moneyline', 'spread', 'over', 'under', 'parlay', 'teaser', 'prop', 'futures',
    'odds', 'juice', 'vig', 'vigorish', 'line', 'point spread', 'total', 'handicap',
    'favorite', 'underdog', 'push', 'cover', 'ats', 'against the spread',
    'bankroll', 'unit', 'roi', 'return on investment', 'edge', 'ev', 'expected value',
    'sharp', 'square', 'public', 'steam', 'reverse line movement', 'rlm',
    'nba', 'nfl', 'mlb', 'nhl', 'ncaa', 'college', 'basketball', 'football', 'baseball', 'hockey',
    'bet', 'wager', 'stake', 'risk', 'payout', 'win', 'loss', 'profit',
    'sportsbook', 'bookmaker', 'bookie', 'betting', 'gambling', 'handicapping',
    'model', 'system', 'strategy', 'analytics', 'statistics', 'data', 'trends',
    'injury', 'lineup', 'rotation', 'rest', 'back to back', 'travel', 'schedule',
    'home', 'away', 'road', 'neutral', 'venue', 'court', 'field',
    'quarter', 'half', 'period', 'inning', 'overtime', 'live', 'in-play',
    'closing line', 'opening line', 'line movement', 'market', 'price',
]
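
# In VectorStore.search(), a query token that matches SPORTS_TERMS contributes 0.5 to the
# keyword score (vs 0.3 for other tokens), and any multi-word phrase from this list found
# in the query text is added as an extra keyword pattern.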

# BM25 parameters
BM25_K1 = 1.5
BM25_B = 0.75
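
# Per-term Okapi BM25 contribution, as implemented in VectorStore._compute_bm25()
# (N = total chunks, df = chunks containing the term, tf = term frequency in the chunk):
#   idf   = log((N - df + 0.5) / (df + 0.5) + 1)
#   bm25 += idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * doc_len / avg_doc_len))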

# ============================================================================
# PDF Processing
# ============================================================================
# Try to import PDF libraries
try:
    import fitz  # PyMuPDF
    HAS_PYMUPDF = True
except ImportError:
    HAS_PYMUPDF = False

try:
    from pdfminer.high_level import extract_text as pdfminer_extract
    HAS_PDFMINER = True
except ImportError:
    HAS_PDFMINER = False

try:
    from PyPDF2 import PdfReader
    HAS_PYPDF2 = True
except ImportError:
    HAS_PYPDF2 = False

# Try numpy for embeddings
try:
    import numpy as np
    HAS_NUMPY = True
except ImportError:
    HAS_NUMPY = False

# Try sentence-transformers for embeddings
try:
    from sentence_transformers import SentenceTransformer
    HAS_SENTENCE_TRANSFORMERS = True
except ImportError:
    HAS_SENTENCE_TRANSFORMERS = False

# Try FAISS for vector search
try:
    import faiss
    HAS_FAISS = True
except ImportError:
    HAS_FAISS = False

# Try requests for OCR fallback
try:
    import requests
    HAS_REQUESTS = True
except ImportError:
    HAS_REQUESTS = False
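
# Summary of the fallback chains implied by the flags above:
#   PDF text:   PyMuPDF -> pdfminer -> PyPDF2 -> external OCR services (via requests)
#   Embeddings: sentence-transformers (all-MiniLM-L6-v2) -> hashed pseudo-embeddings
#   Search:     FAISS inner-product index -> NumPy cosine similarity
# NumPy is effectively a hard requirement: both embedding paths and the cosine fallback use it.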


# ============================================================================
# LLM Integration for Enrichment
# ============================================================================
class LLMService:
    """LLM service using Grok Fast Reasoning (Enforced)"""
    
    def __init__(self):
        # Enforce Grok usage
        self.grok_api_key = (os.environ.get('GROK_API_KEY') or os.environ.get('XAI_API_KEY', '')).strip().strip('"').strip("'")
        self.grok_url = "https://api.x.ai/v1/chat/completions"
        self.grok_model = "grok-4-fast-reasoning"
        self.deepseek_available = False # Disabled per user request
        
        if not self.grok_api_key:
            print("[RAG] WARNING: GROK_API_KEY not set. LLM features will fail.")
            
    def generate(self, prompt: str, max_tokens: int = 800, temperature: float = 0.5) -> Optional[str]:
        """Generate text using Grok API"""
        return self._call_grok(prompt, max_tokens, temperature)
    
    def _call_grok(self, prompt: str, max_tokens: int, temperature: float) -> Optional[str]:
        """Call Grok API"""
        if not HAS_REQUESTS or not self.grok_api_key:
            print("[RAG] Grok API not configured (no API key)")
            return None
        
        try:
            print(f"[RAG] Calling Grok ({self.grok_model})...")
            response = requests.post(
                self.grok_url,
                headers={
                    'Authorization': f'Bearer {self.grok_api_key}',
                    'Content-Type': 'application/json'
                },
                json={
                    'model': self.grok_model,
                    'messages': [{'role': 'user', 'content': prompt}],
                    'max_tokens': max_tokens,
                    'temperature': temperature,
                    'stream': False
                },
                timeout=45
            )
            
            if response.status_code == 200:
                data = response.json()
                if 'choices' in data and len(data['choices']) > 0:
                    return data['choices'][0]['message']['content']
            else:
                print(f"[RAG] Grok API error: {response.status_code} - {response.text[:500] if response.text else '(empty)'}")
        except Exception as e:
            print(f"[RAG] Grok API exception: {e}")
        
        return None
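
# Usage sketch (assumes GROK_API_KEY or XAI_API_KEY is set in the environment):
#   llm = LLMService()
#   summary = llm.generate("Summarize this betting angle in two sentences: ...", max_tokens=200)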


class PDFProcessor:
    """Advanced PDF processor with structure-aware chunking"""
    
    def __init__(self, config: RAGConfig):
        self.config = config
        self.boilerplate_patterns = [
            r'copyright\s+©?\s*\d{4}',
            r'all rights reserved',
            r'page\s+\d+\s+of\s+\d+',
            r'^\s*$',
            r'^\d+$',
            r'[a-f0-9]{32,}',
            r'^[\s\W]*$',
        ]
    
    def extract_text(self, file_path: Path) -> str:
        """Extract text using best available method"""
        if file_path.suffix.lower() == '.pdf':
            # Try PyMuPDF first (best quality)
            if HAS_PYMUPDF:
                try:
                    return self._extract_with_pymupdf(file_path)
                except Exception as e:
                    print(f"PyMuPDF failed: {e}")
            
            # Try pdfminer
            if HAS_PDFMINER:
                try:
                    return pdfminer_extract(str(file_path))
                except Exception as e:
                    print(f"pdfminer failed: {e}")
            
            # Try PyPDF2
            if HAS_PYPDF2:
                try:
                    reader = PdfReader(file_path)
                    return '\n'.join(page.extract_text() or '' for page in reader.pages)
                except Exception as e:
                    print(f"PyPDF2 failed: {e}")
            
            return ""
        
        elif file_path.suffix.lower() == '.txt':
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    return f.read()
            except UnicodeDecodeError:
                with open(file_path, 'r', encoding='latin-1') as f:
                    return f.read()
        
        return ""
    
    def _extract_with_pymupdf(self, file_path: Path) -> str:
        """High-fidelity extraction using PyMuPDF"""
        doc = fitz.open(file_path)
        pages = []
        try:
            for page in doc:
                text = page.get_text("text", flags=fitz.TEXT_DEHYPHENATE | fitz.TEXT_PRESERVE_WHITESPACE)
                pages.append(text)
        finally:
            doc.close()
        return "\n".join(pages)
    
    def detect_document_type(self, text: str, filename: str) -> str:
        """Detect document type for optimal chunking"""
        text_lower = text.lower()
        filename_lower = filename.lower()
        
        # Sports betting indicators
        sports_indicators = ['betting', 'odds', 'spread', 'moneyline', 'parlay', 'sportsbook', 'handicapping']
        if sum(1 for ind in sports_indicators if ind in text_lower) >= 2:
            return 'sports'
        
        # Technical indicators
        tech_indicators = ['specification', 'technical', 'api', 'protocol', 'algorithm']
        if sum(1 for ind in tech_indicators if ind in text_lower) >= 2:
            return 'technical'
        
        # Research indicators
        research_indicators = ['abstract', 'methodology', 'results', 'conclusion', 'references']
        if sum(1 for ind in research_indicators if ind in text_lower) >= 2:
            return 'research'
        
        return 'default'
    
    def split_into_sentences(self, text: str) -> List[str]:
        """Split text into sentences"""
        if not text:
            return []
        
        # Sentence splitting regex
        sentence_endings = r'(?<=[.!?])\s+(?=[A-Z])|(?<=[.!?])\s*$|(?<=[.!?"\'])\s+(?=[A-Z])'
        sentences = re.split(sentence_endings, text)
        return [s.strip() for s in sentences if len(s.strip()) > 10]
    
    def detect_headings(self, text: str) -> List[Tuple[str, int, int]]:
        """Detect headings in document"""
        headings = []
        lines = text.split('\n')
        
        for i, line in enumerate(lines):
            line_stripped = line.strip()
            if not line_stripped or len(line_stripped) < 3:
                continue
            
            # ALL CAPS headings
            if line_stripped.isupper() and len(line_stripped) >= 4 and len(line_stripped) < 80:
                headings.append((line_stripped, i, 1))
                continue
            
            # Numbered headings
            numbered_match = re.match(r'^\s*(\d+)(?:\.(\d+))?\s+(.+)$', line_stripped)
            if numbered_match:
                depth = sum(1 for g in numbered_match.groups()[:2] if g)
                heading_text = numbered_match.group(3)
                if len(heading_text) > 3:
                    headings.append((heading_text, i, min(depth, 3)))
        
        return headings
    
    def chunk_text(self, text: str, doc_type: str = 'default') -> List[Dict[str, Any]]:
        """Structure-aware chunking"""
        config = DOCUMENT_TYPE_CONFIGS.get(doc_type, DOCUMENT_TYPE_CONFIGS['default'])
        chunk_size = config['chunk_size']
        chunk_min = config['chunk_min_size']
        chunk_max = config['chunk_max_size']
        overlap = config['sentence_overlap']
        min_sentences = config['min_sentences']
        
        if not text or len(text.strip()) < chunk_min:
            return []
        
        # Try structure-aware chunking first
        headings = self.detect_headings(text)
        
        if headings:
            chunks = self._chunk_by_sections(text, headings, config)
            if chunks:
                return chunks
        
        # Fall back to sentence-based chunking
        return self._sentence_based_chunking(text, config)
    
    def _chunk_by_sections(self, text: str, headings: List[Tuple], config: dict) -> List[Dict]:
        """Chunk text by detected sections"""
        chunks = []
        lines = text.split('\n')
        
        # Process sections between headings
        for i, (heading_text, heading_pos, level) in enumerate(headings):
            end_pos = headings[i + 1][1] if i + 1 < len(headings) else len(lines)
            section_content = '\n'.join(lines[heading_pos + 1:end_pos]).strip()
            
            if section_content and len(section_content) >= config['chunk_min_size']:
                section_chunks = self._sentence_based_chunking(section_content, config)
                for chunk in section_chunks:
                    chunk['section'] = heading_text
                    chunk['section_level'] = level
                chunks.extend(section_chunks)
        
        return chunks
    
    def _sentence_based_chunking(self, text: str, config: dict) -> List[Dict]:
        """Sentence-based chunking with overlap"""
        sentences = self.split_into_sentences(text)
        if len(sentences) < config['min_sentences']:
            return []
        
        chunks = []
        current_chunk = []
        current_length = 0
        
        for sentence in sentences:
            sentence_length = len(sentence) + 1
            
            if current_length + sentence_length > config['chunk_max_size'] and current_chunk:
                chunk_text = ' '.join(current_chunk).strip()
                if len(chunk_text) >= config['chunk_min_size']:
                    chunks.append({
                        'content': chunk_text,
                        'sentence_count': len(current_chunk),
                        'char_count': len(chunk_text)
                    })
                
                # Start new chunk with overlap
                overlap_count = min(config['sentence_overlap'], len(current_chunk))
                current_chunk = current_chunk[-overlap_count:] if overlap_count > 0 else []
                current_length = sum(len(s) + 1 for s in current_chunk)
            
            current_chunk.append(sentence)
            current_length += sentence_length
            
            if current_length >= config['chunk_size'] and len(current_chunk) >= config['min_sentences']:
                chunk_text = ' '.join(current_chunk).strip()
                if len(chunk_text) >= config['chunk_min_size']:
                    chunks.append({
                        'content': chunk_text,
                        'sentence_count': len(current_chunk),
                        'char_count': len(chunk_text)
                    })
                
                overlap_count = min(config['sentence_overlap'], len(current_chunk))
                current_chunk = current_chunk[-overlap_count:] if overlap_count > 0 else []
                current_length = sum(len(s) + 1 for s in current_chunk)
        
        # Final chunk
        if current_chunk:
            chunk_text = ' '.join(current_chunk).strip()
            if len(chunk_text) >= config['chunk_min_size']:
                chunks.append({
                    'content': chunk_text,
                    'sentence_count': len(current_chunk),
                    'char_count': len(chunk_text)
                })
        
        return chunks
    
    def is_valid_chunk(self, chunk: Dict, config: dict) -> bool:
        """Validate chunk quality"""
        content = chunk.get('content', '')
        if len(content) < config.get('chunk_min_size', 400):
            return False
        
        # Check for boilerplate
        content_lower = content.lower()
        for pattern in self.boilerplate_patterns:
            if re.search(pattern, content_lower, re.IGNORECASE):
                return False
        
        # Check for meaningful content
        words = re.findall(r'\b\w+\b', content_lower)
        if len(words) < 5:
            return False
        
        # Check uniqueness ratio
        unique_ratio = len(set(words)) / len(words) if words else 0
        if unique_ratio < 0.3:
            return False
        
        return True
    
    def process_file(self, file_path: Path, source_name: str = None) -> List[Dict]:
        """Process a file and return chunks with metadata"""
        if source_name is None:
            source_name = file_path.name
        
        # Extract text
        text = self.extract_text(file_path)
        if not text or len(text.strip()) < 100:
            return []
        
        # Detect document type
        doc_type = self.detect_document_type(text, source_name)
        config = DOCUMENT_TYPE_CONFIGS.get(doc_type, DOCUMENT_TYPE_CONFIGS['default'])
        
        # Chunk text
        raw_chunks = self.chunk_text(text, doc_type)
        
        # Filter and add metadata
        chunks = []
        for i, chunk in enumerate(raw_chunks):
            if self.is_valid_chunk(chunk, config):
                chunk['source'] = source_name
                chunk['chunk_id'] = i
                chunk['total_chunks'] = len(raw_chunks)
                chunk['document_type'] = doc_type
                chunks.append(chunk)
        
        return chunks
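
# Usage sketch (the path is illustrative):
#   processor = PDFProcessor(RAGConfig())
#   chunks = processor.process_file(Path('data/csv/betting_guide.pdf'))
#   # -> list of dicts with 'content', 'source', 'chunk_id', 'document_type', ...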


# ============================================================================
# Vector Store with Hybrid Search
# ============================================================================
class VectorStore:
    """FAISS-based vector store with BM25 hybrid search"""
    
    def __init__(self, config: RAGConfig):
        self.config = config
        self.documents = []
        self.embeddings = []
        self.index = None
        self.model = None
        self.searchable_texts = []
        self.doc_lengths = []
        self.avg_doc_length = 0
        
        # File paths
        self.index_file = config.cache_dir / 'faiss_index.index'
        self.metadata_file = config.cache_dir / 'documents_metadata.json'
        self.embeddings_file = config.cache_dir / 'embeddings.npy'
        
        # Initialize embedding model
        self._init_model()
    
    def _init_model(self):
        """Initialize embedding model"""
        if HAS_SENTENCE_TRANSFORMERS:
            try:
                self.model = SentenceTransformer('all-MiniLM-L6-v2')
                self.embedding_dim = 384
                print(f"[RAG] Loaded SentenceTransformer model")
            except Exception as e:
                print(f"[RAG] SentenceTransformer failed: {e}")
                self.model = None
        
        if self.model is None:
            self.embedding_dim = 384
            print("[RAG] Using hash-based embeddings (install sentence-transformers for better results)")
    
    def _get_embedding(self, text: str) -> np.ndarray:
        """Get embedding for text"""
        if self.model:
            return self.model.encode(text, normalize_embeddings=True)
        else:
            # Hash-based fallback: SHA-384 yields only 48 bytes, so concatenate salted digests
            # until we have `embedding_dim` bytes, then normalize. Deterministic but not semantic.
            digest = b''
            counter = 0
            while len(digest) < self.embedding_dim:
                digest += hashlib.sha384(text.encode() + counter.to_bytes(2, 'big')).digest()
                counter += 1
            arr = np.frombuffer(digest[:self.embedding_dim], dtype=np.uint8).astype(np.float32)
            return arr / (np.linalg.norm(arr) + 1e-9)
    
    def _get_embeddings_batch(self, texts: List[str]) -> np.ndarray:
        """Get embeddings for multiple texts"""
        if self.model:
            return self.model.encode(texts, normalize_embeddings=True, show_progress_bar=True)
        else:
            return np.array([self._get_embedding(t) for t in texts])
    
    def load(self) -> bool:
        """Load existing index and documents"""
        if not self.metadata_file.exists():
            return False
        
        try:
            with open(self.metadata_file, 'r', encoding='utf-8') as f:
                self.documents = json.load(f)
            
            if HAS_FAISS and self.index_file.exists():
                self.index = faiss.read_index(str(self.index_file))
            elif self.embeddings_file.exists():
                self.embeddings = np.load(str(self.embeddings_file))
            
            self._prepare_searchable_texts()
            print(f"[RAG] Loaded {len(self.documents)} documents")
            return True
        except Exception as e:
            print(f"[RAG] Failed to load index: {e}")
            return False
    
    def save(self):
        """Save index and documents"""
        try:
            with open(self.metadata_file, 'w', encoding='utf-8') as f:
                json.dump(self.documents, f, ensure_ascii=False, indent=2)
            
            if HAS_FAISS and self.index is not None:
                faiss.write_index(self.index, str(self.index_file))
            elif len(self.embeddings) > 0:
                np.save(str(self.embeddings_file), np.array(self.embeddings))
            
            print(f"[RAG] Saved {len(self.documents)} documents")
        except Exception as e:
            print(f"[RAG] Failed to save index: {e}")
    
    def _prepare_searchable_texts(self):
        """Build searchable text cache"""
        self.searchable_texts = []
        self.doc_lengths = []
        
        for doc in self.documents:
            text = self._build_searchable_text(doc)
            self.searchable_texts.append(text.lower())
            self.doc_lengths.append(len(text.split()))
        
        self.avg_doc_length = sum(self.doc_lengths) / len(self.doc_lengths) if self.doc_lengths else 0
    
    def _build_searchable_text(self, doc: Dict) -> str:
        """Build searchable text from document"""
        parts = []
        
        # Add source
        source = doc.get('source', '')
        if source:
            clean_source = re.sub(r'\.(pdf|txt)$', '', source, flags=re.IGNORECASE)
            parts.append(clean_source.replace('_', ' ').replace('-', ' '))
        
        # Add section
        if doc.get('section'):
            parts.append(doc['section'])

        # Enriched metadata (improves keyword/BM25 search without changing embeddings)
        if doc.get('summary'):
            parts.append(str(doc.get('summary')))
        if doc.get('key_points'):
            try:
                parts.append(' '.join([str(p) for p in doc.get('key_points', [])]))
            except Exception:
                pass
        if doc.get('themes'):
            try:
                parts.append(' '.join([str(t) for t in doc.get('themes', [])]))
            except Exception:
                pass
        
        # Add content
        parts.append(doc.get('content', ''))
        
        return ' '.join(parts)

    def save_metadata_only(self):
        """Persist documents metadata safely without rewriting the FAISS index/embeddings."""
        try:
            tmp_path = self.metadata_file.with_suffix(self.metadata_file.suffix + '.tmp')
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(self.documents, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, self.metadata_file)  # atomic on POSIX
        except Exception as e:
            print(f"[RAG] Failed to save metadata (checkpoint): {e}")
    
    def add_documents(self, chunks: List[Dict]):
        """Add documents to the store"""
        if not chunks:
            return
        
        contents = [c['content'] for c in chunks]
        new_embeddings = self._get_embeddings_batch(contents)
        
        if HAS_FAISS:
            if self.index is None:
                self.index = faiss.IndexFlatIP(self.embedding_dim)
            
            embeddings_np = np.array(new_embeddings).astype('float32')
            faiss.normalize_L2(embeddings_np)
            self.index.add(embeddings_np)
        else:
            if isinstance(self.embeddings, np.ndarray) and len(self.embeddings) > 0:
                self.embeddings = np.vstack([self.embeddings, new_embeddings])
            else:
                self.embeddings = new_embeddings
        
        self.documents.extend(chunks)
        
        # Update searchable texts
        for chunk in chunks:
            text = self._build_searchable_text(chunk)
            self.searchable_texts.append(text.lower())
            self.doc_lengths.append(len(text.split()))
        
        if self.doc_lengths:
            self.avg_doc_length = sum(self.doc_lengths) / len(self.doc_lengths)
        
        print(f"[RAG] Added {len(chunks)} documents. Total: {len(self.documents)}")
    
    def replace_source(self, source_name: str, new_chunks: List[Dict]):
        """Replace all documents for a source"""
        # Remove old documents
        keep_docs = []
        keep_embeddings = []
        
        for i, doc in enumerate(self.documents):
            if doc.get('source') != source_name:
                keep_docs.append(doc)
                if not HAS_FAISS and len(self.embeddings) > i:
                    keep_embeddings.append(self.embeddings[i])
        
        self.documents = keep_docs
        
        # Rebuild index
        if HAS_FAISS:
            self.index = faiss.IndexFlatIP(self.embedding_dim)
            if keep_docs:
                contents = [d['content'] for d in keep_docs]
                embeddings = self._get_embeddings_batch(contents).astype('float32')
                faiss.normalize_L2(embeddings)
                self.index.add(embeddings)
        else:
            self.embeddings = np.array(keep_embeddings) if keep_embeddings else np.array([])
        
        self._prepare_searchable_texts()
        
        # Add new documents
        self.add_documents(new_chunks)
    
    def search(self, query: str, k: int = 10) -> List[Dict]:
        """Hybrid vector + keyword search"""
        if not self.documents:
            return []
        
        query_lower = query.lower()
        query_words = re.findall(r'\b\w+\b', query_lower)
        
        # Build keyword patterns
        keyword_patterns = []
        for word in query_words:
            if len(word) > 2:
                keyword_patterns.append({
                    'term': word,
                    'pattern': re.compile(r'\b' + re.escape(word) + r'\b', re.IGNORECASE),
                    'is_sports': word in SPORTS_TERMS
                })
        
        # Add sports terms from query
        for term in SPORTS_TERMS:
            if term in query_lower and term not in [p['term'] for p in keyword_patterns]:
                keyword_patterns.append({
                    'term': term,
                    'pattern': re.compile(r'\b' + re.escape(term) + r'\b', re.IGNORECASE),
                    'is_sports': True
                })
        
        # Compute IDF
        idf_table = self._compute_idf(keyword_patterns)
        
        # Vector search
        query_embedding = self._get_embedding(query)
        
        if HAS_FAISS and self.index is not None:
            query_embedding = np.array([query_embedding]).astype('float32')
            faiss.normalize_L2(query_embedding)
            oversample = min(k * 4, len(self.documents))
            scores, indices = self.index.search(query_embedding, oversample)
            vector_results = list(zip(scores[0], indices[0]))
        else:
            # Cosine similarity fallback
            if isinstance(self.embeddings, np.ndarray) and len(self.embeddings) > 0:
                similarities = np.dot(self.embeddings, query_embedding)
                oversample = min(k * 4, len(self.documents))
                top_indices = np.argsort(similarities)[-oversample:][::-1]
                vector_results = [(similarities[i], i) for i in top_indices]
            else:
                vector_results = []
        
        # Score candidates
        candidates = []
        for vector_score, idx in vector_results:
            # FAISS pads missing results with -1, so guard both ends of the index range.
            if idx < 0 or idx >= len(self.documents):
                continue
            
            doc = self.documents[idx]
            searchable_text = self.searchable_texts[idx] if idx < len(self.searchable_texts) else ''
            
            # Keyword score
            keyword_score = 0.0
            matched_terms = []
            for p in keyword_patterns:
                if p['pattern'].search(searchable_text):
                    matched_terms.append(p['term'])
                    keyword_score += 0.5 if p['is_sports'] else 0.3
            
            keyword_score = min(1.0, keyword_score)
            
            # BM25 score
            bm25_score = self._compute_bm25(idx, keyword_patterns, idf_table)
            
            # Combined score
            combined = (
                self.config.hybrid_vector_weight * float(vector_score) +
                self.config.hybrid_keyword_weight * keyword_score +
                self.config.hybrid_bm25_weight * bm25_score
            )
            
            candidates.append({
                'content': doc['content'],
                'metadata': {k: v for k, v in doc.items() if k != 'content'},
                'score': combined,
                'vector_score': float(vector_score),
                'keyword_score': keyword_score,
                'bm25_score': bm25_score,
                'matched_terms': matched_terms
            })
        
        # Sort and limit by source
        candidates.sort(key=lambda x: x['score'], reverse=True)
        
        results = []
        source_counts = {}
        for c in candidates:
            source = c['metadata'].get('source', 'unknown')
            if source_counts.get(source, 0) < self.config.max_chunks_per_source:
                results.append(c)
                source_counts[source] = source_counts.get(source, 0) + 1
                if len(results) >= k:
                    break
        
        return results
    
    def _compute_idf(self, patterns: List[Dict]) -> Dict[str, float]:
        """Compute IDF for terms"""
        idf_table = {}
        total_docs = len(self.documents)
        if total_docs == 0:
            return idf_table
        
        for p in patterns:
            term = p['term']
            if term in idf_table:
                continue
            
            df = sum(1 for text in self.searchable_texts if p['pattern'].search(text))
            idf = math.log((total_docs - df + 0.5) / (df + 0.5) + 1.0)
            idf_table[term] = max(idf, 0.0)
        
        return idf_table
    
    def _compute_bm25(self, idx: int, patterns: List[Dict], idf_table: Dict) -> float:
        """Compute BM25 score for a document"""
        if not patterns or idx >= len(self.searchable_texts):
            return 0.0
        
        doc_text = self.searchable_texts[idx]
        doc_len = self.doc_lengths[idx] if idx < len(self.doc_lengths) else len(doc_text.split())
        avg_len = self.avg_doc_length or doc_len or 1
        
        bm25 = 0.0
        for p in patterns:
            tf = len(p['pattern'].findall(doc_text))
            if tf == 0:
                continue
            
            idf = idf_table.get(p['term'], 0.0)
            numerator = tf * (BM25_K1 + 1.0)
            denominator = tf + BM25_K1 * (1.0 - BM25_B + BM25_B * (doc_len / avg_len))
            bm25 += idf * (numerator / denominator)
        
        return bm25
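
# Usage sketch:
#   store = VectorStore(RAGConfig())
#   store.load()  # or: store.add_documents(chunks); store.save()
#   hits = store.search('reverse line movement on road underdogs', k=5)
#   # each hit carries 'content', 'metadata', 'score', 'vector_score',
#   # 'keyword_score', 'bm25_score', and 'matched_terms'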


# ============================================================================
# OCR Fallback Service
# ============================================================================
class OCRService:
    """OCR fallback for scanned PDFs using DeepSeek OCR (port 5003) and Tesseract (port 5002)"""
    
    def __init__(self, config: RAGConfig):
        self.config = config
        # DeepSeek OCR on port 5003 (GPU accelerated)
        self.deepseek_ocr_url = os.environ.get('DEEPSEEK_OCR_URL', 'http://127.0.0.1:5003/ocr')
        # Tesseract OCR on port 5002 (CPU fallback)
        self.tesseract_ocr_url = os.environ.get('TESSERACT_OCR_URL', 'http://127.0.0.1:5002/ocr')
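        # Both endpoints are assumed to accept a multipart 'file' upload and respond with JSON
        # shaped like {"success": true, "text": "..."} (see _try_ocr below); this is a contract
        # with the companion OCR services, not something validated here.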
    
    def extract_text(self, file_path: Path) -> Optional[str]:
        """Try OCR extraction with DeepSeek (primary) and Tesseract (fallback)"""
        if not HAS_REQUESTS:
            return None
        
        # Try DeepSeek OCR first (GPU accelerated, faster)
        text = self._try_ocr(self.deepseek_ocr_url, file_path, "DeepSeek")
        if text:
            return text
        
        # Fall back to Tesseract (CPU)
        text = self._try_ocr(self.tesseract_ocr_url, file_path, "Tesseract")
        return text
    
    def _try_ocr(self, url: str, file_path: Path, service_name: str) -> Optional[str]:
        """Try OCR service"""
        try:
            print(f"[RAG] Trying {service_name} OCR at {url}...")
            with open(file_path, 'rb') as f:
                files = {'file': (file_path.name, f, 'application/pdf')}
                response = requests.post(url, files=files, timeout=600)
            
            if response.status_code == 200:
                data = response.json()
                # Accept either {'success': True, 'text': ...} or a bare {'text': ...} payload;
                # some OCR services omit the success flag.
                if data.get('text'):
                    print(f"[RAG] {service_name} OCR extracted {len(data['text'])} chars")
                    return data['text']
        except Exception as e:
            print(f"[RAG] {service_name} OCR failed: {e}")
        
        return None


# ============================================================================
# Enrichment Service
# ============================================================================
class EnrichmentService:
    """Service for LLM-based chunk enrichment"""
    
    def __init__(self, config: RAGConfig):
        self.config = config
        self.llm = LLMService()
        self.enrichment_budget = int(os.environ.get('ENRICHMENT_BUDGET', 999999))
        self.enrichment_used = 0
    
    def enrich_chunk(self, chunk: Dict) -> Dict:
        """Enrich a single chunk with LLM-generated metadata"""
        content = chunk.get('content', '')
        if not content or len(content) < 50:
            return chunk
        
        # Generate basic card first (no LLM)
        card = self._generate_basic_card(content, chunk)
        
        # Try LLM enhancement if within budget
        if self.enrichment_used < self.enrichment_budget:
            enhanced = self._enhance_with_llm(content, card)
            if enhanced:
                card.update(enhanced)
                self.enrichment_used += 1
        
        # Update chunk with enrichment
        enriched_chunk = chunk.copy()
        enriched_chunk['summary'] = card.get('summary', '')
        enriched_chunk['key_points'] = card.get('key_points', [])
        enriched_chunk['themes'] = card.get('themes', [])
        enriched_chunk['enriched'] = True

        # IMPORTANT:
        # Do NOT overwrite `content` during enrichment.
        # The vector embeddings/FAISS index are built from `content`; changing it would require
        # recomputing embeddings and rebuilding the index (expensive + fragile).
        # Instead, the VectorStore builds searchable text using summary/key_points/themes.
        return enriched_chunk
    
    def _generate_basic_card(self, content: str, chunk: Dict) -> Dict:
        """Generate basic knowledge card without LLM"""
        # Split into sentences
        sentences = re.split(r'(?<=[.!?])\s+', content)
        sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
        
        # Basic summary (first 2 sentences)
        summary = ' '.join(sentences[:2])[:400] if sentences else content[:400]
        
        # Key points (first 4 sentences)
        key_points = [s[:220] for s in sentences[:4]]
        
        # Extract themes via frequency analysis
        themes = self._extract_themes(content)
        
        return {
            'summary': summary,
            'key_points': key_points,
            'themes': themes,
            'source': chunk.get('source', 'unknown'),
            'chunk_id': chunk.get('chunk_id', 0)
        }
    
    def _extract_themes(self, text: str, max_themes: int = 5) -> List[str]:
        """Extract themes using frequency analysis"""
        words = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
        if not words:
            return []
        
        # Stop words to exclude
        stop_words = {
            'that', 'this', 'with', 'have', 'from', 'they', 'were', 'their',
            'there', 'which', 'about', 'into', 'through', 'also', 'very',
            'than', 'them', 'some', 'only', 'even', 'most', 'more', 'like',
            'been', 'being', 'would', 'could', 'should', 'will', 'just',
            'because', 'when', 'where', 'what', 'your', 'other', 'each'
        }
        
        # Count word frequency
        word_counts = {}
        for word in words:
            if word not in stop_words and len(word) >= 4:
                word_counts[word] = word_counts.get(word, 0) + 1
        
        # Get top themes
        sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
        themes = [word for word, count in sorted_words[:max_themes] if count >= 2]
        
        # Add sports-specific themes
        for term in SPORTS_TERMS:
            if term in text.lower() and term not in themes:
                themes.append(term)
                if len(themes) >= max_themes:
                    break
        
        return themes[:max_themes]
    
    def _enhance_with_llm(self, content: str, base_card: Dict) -> Optional[Dict]:
        """Enhance card with LLM"""
        prompt = f"""You are refining a knowledge card for a sports betting document. Extract the most important information.

EXCERPT:
{content[:800]}

CURRENT SUMMARY:
{base_card['summary']}

Provide an improved knowledge card in this exact format:
Summary: <1-2 sentences capturing the main point about sports betting>
Key Points:
- <concise bullet 1>
- <concise bullet 2>
- <concise bullet 3>
Themes: theme1, theme2, theme3

Begin the card now:"""
        
        try:
            response = self.llm.generate(prompt, max_tokens=200, temperature=0.3)
            if response:
                return self._parse_llm_response(response)
        except Exception as e:
            print(f"[RAG] LLM enhancement error: {e}")
        
        return None
    
    def _parse_llm_response(self, text: str) -> Optional[Dict]:
        """Parse LLM output into structured card"""
        if not text:
            return None
        
        result = {}
        
        # Extract summary
        summary_match = re.search(r'Summary:\s*(.+?)(?=Key Points:|Themes:|$)', text, re.DOTALL)
        if summary_match:
            result['summary'] = summary_match.group(1).strip()[:400]
        
        # Extract key points
        key_points = []
        kp_match = re.search(r'Key Points:\s*(.*?)(?=Themes:|$)', text, re.DOTALL)
        if kp_match:
            lines = kp_match.group(1).strip().split('\n')
            for line in lines:
                line = line.strip()
                if line.startswith('-'):
                    line = line[1:].strip()
                if line and len(line) > 5:
                    key_points.append(line[:220])
        if key_points:
            result['key_points'] = key_points[:5]
        
        # Extract themes
        themes_match = re.search(r'Themes:\s*(.+)', text)
        if themes_match:
            themes = [t.strip() for t in themes_match.group(1).split(',')]
            result['themes'] = [t for t in themes if t][:5]
        
        return result if result else None
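
    # Example of the format _parse_llm_response() expects (illustrative values):
    #   Summary: Closing line value is the clearest long-term edge signal.
    #   Key Points:
    #   - Grade bets against the closing line, not just wins and losses.
    #   - Steam moves often indicate sharp money.
    #   Themes: clv, line movement, bankroll
    # which parses into {'summary': ..., 'key_points': [...], 'themes': ['clv', 'line movement', 'bankroll']}.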
    
    def _compose_enriched_content(self, original: str, card: Dict) -> str:
        """Compose enriched content for storage"""
        lines = [
            f"[Source: {card.get('source', 'Unknown')} | Chunk {card.get('chunk_id', '?')}]",
            f"Summary: {card.get('summary', '')}"
        ]
        
        if card.get('key_points'):
            lines.append("Key Points:")
            for point in card['key_points']:
                lines.append(f"- {point}")
        
        if card.get('themes'):
            lines.append(f"Themes: {', '.join(card['themes'])}")
        
        lines.append(f"\nOriginal Content:\n{original}")
        
        return '\n'.join(lines)
    
    def enrich_all_chunks(self, chunks: List[Dict], progress_callback=None) -> List[Dict]:
        """Enrich all chunks with progress tracking"""
        enriched = []
        total = len(chunks)
        
        for i, chunk in enumerate(chunks):
            enriched_chunk = self.enrich_chunk(chunk)
            enriched.append(enriched_chunk)
            
            if progress_callback:
                progress_callback(i + 1, total)
            
            if (i + 1) % 10 == 0:
                print(f"[RAG] Enriched {i + 1}/{total} chunks (LLM used: {self.enrichment_used})")
        
        return enriched


# ============================================================================
# Flask Application
# ============================================================================
app = Flask(__name__)

# Global instances
config = RAGConfig()
pdf_processor = PDFProcessor(config)
vector_store = VectorStore(config)
ocr_service = OCRService(config)
enrichment_service = EnrichmentService(config)

# Progress tracking
upload_progress = {}
enrichment_status = {
    'status': 'idle',
    'progress': 0,
    'message': '',
    'llm_used': 0,
    'updated_at': None,
    'started_at': None,
    'source': None,
    'total_target': 0,
    'done_target': 0,
    'checkpoint_every': int(os.environ.get('ENRICHMENT_CHECKPOINT_EVERY', 10)),
}
progress_lock = threading.Lock()
enrichment_lock = threading.Lock()
processing_queue = queue.Queue()

# Enrichment runtime + persistence
enrichment_thread = None
ENRICHMENT_STATUS_FILE = config.cache_dir / 'enrichment_status.json'


def _save_enrichment_status_to_disk():
    """Persist enrichment status for resume after restarts."""
    try:
        tmp_path = ENRICHMENT_STATUS_FILE.with_suffix(ENRICHMENT_STATUS_FILE.suffix + '.tmp')
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(enrichment_status, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, ENRICHMENT_STATUS_FILE)
    except Exception as e:
        print(f"[RAG] Failed to persist enrichment status: {e}")


def _load_enrichment_status_from_disk():
    """Restore enrichment status on startup (and mark stale running jobs as interrupted)."""
    global enrichment_status
    try:
        if ENRICHMENT_STATUS_FILE.exists():
            with open(ENRICHMENT_STATUS_FILE, 'r', encoding='utf-8') as f:
                data = json.load(f)
            if isinstance(data, dict):
                enrichment_status.update(data)
    except Exception as e:
        print(f"[RAG] Failed to load enrichment status: {e}")

    # Normalize defaults
    enrichment_status.setdefault('status', 'idle')
    enrichment_status.setdefault('progress', 0)
    enrichment_status.setdefault('message', '')
    enrichment_status.setdefault('llm_used', 0)
    enrichment_status.setdefault('updated_at', None)
    enrichment_status.setdefault('started_at', None)
    enrichment_status.setdefault('source', None)
    enrichment_status.setdefault('total_target', 0)
    enrichment_status.setdefault('done_target', 0)
    enrichment_status.setdefault('checkpoint_every', int(os.environ.get('ENRICHMENT_CHECKPOINT_EVERY', 10)))

    # If the process restarted while enrichment was running, mark as interrupted (resume is safe).
    if enrichment_status.get('status') == 'running':
        enrichment_status['status'] = 'interrupted'
        enrichment_status['message'] = (
            enrichment_status.get('message')
            or 'Previous enrichment was interrupted. You can resume.'
        )
        enrichment_status['updated_at'] = datetime.utcnow().isoformat() + 'Z'
        _save_enrichment_status_to_disk()

# API key authentication (required in production, dev fallback allowed)
FLASK_API_KEY = os.environ.get('FLASK_API_KEY')
if not FLASK_API_KEY:
    if os.environ.get('FLASK_ENV') == 'production' or os.environ.get('NODE_ENV') == 'production':
        print("ERROR: FLASK_API_KEY environment variable is not set in production", file=sys.stderr)
        sys.exit(1)
    else:
        # Dev fallback only - never used in production
        FLASK_API_KEY = 'dev-local-flask-api-key'
        print("[RAG] Warning: Using dev fallback API key. Set FLASK_API_KEY for production.")

def require_api_key(f):
    @wraps(f)
    def decorated(*args, **kwargs):
        api_key = request.headers.get('X-API-Key', '')
        if api_key != FLASK_API_KEY:
            return jsonify({'error': 'Invalid API key'}), 401
        return f(*args, **kwargs)
    return decorated
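
# Example of how a protected route is called (host/port follow the defaults in the module
# docstring; concrete routes are registered further below):
#   curl -H "X-API-Key: $FLASK_API_KEY" http://127.0.0.1:5001/<protected-endpoint>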


# Initialize on startup
print("[RAG] Initializing Advanced Sports Betting RAG Service...")
vector_store.load()
_load_enrichment_status_from_disk()


def update_progress(job_id: str, status: str, progress: int, message: str, **extra):
    """Update upload progress"""
    with progress_lock:
        upload_progress[job_id] = {
            'status': status,
            'progress': progress,
            'message': message,
            'updated': time.time(),
            **extra
        }


def process_file_background(job_id: str, file_path: Path, filename: str):
    """Process uploaded file in background"""
    try:
        update_progress(job_id, 'processing', 10, 'Extracting text from document...')
        
        # Extract text
        text = pdf_processor.extract_text(file_path)
        
        # Try OCR if extraction failed
        if not text or len(text.strip()) < 100:
            update_progress(job_id, 'processing', 20, 'Text extraction failed, trying OCR...')
            text = ocr_service.extract_text(file_path)
        
        if not text or len(text.strip()) < 100:
            update_progress(job_id, 'failed', 0, 'Failed to extract text from document')
            return
        
        update_progress(job_id, 'processing', 40, f'Extracted {len(text)} characters. Chunking...')
        
        # Detect document type
        doc_type = pdf_processor.detect_document_type(text, filename)
        
        # Chunk text
        chunks = pdf_processor.chunk_text(text, doc_type)
        
        if not chunks:
            update_progress(job_id, 'failed', 0, 'No valid chunks extracted from document')
            return
        
        # Add metadata
        for i, chunk in enumerate(chunks):
            chunk['source'] = filename
            chunk['chunk_id'] = i
            chunk['total_chunks'] = len(chunks)
            chunk['document_type'] = doc_type
        
        update_progress(job_id, 'processing', 60, f'Created {len(chunks)} chunks. Generating embeddings...')
        
        # Replace in vector store
        vector_store.replace_source(filename, chunks)
        
        update_progress(job_id, 'processing', 90, 'Saving index...')
        vector_store.save()
        
        update_progress(
            job_id, 'completed', 100,
            f'Successfully processed! Added {len(chunks)} chunks.',
            chunks_added=len(chunks),
            document_type=doc_type,
            total_documents=len(vector_store.documents)
        )
        
    except Exception as e:
        print(f"[RAG] Error processing file: {e}")
        import traceback
        traceback.print_exc()
        update_progress(job_id, 'failed', 0, f'Error: {str(e)}')


# ============================================================================
# API Endpoints
# ============================================================================

@app.route('/')
def index():
    """Serve upload interface with chunk viewer and enrichment controls"""
    return '''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sports Betting RAG - Document Upload</title>
    <style>
        * { box-sizing: border-box; margin: 0; padding: 0; }
        body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); min-height: 100vh; padding: 20px; color: #e4e4e7; }
        .container { max-width: 1000px; margin: 0 auto; }
        h1 { color: #fbbf24; margin-bottom: 10px; font-size: 2rem; }
        .subtitle { color: #9ca3af; margin-bottom: 30px; }
        .card { background: rgba(255,255,255,0.05); border-radius: 12px; padding: 24px; margin-bottom: 20px; border: 1px solid rgba(255,255,255,0.1); }
        .card h2 { color: #fbbf24; font-size: 1.25rem; margin-bottom: 16px; }
        .stats { display: grid; grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); gap: 16px; }
        .stat { background: rgba(251,191,36,0.1); padding: 16px; border-radius: 8px; text-align: center; }
        .stat-value { font-size: 1.75rem; font-weight: bold; color: #fbbf24; }
        .stat-label { color: #9ca3af; font-size: 0.75rem; }
        form { display: flex; flex-direction: column; gap: 16px; }
        input[type="text"], input[type="file"], select { padding: 12px; border-radius: 8px; border: 1px solid rgba(255,255,255,0.2); background: rgba(0,0,0,0.2); color: white; font-size: 1rem; }
        input[type="file"] { cursor: pointer; }
        button { background: #fbbf24; color: #1a1a2e; padding: 12px 20px; border: none; border-radius: 8px; font-weight: 600; font-size: 0.9rem; cursor: pointer; transition: all 0.2s; }
        button:hover { background: #f59e0b; transform: translateY(-1px); }
        button:disabled { opacity: 0.5; cursor: not-allowed; transform: none; }
        button.secondary { background: #6366f1; color: white; }
        button.secondary:hover { background: #4f46e5; }
        button.danger { background: #ef4444; color: white; }
        .btn-group { display: flex; gap: 10px; flex-wrap: wrap; }
        .progress-container { margin-top: 20px; }
        .progress-bar { height: 8px; background: rgba(255,255,255,0.1); border-radius: 4px; overflow: hidden; }
        .progress-fill { height: 100%; background: linear-gradient(90deg, #fbbf24, #f59e0b); transition: width 0.3s; }
        .progress-text { margin-top: 8px; color: #9ca3af; font-size: 0.875rem; }
        .result { padding: 16px; border-radius: 8px; margin-top: 16px; }
        .result.success { background: rgba(34,197,94,0.2); border: 1px solid rgba(34,197,94,0.3); }
        .result.error { background: rgba(239,68,68,0.2); border: 1px solid rgba(239,68,68,0.3); }
        .result.info { background: rgba(59,130,246,0.2); border: 1px solid rgba(59,130,246,0.3); }
        .tabs { display: flex; gap: 10px; margin-bottom: 20px; border-bottom: 1px solid rgba(255,255,255,0.1); padding-bottom: 10px; }
        .tab { padding: 8px 16px; cursor: pointer; border-radius: 6px; color: #9ca3af; }
        .tab.active { background: rgba(251,191,36,0.2); color: #fbbf24; }
        .tab:hover { background: rgba(255,255,255,0.05); }
        .chunk-list { max-height: 500px; overflow-y: auto; }
        .chunk-item { background: rgba(0,0,0,0.2); padding: 16px; border-radius: 8px; margin-bottom: 12px; border-left: 4px solid #6366f1; }
        .chunk-item.enriched { border-left-color: #22c55e; }
        .chunk-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px; }
        .chunk-source { color: #fbbf24; font-weight: 600; font-size: 0.875rem; }
        .chunk-badge { padding: 2px 8px; border-radius: 4px; font-size: 0.75rem; }
        .chunk-badge.enriched { background: rgba(34,197,94,0.3); color: #22c55e; }
        .chunk-badge.raw { background: rgba(156,163,175,0.3); color: #9ca3af; }
        .chunk-summary { color: #e4e4e7; margin-bottom: 8px; font-size: 0.9rem; }
        .chunk-themes { display: flex; gap: 6px; flex-wrap: wrap; margin-bottom: 8px; }
        .theme-tag { background: rgba(99,102,241,0.3); color: #a5b4fc; padding: 2px 8px; border-radius: 4px; font-size: 0.75rem; }
        .chunk-content { color: #9ca3af; font-size: 0.8rem; background: rgba(0,0,0,0.2); padding: 10px; border-radius: 4px; white-space: pre-wrap; max-height: 150px; overflow-y: auto; }
        .pagination { display: flex; justify-content: center; gap: 10px; margin-top: 16px; }
        .hidden { display: none !important; }
    </style>
</head>
<body>
    <div class="container">
        <h1>🏆 Sports Betting RAG</h1>
        <p class="subtitle">Advanced Document Processing with FAISS + BM25 + DeepSeek LLM Enrichment</p>
        
        <div class="tabs">
            <div class="tab active" onclick="showTab('status')">📊 Status</div>
            <div class="tab" onclick="showTab('upload')">📤 Upload</div>
            <div class="tab" onclick="showTab('chunks')">📄 View Chunks</div>
            <div class="tab" onclick="showTab('enrich')">✨ Enrichment</div>
        </div>
        
        <!-- Status Tab -->
        <div id="tab-status" class="card">
            <h2>📊 System Status</h2>
            <div class="stats" id="stats">
                <div class="stat"><div class="stat-value" id="docCount">-</div><div class="stat-label">Total Chunks</div></div>
                <div class="stat"><div class="stat-value" id="enrichedCount">-</div><div class="stat-label">Enriched</div></div>
                <div class="stat"><div class="stat-value" id="rawCount">-</div><div class="stat-label">Raw</div></div>
                <div class="stat"><div class="stat-value" id="fileCount">-</div><div class="stat-label">Files</div></div>
                <div class="stat"><div class="stat-value" id="searchType">-</div><div class="stat-label">Search</div></div>
            </div>
        </div>
        
        <!-- Upload Tab -->
        <div id="tab-upload" class="card hidden">
            <h2>📤 Upload Document</h2>
            <form id="uploadForm">
                <input type="text" id="apiKey" placeholder="API Key" value="eventheodds-flask-api-key-2025">
                <input type="file" id="fileInput" accept=".pdf,.txt" required>
                <div class="btn-group">
                    <button type="submit" id="submitBtn">Upload & Process</button>
                    <button type="button" class="secondary" onclick="uploadWithEnrich()">Upload + Enrich</button>
                </div>
            </form>
            <div class="progress-container" id="progressContainer" style="display:none;">
                <div class="progress-bar"><div class="progress-fill" id="progressFill" style="width:0%"></div></div>
                <div class="progress-text" id="progressText">Starting...</div>
            </div>
            <div id="result"></div>
        </div>
        
        <!-- Chunks Tab -->
        <div id="tab-chunks" class="card hidden">
            <h2>📄 View Chunks</h2>
            <div style="display:flex; gap:10px; margin-bottom:16px; flex-wrap:wrap;">
                <select id="chunkFilter" onchange="loadChunks()">
                    <option value="all">All Chunks</option>
                    <option value="enriched">Enriched Only</option>
                    <option value="raw">Raw Only</option>
                </select>
                <select id="sourceFilter" onchange="loadChunks()">
                    <option value="">All Sources</option>
                </select>
                <button class="secondary" onclick="loadChunks()">🔄 Refresh</button>
            </div>
            <div class="chunk-list" id="chunkList">Loading...</div>
            <div class="pagination" id="pagination"></div>
        </div>
        
        <!-- Enrichment Tab -->
        <div id="tab-enrich" class="card hidden">
            <h2>✨ LLM Enrichment</h2>
            <p style="color:#9ca3af; margin-bottom:16px;">Use DeepSeek LLM to generate summaries, key points, and themes for all chunks.</p>
            <div class="btn-group">
                <button onclick="startEnrichment()">🚀 Enrich All Raw Chunks</button>
                <button class="secondary" onclick="resumeEnrichment()">▶️ Resume</button>
                <button class="secondary" onclick="checkEnrichmentStatus()">🔄 Check Status</button>
            </div>
            <div class="progress-container" id="enrichProgress" style="display:none;">
                <div class="progress-bar"><div class="progress-fill" id="enrichProgressFill" style="width:0%"></div></div>
                <div class="progress-text" id="enrichProgressText">Starting...</div>
            </div>
            <div id="enrichResult"></div>
        </div>
    </div>
    
    <script>
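        // BASE_URL mirrors the current path (minus any trailing slash) so every fetch() below still
        // resolves correctly when the dashboard is served behind a path prefix (e.g. a reverse proxy).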
        const BASE_URL = window.location.pathname.endsWith('/') ? window.location.pathname.slice(0,-1) : window.location.pathname;
        let currentPage = 1;
        
        function showTab(tabName, tabEl) {
            document.querySelectorAll('.card').forEach(c => c.classList.add('hidden'));
            document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
            document.getElementById('tab-' + tabName).classList.remove('hidden');
            if (tabEl) tabEl.classList.add('active');  // clicked tab passed explicitly instead of relying on the deprecated global `event`
            
            if (tabName === 'chunks') loadChunks();
            if (tabName === 'status') loadStatus();
            if (tabName === 'enrich') checkEnrichmentStatus();
        }
        
        async function loadStatus() {
            try {
                const resp = await fetch(BASE_URL + '/documents', {cache:'no-store'});
                const data = await resp.json();
                document.getElementById('docCount').textContent = data.total_chunks || 0;
                document.getElementById('enrichedCount').textContent = data.total_enriched || 0;
                document.getElementById('rawCount').textContent = (data.total_chunks || 0) - (data.total_enriched || 0);
                document.getElementById('fileCount').textContent = data.total_sources || 0;
                
                // Update source filter
                const sourceSelect = document.getElementById('sourceFilter');
                sourceSelect.innerHTML = '<option value="">All Sources</option>';
                (data.documents || []).forEach(d => {
                    sourceSelect.innerHTML += `<option value="${d.source}">${d.source} (${d.chunk_count})</option>`;
                });
                
                // Get search type
                const statusResp = await fetch(BASE_URL + '/status', {cache:'no-store'});
                const statusData = await statusResp.json();
                document.getElementById('searchType').textContent = statusData.has_faiss ? 'FAISS' : 'Cosine';
            } catch(e) {
                console.error('Status load failed:', e);
            }
        }
        
        async function loadChunks() {
            const filter = document.getElementById('chunkFilter').value;
            const source = document.getElementById('sourceFilter').value;
            const chunkList = document.getElementById('chunkList');
            chunkList.innerHTML = 'Loading...';
            
            let url = BASE_URL + '/chunks?page=' + currentPage + '&per_page=10';
            if (filter === 'enriched') url += '&enriched=true';
            if (filter === 'raw') url += '&raw=true';
            if (source) url += '&source=' + encodeURIComponent(source);
            
            try {
                const resp = await fetch(url, {cache:'no-store'});
                const data = await resp.json();
                
                if (!data.chunks || data.chunks.length === 0) {
                    chunkList.innerHTML = '<p style="color:#9ca3af;">No chunks found</p>';
                    return;
                }
                
                chunkList.innerHTML = data.chunks.map(c => `
                    <div class="chunk-item ${c.enriched ? 'enriched' : ''}">
                        <div class="chunk-header">
                            <span class="chunk-source">${c.source} #${c.chunk_id}</span>
                            <span class="chunk-badge ${c.enriched ? 'enriched' : 'raw'}">${c.enriched ? '✓ Enriched' : 'Raw'}</span>
                        </div>
                        ${c.summary ? `<div class="chunk-summary"><strong>Summary:</strong> ${c.summary}</div>` : ''}
                        ${c.themes && c.themes.length ? `<div class="chunk-themes">${c.themes.map(t => `<span class="theme-tag">${t}</span>`).join('')}</div>` : ''}
                        <div class="chunk-content">${c.content_preview}</div>
                    </div>
                `).join('');
                
                // Pagination
                const pag = data.pagination;
                document.getElementById('pagination').innerHTML = `
                    <button ${pag.page <= 1 ? 'disabled' : ''} onclick="currentPage=${pag.page-1};loadChunks()">← Prev</button>
                    <span style="color:#9ca3af;">Page ${pag.page} of ${pag.pages} (${pag.total} chunks)</span>
                    <button ${pag.page >= pag.pages ? 'disabled' : ''} onclick="currentPage=${pag.page+1};loadChunks()">Next →</button>
                `;
            } catch(e) {
                chunkList.innerHTML = '<p style="color:#ef4444;">Error loading chunks</p>';
            }
        }
        
        async function startEnrichment() {
            const apiKey = document.getElementById('apiKey').value.trim() || 'eventheodds-flask-api-key-2025';
            document.getElementById('enrichProgress').style.display = 'block';
            document.getElementById('enrichResult').innerHTML = '';
            
            try {
                const resp = await fetch(BASE_URL + '/enrich', {
                    method: 'POST',
                    headers: {'X-API-Key': apiKey, 'Content-Type': 'application/json'}
                });
                const data = await resp.json();
                
                if (data.error) {
                    document.getElementById('enrichResult').innerHTML = `<div class="result error">❌ ${data.error}</div>`;
                } else {
                    pollEnrichment();
                }
            } catch(e) {
                document.getElementById('enrichResult').innerHTML = `<div class="result error">❌ Error: ${e.message}</div>`;
            }
        }

        async function resumeEnrichment() {
            // Resume is the same API call as start; the server will skip already-enriched chunks.
            await startEnrichment();
        }
        
        async function pollEnrichment() {
            const poll = async () => {
                const resp = await fetch(BASE_URL + '/enrichment-status', {cache:'no-store'});
                const data = await resp.json();
                
                document.getElementById('enrichProgressFill').style.width = data.progress + '%';
                document.getElementById('enrichProgressText').textContent = data.message || 'Processing...';
                
                if (data.status === 'completed') {
                    document.getElementById('enrichResult').innerHTML = `<div class="result success">✅ ${data.message}</div>`;
                    loadStatus();
                    return;
                } else if (data.status === 'failed') {
                    document.getElementById('enrichResult').innerHTML = `<div class="result error">❌ ${data.message}</div>`;
                    return;
                } else if (data.status === 'interrupted') {
                    document.getElementById('enrichResult').innerHTML =
                      `<div class="result info">⏸️ Interrupted. Progress saved. Click <strong>Resume</strong> to continue.</div>`;
                    return;
                } else if (data.status === 'idle') {
                    document.getElementById('enrichProgress').style.display = 'none';
                    return;
                }
                
                setTimeout(poll, 1000);
            };
            poll();
        }
        
        async function checkEnrichmentStatus() {
            const resp = await fetch(BASE_URL + '/enrichment-status', {cache:'no-store'});
            const data = await resp.json();
            
            if (data.status === 'running' || data.status === 'interrupted') {
                document.getElementById('enrichProgress').style.display = 'block';
                document.getElementById('enrichProgressFill').style.width = data.progress + '%';
                document.getElementById('enrichProgressText').textContent = data.message;
                if (data.status === 'running') {
                  pollEnrichment();
                } else {
                  document.getElementById('enrichResult').innerHTML =
                    `<div class="result info">⏸️ Interrupted. Progress saved. Click <strong>Resume</strong> to continue. (LLM calls: ${data.llm_used || 0})</div>`;
                }
            } else {
                document.getElementById('enrichProgress').style.display = 'none';
                document.getElementById('enrichResult').innerHTML = `<div class="result info">Status: ${data.status} | LLM calls: ${data.llm_used || 0}</div>`;
            }
        }
        
        async function uploadWithEnrich() {
            const fileInput = document.getElementById('fileInput');
            const apiKey = document.getElementById('apiKey').value.trim() || 'eventheodds-flask-api-key-2025';
            
            if (!fileInput.files[0]) {
                document.getElementById('result').innerHTML = '<div class="result error">Please select a file</div>';
                return;
            }
            
            document.getElementById('submitBtn').disabled = true;
            document.getElementById('progressContainer').style.display = 'block';
            document.getElementById('progressText').textContent = 'Uploading with enrichment...';
            
            const formData = new FormData();
            formData.append('file', fileInput.files[0]);
            
            try {
                const resp = await fetch(BASE_URL + '/enrich-upload', {
                    method: 'POST',
                    headers: {'X-API-Key': apiKey},
                    body: formData
                });
                const data = await resp.json();
                if (data.job_id) pollProgress(data.job_id, apiKey);
            } catch(e) {
                document.getElementById('result').innerHTML = `<div class="result error">❌ ${e.message}</div>`;
                document.getElementById('submitBtn').disabled = false;
            }
        }
        
        // Standard upload
        document.getElementById('uploadForm').onsubmit = async (e) => {
            e.preventDefault();
            const fileInput = document.getElementById('fileInput');
            const apiKey = document.getElementById('apiKey').value.trim() || 'eventheodds-flask-api-key-2025';
            
            if (!fileInput.files[0]) {
                document.getElementById('result').innerHTML = '<div class="result error">Please select a file</div>';
                return;
            }
            
            document.getElementById('submitBtn').disabled = true;
            document.getElementById('progressContainer').style.display = 'block';
            
            const formData = new FormData();
            formData.append('file', fileInput.files[0]);
            
            try {
                const resp = await fetch(BASE_URL + '/upload', {
                    method: 'POST',
                    headers: {'X-API-Key': apiKey},
                    body: formData
                });
                const data = await resp.json();
                if (data.job_id) pollProgress(data.job_id, apiKey);
            } catch(e) {
                document.getElementById('result').innerHTML = `<div class="result error">❌ ${e.message}</div>`;
                document.getElementById('submitBtn').disabled = false;
            }
        };
        
        async function pollProgress(jobId, apiKey) {
            const poll = async () => {
                const resp = await fetch(BASE_URL + '/progress/' + jobId, {cache:'no-store'});
                const data = await resp.json();
                
                document.getElementById('progressFill').style.width = data.progress + '%';
                document.getElementById('progressText').textContent = data.message || 'Processing...';
                
                if (data.status === 'completed') {
                    document.getElementById('result').innerHTML = `<div class="result success">✅ ${data.message}</div>`;
                    document.getElementById('submitBtn').disabled = false;
                    loadStatus();
                    return;
                } else if (data.status === 'failed') {
                    document.getElementById('result').innerHTML = `<div class="result error">❌ ${data.message}</div>`;
                    document.getElementById('submitBtn').disabled = false;
                    return;
                }
                setTimeout(poll, 500);
            };
            poll();
        }
        
        loadStatus();
    </script>
</body>
</html>'''


@app.route('/health', methods=['GET'])
def health():
    """Health check"""
    return jsonify({
        'status': 'healthy',
        'total_documents': len(vector_store.documents),
        'has_faiss': HAS_FAISS,
        'has_sentence_transformers': HAS_SENTENCE_TRANSFORMERS,
        'has_pymupdf': HAS_PYMUPDF
    })
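
# Illustrative health probe (assumes the default port 5001 from RAG_PORT; adjust for your deployment):
#   curl -s http://127.0.0.1:5001/health
# Response shape: {"status": "healthy", "total_documents": ..., "has_faiss": ..., "has_sentence_transformers": ..., "has_pymupdf": ...}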


@app.route('/status', methods=['GET'])
def status():
    """Get system status"""
    # Count unique sources
    sources = set(d.get('source', '') for d in vector_store.documents)
    
    return jsonify({
        'total_documents': len(vector_store.documents),
        'total_files': len(sources),
        'has_faiss': HAS_FAISS,
        'has_sentence_transformers': HAS_SENTENCE_TRANSFORMERS,
        'embedding_model': 'SentenceTransformer' if HAS_SENTENCE_TRANSFORMERS else 'hash-based',
        'search_type': 'FAISS + BM25 Hybrid' if HAS_FAISS else 'Cosine + BM25 Hybrid'
    })


@app.route('/upload', methods=['POST'])
@require_api_key
def upload_file():
    """Upload and process a document"""
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400
    
    file = request.files['file']
    if not file.filename:
        return jsonify({'error': 'Empty filename'}), 400
    
    # Validate extension
    ext = Path(file.filename).suffix.lower()
    if ext not in ['.pdf', '.txt']:
        return jsonify({'error': 'Only PDF and TXT files are supported'}), 400
    
    # Save file
    safe_filename = secure_filename(file.filename)
    file_path = config.pdf_dir / safe_filename
    file.save(str(file_path))
    
    # Start background processing
    job_id = str(uuid.uuid4())[:8]
    update_progress(job_id, 'starting', 0, 'File uploaded, starting processing...')
    
    thread = threading.Thread(
        target=process_file_background,
        args=(job_id, file_path, safe_filename),
        daemon=True
    )
    thread.start()
    
    return jsonify({
        'success': True,
        'job_id': job_id,
        'message': 'File uploaded. Processing started.',
        'filename': safe_filename
    }), 202
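
# Illustrative upload flow (file name, port, and API key are placeholders; the key must match
# whatever require_api_key expects in this deployment):
#   curl -s -X POST -H "X-API-Key: <api-key>" -F "file=@strategy.pdf" http://127.0.0.1:5001/upload
#   -> {"success": true, "job_id": "<job_id>", "message": "File uploaded. Processing started.", ...}
#   curl -s http://127.0.0.1:5001/progress/<job_id>   # poll until status is "completed" or "failed"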


@app.route('/progress/<job_id>', methods=['GET'])
def get_progress(job_id):
    """Get upload progress"""
    with progress_lock:
        if job_id in upload_progress:
            return jsonify(upload_progress[job_id])
    return jsonify({'status': 'unknown', 'progress': 0, 'message': 'Job not found'}), 404


@app.route('/ask', methods=['POST'])
@require_api_key
def ask():
    """Query the RAG system with enriched context support"""
    data = request.get_json(silent=True) or {}
    question = data.get('question', '').strip()
    use_enriched = data.get('use_enriched', True)  # Default to using enriched data
    
    if not question:
        return jsonify({'error': 'Question is required'}), 400
    
    k = data.get('k', 5)
    
    # Search
    results = vector_store.search(question, k=k)
    
    if not results:
        return jsonify({
            'answer': "I don't have enough information to answer that question.",
            'sources': [],
            'chunks_searched': len(vector_store.documents),
            'enriched_count': 0
        })
    
    # Build context - prefer enriched summaries when available
    context_parts = []
    enriched_count = 0
    all_themes = set()
    all_key_points = []
    
    for r in results:
        meta = r.get('metadata', {})
        is_enriched = meta.get('enriched', False)
        
        if use_enriched and is_enriched and meta.get('summary'):
            # Use enriched summary + key points
            enriched_count += 1
            part = f"**Summary**: {meta.get('summary', '')}"
            
            key_points = meta.get('key_points', [])
            if key_points:
                all_key_points.extend(key_points[:2])  # Collect top 2 key points per chunk
                part += f"\n**Key Points**: {'; '.join(key_points[:3])}"
            
            themes = meta.get('themes', [])
            if themes:
                all_themes.update(themes)
            
            context_parts.append(part)
        else:
            # Fall back to raw content
            context_parts.append(r['content'][:500])
    
    context = '\n\n---\n\n'.join(context_parts)
    
    # --- LIVE DATA INJECTION (BALLDONTLIE 2025-26) ---
    live_context = ""
    try:
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        context_path = os.path.join(base_dir, 'airagagent', 'nfl_context_2025.txt')
        
        if os.path.exists(context_path):
            with open(context_path, 'r', encoding='utf-8') as f:
                live_context = f.read().strip()
                print(f"[RAG] Injected {len(live_context)} chars of live NFL context")
        else:
             print(f"[RAG] Context file not found at {context_path}")
    except Exception as e:
        print(f"[RAG] Context injection error: {e}")

    # Build prompt for LLM (Grok)
    prompt = f"""You are the AI Guru, an expert sports betting assistant.
Answer the user's question using the provided Context and Live Data.

=== LIVE REAL-TIME DATA (2025-2026 NFL SEASON) ===
{live_context}

=== RETRIEVED KNOWLEDGE (STRATEGY & BOOKS) ===
{context[:2000]}

=== QUESTION ===
{question}

INSTRUCTIONS:
1. Prioritize 'LIVE REAL-TIME DATA' for specific 2025-26 standings, records, and current stats.
2. Use 'RETRIEVED KNOWLEDGE' for betting theory, concepts, and general strategy.
3. If the question asks about a specific team's CURRENT record (e.g. Patriots), use the Live Data.
4. Synthesize everything into a natural, helpful response.
5. If you cannot answer, say so, but provide relevant context.

ANSWER:"""

    # Generate answer with LLM
    try:
        llm_service = LLMService()  # Grok backend is enforced
        generated_answer = llm_service.generate(prompt, max_tokens=1000, temperature=0.5)
        if generated_answer:
            answer = generated_answer.strip()
        else:
            answer = "I could not generate a specific answer (Grok API return empty). Here is the relevant context:\n\n" + context[:2500]
    except Exception as e:
        print(f"[RAG] LLM generation error: {e}")
        answer = "Error generating answer. Here is the context:\n\n" + context[:2500]

    # Build detailed sources with enrichment info
    sources = []
    for r in results:
        meta = r.get('metadata', {})
        sources.append({
            'source': meta.get('source', 'unknown'),
            'score': round(r['score'], 3),
            'preview': r['content'][:150],
            'enriched': meta.get('enriched', False),
            'summary': meta.get('summary', '')[:200] if meta.get('summary') else None,
            'themes': meta.get('themes', [])
        })
    
    return jsonify({
        'answer': answer,
        'sources': sources,
        'chunks_searched': len(vector_store.documents),
        'chunks_returned': len(results),
        'enriched_count': enriched_count,
        'themes': list(all_themes)[:10],
        'key_points': all_key_points[:5]
    })
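
# Illustrative query (JSON fields match what ask() reads above; key and port are placeholders):
#   curl -s -X POST -H "X-API-Key: <api-key>" -H "Content-Type: application/json" \
#        -d '{"question": "What is reverse line movement?", "k": 5, "use_enriched": true}' \
#        http://127.0.0.1:5001/ask
# The response includes "answer", per-chunk "sources", plus aggregated "themes" and "key_points".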


@app.route('/search', methods=['POST'])
@require_api_key
def search():
    """Search documents"""
    data = request.get_json(silent=True) or {}
    query = data.get('query', '').strip()
    k = data.get('k', 10)
    
    if not query:
        return jsonify({'error': 'Query is required'}), 400
    
    results = vector_store.search(query, k=k)
    
    return jsonify({
        'results': results,
        'query': query,
        'total_documents': len(vector_store.documents)
    })
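
# Illustrative raw search (ranked chunks only, no LLM answer; key and port are placeholders):
#   curl -s -X POST -H "X-API-Key: <api-key>" -H "Content-Type: application/json" \
#        -d '{"query": "bankroll management", "k": 10}' http://127.0.0.1:5001/search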


@app.route('/documents', methods=['GET'])
def list_documents():
    """List all documents with enrichment status"""
    sources = {}
    for doc in vector_store.documents:
        source = doc.get('source', 'unknown')
        if source not in sources:
            sources[source] = {
                'chunk_count': 0,
                'enriched_count': 0,
                'document_type': doc.get('document_type', 'unknown')
            }
        sources[source]['chunk_count'] += 1
        if doc.get('enriched'):
            sources[source]['enriched_count'] += 1
    
    # Calculate totals
    total_chunks = len(vector_store.documents)
    total_enriched = sum(1 for d in vector_store.documents if d.get('enriched'))
    
    return jsonify({
        'documents': [{'source': k, **v} for k, v in sources.items()],
        'total_sources': len(sources),
        'total_chunks': total_chunks,
        'total_enriched': total_enriched,
        'enrichment_percentage': round((total_enriched / total_chunks * 100) if total_chunks > 0 else 0, 1)
    })
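
# Illustrative listing (this read-only route takes no API key; port is a placeholder):
#   curl -s http://127.0.0.1:5001/documents
# Returns per-source chunk/enrichment counts plus the overall enrichment_percentage.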


@app.route('/chunks', methods=['GET'])
def list_chunks():
    """List chunks with pagination and filtering"""
    # Query params
    source = request.args.get('source')
    enriched_only = request.args.get('enriched', '').lower() == 'true'
    raw_only = request.args.get('raw', '').lower() == 'true'
    page = int(request.args.get('page', 1))
    per_page = int(request.args.get('per_page', 20))
    
    # Filter chunks
    filtered = vector_store.documents
    
    if source:
        filtered = [d for d in filtered if d.get('source') == source]
    
    if enriched_only:
        filtered = [d for d in filtered if d.get('enriched')]
    elif raw_only:
        filtered = [d for d in filtered if not d.get('enriched')]
    
    # Paginate
    total = len(filtered)
    start = (page - 1) * per_page
    end = start + per_page
    paginated = filtered[start:end]
    
    # Format chunks for display
    chunks = []
    for i, doc in enumerate(paginated, start=start):
        chunks.append({
            'index': i,
            'source': doc.get('source', 'unknown'),
            'chunk_id': doc.get('chunk_id', i),
            'enriched': doc.get('enriched', False),
            'summary': doc.get('summary', '')[:200] if doc.get('summary') else None,
            'key_points': doc.get('key_points', [])[:3],
            'themes': doc.get('themes', []),
            'content_preview': doc.get('content', '')[:300] + '...' if len(doc.get('content', '')) > 300 else doc.get('content', ''),
            'content_length': len(doc.get('content', ''))
        })
    
    return jsonify({
        'chunks': chunks,
        'pagination': {
            'page': page,
            'per_page': per_page,
            'total': total,
            'pages': (total + per_page - 1) // per_page
        },
        'filters': {
            'source': source,
            'enriched_only': enriched_only,
            'raw_only': raw_only
        }
    })
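
# Illustrative paginated listing (query params match list_chunks() above; the values are examples):
#   curl -s 'http://127.0.0.1:5001/chunks?page=2&per_page=10&enriched=true&source=strategy.pdf'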


@app.route('/chunks/<int:chunk_index>', methods=['GET'])
def get_chunk(chunk_index):
    """Get a single chunk with full details"""
    if chunk_index < 0 or chunk_index >= len(vector_store.documents):
        return jsonify({'error': 'Chunk not found'}), 404
    
    doc = vector_store.documents[chunk_index]
    
    return jsonify({
        'index': chunk_index,
        'source': doc.get('source', 'unknown'),
        'chunk_id': doc.get('chunk_id'),
        'document_type': doc.get('document_type'),
        'enriched': doc.get('enriched', False),
        'summary': doc.get('summary'),
        'key_points': doc.get('key_points', []),
        'themes': doc.get('themes', []),
        'content': doc.get('content', ''),
        'content_length': len(doc.get('content', '')),
        'section': doc.get('section'),
        'total_chunks': doc.get('total_chunks')
    })


@app.route('/chunks/by-source/<path:source_name>', methods=['GET'])
def get_chunks_by_source(source_name):
    """Get all chunks for a specific source"""
    safe_source = secure_filename(source_name)
    chunks = [d for d in vector_store.documents if d.get('source') == safe_source]
    
    if not chunks:
        return jsonify({'error': 'Source not found'}), 404
    
    enriched_count = sum(1 for c in chunks if c.get('enriched'))
    
    return jsonify({
        'source': safe_source,
        'total_chunks': len(chunks),
        'enriched_chunks': enriched_count,
        'chunks': [{
            'chunk_id': c.get('chunk_id'),
            'enriched': c.get('enriched', False),
            'summary': c.get('summary', '')[:200] if c.get('summary') else None,
            'themes': c.get('themes', []),
            'content_preview': c.get('content', '')[:200]
        } for c in chunks]
    })


@app.route('/delete/<path:filename>', methods=['DELETE', 'POST'])
@require_api_key
def delete_document(filename):
    """Delete a document"""
    safe_filename = secure_filename(filename)
    
    # Remove from vector store
    original_count = len(vector_store.documents)
    vector_store.documents = [d for d in vector_store.documents if d.get('source') != safe_filename]
    removed_count = original_count - len(vector_store.documents)
    
    if removed_count > 0:
        # Rebuild index
        if vector_store.documents:
            contents = [d['content'] for d in vector_store.documents]
            embeddings = vector_store._get_embeddings_batch(contents)
            
            if HAS_FAISS:
                vector_store.index = faiss.IndexFlatIP(vector_store.embedding_dim)
                embeddings_np = embeddings.astype('float32')
                faiss.normalize_L2(embeddings_np)
                vector_store.index.add(embeddings_np)
            else:
                vector_store.embeddings = embeddings
        else:
            if HAS_FAISS:
                vector_store.index = faiss.IndexFlatIP(vector_store.embedding_dim)
            else:
                vector_store.embeddings = np.array([])
        
        vector_store._prepare_searchable_texts()
        vector_store.save()
        
        # Delete source file
        file_path = config.pdf_dir / safe_filename
        if file_path.exists():
            file_path.unlink()
        
        return jsonify({
            'success': True,
            'message': f'Deleted {safe_filename} ({removed_count} chunks)',
            'chunks_removed': removed_count
        })
    
    return jsonify({'error': 'Document not found'}), 404
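
# Illustrative delete (filename and key are placeholders). Note that removing a source re-embeds
# every remaining chunk to rebuild the index, so large stores may take a while:
#   curl -s -X DELETE -H "X-API-Key: <api-key>" http://127.0.0.1:5001/delete/strategy.pdf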


@app.route('/reload', methods=['POST'])
@require_api_key
def reload():
    """Reload the index"""
    vector_store.load()
    return jsonify({
        'success': True,
        'total_documents': len(vector_store.documents)
    })


@app.route('/enrich', methods=['POST'])
@require_api_key
def enrich_documents():
    """Enrich documents with LLM-generated metadata"""
    global enrichment_status, enrichment_thread
    
    with enrichment_lock:
        if enrichment_status.get('status') == 'running' and enrichment_thread is not None and enrichment_thread.is_alive():
            return jsonify({
                'error': 'Enrichment already running',
                'status': enrichment_status
            }), 409
        # Handle stale "running" state (process restarted / thread died)
        if enrichment_status.get('status') == 'running':
            enrichment_status['status'] = 'interrupted'
            enrichment_status['message'] = enrichment_status.get('message') or 'Previous enrichment was interrupted. You can resume.'
            enrichment_status['updated_at'] = datetime.utcnow().isoformat() + 'Z'
            _save_enrichment_status_to_disk()
    
    data = request.get_json(silent=True) or {}
    source_filter = data.get('source')  # Optional: enrich only specific source
    
    def run_enrichment():
        global enrichment_status
        try:
            # Clamp to at least 1 so the periodic "enriched_this_run % checkpoint_every" check cannot divide by zero
            checkpoint_every = max(1, int(os.environ.get('ENRICHMENT_CHECKPOINT_EVERY', enrichment_status.get('checkpoint_every', 10) or 10)))

            # Ensure searchable caches exist
            try:
                vector_store._prepare_searchable_texts()
            except Exception:
                pass

            # Determine target set (for accurate progress + resume)
            if source_filter:
                target_indices = [i for i, d in enumerate(vector_store.documents) if d.get('source') == source_filter]
            else:
                target_indices = list(range(len(vector_store.documents)))

            total_target = len(target_indices)
            if total_target == 0:
                with enrichment_lock:
                    enrichment_status['status'] = 'completed'
                    enrichment_status['progress'] = 100
                    enrichment_status['message'] = 'No chunks found to enrich.'
                    enrichment_status['llm_used'] = enrichment_service.enrichment_used
                    enrichment_status['source'] = source_filter
                    enrichment_status['total_target'] = 0
                    enrichment_status['done_target'] = 0
                    enrichment_status['updated_at'] = datetime.utcnow().isoformat() + 'Z'
                    enrichment_status['checkpoint_every'] = checkpoint_every
                    _save_enrichment_status_to_disk()
                return

            already_enriched = sum(1 for i in target_indices if vector_store.documents[i].get('enriched'))
            remaining = total_target - already_enriched
            started_at = datetime.utcnow().isoformat() + 'Z'

            with enrichment_lock:
                enrichment_status['status'] = 'running'
                enrichment_status['progress'] = int((already_enriched / total_target) * 100) if total_target else 0
                enrichment_status['message'] = f'Enrichment: {already_enriched}/{total_target} chunks already enriched; processing remaining {remaining}...'
                enrichment_status['llm_used'] = enrichment_service.enrichment_used
                enrichment_status['started_at'] = enrichment_status.get('started_at') or started_at
                enrichment_status['updated_at'] = started_at
                enrichment_status['source'] = source_filter
                enrichment_status['total_target'] = total_target
                enrichment_status['done_target'] = already_enriched
                enrichment_status['checkpoint_every'] = checkpoint_every
                _save_enrichment_status_to_disk()

            if remaining <= 0:
                with enrichment_lock:
                    enrichment_status['status'] = 'completed'
                    enrichment_status['progress'] = 100
                    enrichment_status['message'] = 'No chunks need enrichment (all already enriched)'
                    enrichment_status['llm_used'] = enrichment_service.enrichment_used
                    enrichment_status['updated_at'] = datetime.utcnow().isoformat() + 'Z'
                    _save_enrichment_status_to_disk()
                return

            enriched_this_run = 0

            for doc_index in target_indices:
                doc = vector_store.documents[doc_index]
                if doc.get('enriched'):
                    continue

                enriched_doc = enrichment_service.enrich_chunk(doc)
                doc.update(enriched_doc)
                doc['enriched_at'] = datetime.utcnow().isoformat() + 'Z'

                # Update cached searchable text for this doc
                try:
                    new_text = vector_store._build_searchable_text(doc)
                    if len(vector_store.searchable_texts) == len(vector_store.documents):
                        vector_store.searchable_texts[doc_index] = new_text.lower()
                    if len(vector_store.doc_lengths) == len(vector_store.documents):
                        old_len = vector_store.doc_lengths[doc_index]
                        new_len = len(new_text.split())
                        vector_store.doc_lengths[doc_index] = new_len
                        n = len(vector_store.doc_lengths)
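                        # Keep the BM25 average document length in sync without a full rescan:
                        # new_avg = (old_avg * n - old_len + new_len) / n, since only this chunk's length changed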
                        if n > 0:
                            vector_store.avg_doc_length = (vector_store.avg_doc_length * n - old_len + new_len) / n
                except Exception:
                    pass

                enriched_this_run += 1
                done = already_enriched + enriched_this_run
                progress = int((done / total_target) * 100) if total_target else 0

                with enrichment_lock:
                    enrichment_status['status'] = 'running'
                    enrichment_status['progress'] = progress
                    enrichment_status['message'] = f'Enriched {done}/{total_target} chunks (this run: {enriched_this_run}/{remaining})'
                    enrichment_status['llm_used'] = enrichment_service.enrichment_used
                    enrichment_status['updated_at'] = datetime.utcnow().isoformat() + 'Z'
                    enrichment_status['done_target'] = done
                    enrichment_status['checkpoint_every'] = checkpoint_every

                # Periodic checkpoint so progress is not lost on interruptions
                if enriched_this_run % checkpoint_every == 0:
                    vector_store.save_metadata_only()
                    with enrichment_lock:
                        _save_enrichment_status_to_disk()

            # Final checkpoint and completion
            vector_store.save_metadata_only()
            with enrichment_lock:
                enrichment_status['status'] = 'completed'
                enrichment_status['progress'] = 100
                enrichment_status['message'] = f'Successfully enriched {enriched_this_run} chunks (target: {total_target})'
                enrichment_status['llm_used'] = enrichment_service.enrichment_used
                enrichment_status['updated_at'] = datetime.utcnow().isoformat() + 'Z'
                enrichment_status['done_target'] = total_target
                _save_enrichment_status_to_disk()
            
        except Exception as e:
            print(f"[RAG] Enrichment error: {e}")
            import traceback
            traceback.print_exc()
            with enrichment_lock:
                enrichment_status['status'] = 'failed'
                enrichment_status['message'] = f'Error: {str(e)}'
                enrichment_status['llm_used'] = enrichment_service.enrichment_used
                enrichment_status['updated_at'] = datetime.utcnow().isoformat() + 'Z'
                _save_enrichment_status_to_disk()
    
    # Run in background thread
    enrichment_thread = threading.Thread(target=run_enrichment, daemon=True)
    enrichment_thread.start()
    
    return jsonify({
        'success': True,
        'message': 'Enrichment started',
        'status': 'running'
    }), 202
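
# Illustrative enrichment run (key and port are placeholders; "source" is optional and limits the run
# to one file). Re-POSTing after an interruption resumes, because already-enriched chunks are skipped:
#   curl -s -X POST -H "X-API-Key: <api-key>" -H "Content-Type: application/json" \
#        -d '{"source": "strategy.pdf"}' http://127.0.0.1:5001/enrich
#   curl -s http://127.0.0.1:5001/enrichment-status   # poll for progress / completion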


@app.route('/enrichment-status', methods=['GET'])
def get_enrichment_status():
    """Get current enrichment status"""
    with enrichment_lock:
        return jsonify(enrichment_status)


@app.route('/enrich-upload', methods=['POST'])
@require_api_key
def enrich_on_upload():
    """Upload and process with immediate enrichment"""
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400
    
    file = request.files['file']
    if not file.filename:
        return jsonify({'error': 'Empty filename'}), 400
    
    ext = Path(file.filename).suffix.lower()
    if ext not in ['.pdf', '.txt']:
        return jsonify({'error': 'Only PDF and TXT files are supported'}), 400
    
    safe_filename = secure_filename(file.filename)
    file_path = config.pdf_dir / safe_filename
    file.save(str(file_path))
    
    job_id = str(uuid.uuid4())[:8]
    update_progress(job_id, 'starting', 0, 'File uploaded, starting processing with enrichment...')
    
    def process_and_enrich():
        try:
            update_progress(job_id, 'processing', 10, 'Extracting text...')
            
            text = pdf_processor.extract_text(file_path)
            if not text or len(text.strip()) < 100:
                update_progress(job_id, 'processing', 15, 'Trying OCR...')
                text = ocr_service.extract_text(file_path)
            
            if not text or len(text.strip()) < 100:
                update_progress(job_id, 'failed', 0, 'Failed to extract text')
                return
            
            update_progress(job_id, 'processing', 30, 'Chunking document...')
            
            doc_type = pdf_processor.detect_document_type(text, safe_filename)
            chunks = pdf_processor.chunk_text(text, doc_type)
            
            if not chunks:
                update_progress(job_id, 'failed', 0, 'No valid chunks extracted')
                return
            
            for i, chunk in enumerate(chunks):
                chunk['source'] = safe_filename
                chunk['chunk_id'] = i
                chunk['total_chunks'] = len(chunks)
                chunk['document_type'] = doc_type
            
            update_progress(job_id, 'processing', 50, f'Enriching {len(chunks)} chunks with LLM...')
            
            # Enrich chunks
            def enrich_progress(current, total):
                pct = 50 + int((current / total) * 30)
                update_progress(job_id, 'processing', pct, f'Enriching chunk {current}/{total}...')
            
            enriched_chunks = enrichment_service.enrich_all_chunks(chunks, enrich_progress)
            
            update_progress(job_id, 'processing', 85, 'Adding to vector store...')
            
            vector_store.replace_source(safe_filename, enriched_chunks)
            
            update_progress(job_id, 'processing', 95, 'Saving index...')
            vector_store.save()
            
            update_progress(
                job_id, 'completed', 100,
                f'Successfully processed and enriched {len(enriched_chunks)} chunks!',
                chunks_added=len(enriched_chunks),
                llm_used=enrichment_service.enrichment_used
            )
            
        except Exception as e:
            print(f"[RAG] Error: {e}")
            import traceback
            traceback.print_exc()
            update_progress(job_id, 'failed', 0, f'Error: {str(e)}')
    
    thread = threading.Thread(target=process_and_enrich, daemon=True)
    thread.start()
    
    return jsonify({
        'success': True,
        'job_id': job_id,
        'message': 'File uploaded. Processing with enrichment started.',
        'filename': safe_filename
    }), 202
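
# Illustrative one-shot upload + enrichment (placeholders as above); poll /progress/<job_id> exactly
# as with the plain /upload route:
#   curl -s -X POST -H "X-API-Key: <api-key>" -F "file=@strategy.pdf" http://127.0.0.1:5001/enrich-upload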


# ============================================================================
# Main
# ============================================================================
if __name__ == '__main__':
    port = int(os.environ.get('RAG_PORT', 5001))
    print(f"[RAG] Starting Advanced Sports Betting RAG Service on port {port}")
    print(f"[RAG] FAISS: {HAS_FAISS}, SentenceTransformers: {HAS_SENTENCE_TRANSFORMERS}, PyMuPDF: {HAS_PYMUPDF}")
    print(f"[RAG] Documents loaded: {len(vector_store.documents)}")
    app.run(host='0.0.0.0', port=port, debug=False)


