#!/usr/bin/env python3
"""
Public OCR API Endpoint
Proxies requests to internal DeepSeek-OCR service with API key authentication.
"""

import hmac
import logging
import os
import tempfile
import time
from functools import wraps

import requests
from flask import Flask, request, jsonify, Response
from flask_cors import CORS

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app, origins="*")  # Allow all origins for local development

# Configuration
# Key that clients must present; override the built-in default via OCR_API_KEY.
_DEFAULT_API_KEY = "deepseek-ocr-2024-secret-key"
API_KEY = os.environ.get("OCR_API_KEY", _DEFAULT_API_KEY)
if API_KEY == _DEFAULT_API_KEY:
    # The default is visible in the source, so it offers no real protection.
    logger.warning(
        "Using the built-in default API key; set OCR_API_KEY before exposing this service"
    )
OCR_SERVICE_URL = "http://127.0.0.1:5003"  # Base URL of the internal OCR service
MAX_FILE_SIZE = 100 * 1024 * 1024  # 100MB max

# Actually enforce the limit: Flask rejects request bodies larger than
# MAX_CONTENT_LENGTH with 413 Request Entity Too Large. Without this line the
# MAX_FILE_SIZE constant was declared but never applied.
app.config["MAX_CONTENT_LENGTH"] = MAX_FILE_SIZE

def require_api_key(f):
    """Decorator that rejects requests lacking the configured API key.

    The key is read from the ``X-API-Key`` header, falling back to the
    ``api_key`` query parameter. When the key is missing or does not match
    ``API_KEY``, a JSON 401 response is returned and the wrapped view is not
    called; otherwise the view runs with its original arguments.
    """
    @wraps(f)
    def decorated(*args, **kwargs):
        api_key = request.headers.get("X-API-Key") or request.args.get("api_key")
        # Compare in constant time: a plain ``!=`` stops at the first differing
        # character, which leaks timing information about the secret. Bytes are
        # used because compare_digest() rejects non-ASCII ``str`` arguments.
        if not api_key or not hmac.compare_digest(
            api_key.encode("utf-8"), API_KEY.encode("utf-8")
        ):
            return jsonify({"error": "Invalid or missing API key"}), 401
        return f(*args, **kwargs)
    return decorated

@app.route("/", methods=["GET"])
def index():
    """Return a JSON description of the service: name, version, endpoints, auth."""
    # Route path -> short human-readable summary.
    endpoint_summaries = {
        "/health": "Check service status",
        "/ocr/pdf": "Extract text from PDF (POST, multipart/form-data)",
        "/ocr/image": "Extract text from image (POST, multipart/form-data)",
        "/ocr/url": "Extract text from webpage (POST, JSON)",
    }
    descriptor = {
        "service": "DeepSeek OCR API",
        "version": "1.0",
        "endpoints": endpoint_summaries,
        "authentication": "X-API-Key header or api_key query parameter",
    }
    return jsonify(descriptor)

@app.route("/health", methods=["GET"])
def health():
    """Public health check (no API key required).

    Probes the internal OCR service's ``/health`` endpoint with a 5 second
    timeout.

    Returns:
        200 with ``status: "ok"``, ``api_ready: True`` and the upstream JSON
        payload under ``ocr_service`` when the upstream answers with a
        successful status and a JSON body; otherwise 503 with
        ``status: "error"``, ``api_ready: False`` and the failure message.
    """
    try:
        resp = requests.get(f"{OCR_SERVICE_URL}/health", timeout=5)
        # An error status means the OCR service is not healthy even if its
        # body happens to be valid JSON, so treat it as a failure.
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:
        # Deliberately broad: a health probe must answer 503, never crash.
        logger.warning("OCR service health check failed: %s", e)
        return jsonify({
            "status": "error",
            "error": str(e),
            "api_ready": False
        }), 503

    return jsonify({
        "status": "ok",
        "ocr_service": data,
        "api_ready": True
    })

@app.route("/ocr/pdf", methods=["POST"])
@require_api_key
def ocr_pdf():
    """OCR a PDF file - extract all text from all pages.

    Expects ``multipart/form-data`` with a ``file`` field holding an upload
    whose name ends in ``.pdf`` and an optional integer ``max_pages`` field
    (default 100; unparseable values fall back to the default). The upload is
    streamed to the internal OCR service's ``/ocr`` endpoint.

    Returns:
        200 with the text, word/char counts, filename and elapsed seconds when
        the upstream reports success.
        400 when the upload is missing, unnamed, or not a ``.pdf``.
        422 with the upstream payload when the upstream reports failure.
        502 when the upstream reply is not a JSON object.
        504 when the upstream does not answer within 10 minutes.
        500 with the exception message for any other failure.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided. Use multipart/form-data with field 'file'"}), 400

    file = request.files["file"]
    if not file.filename:
        return jsonify({"error": "Empty filename"}), 400

    if not file.filename.lower().endswith(".pdf"):
        return jsonify({"error": "Only PDF files supported for this endpoint"}), 400

    try:
        start_time = time.time()
        max_pages = request.form.get("max_pages", 100, type=int)

        # Forward to internal OCR service
        files = {"file": (file.filename, file.stream, "application/pdf")}
        data = {"max_pages": max_pages}

        logger.info("Processing PDF: %s, max_pages=%s", file.filename, max_pages)

        resp = requests.post(
            f"{OCR_SERVICE_URL}/ocr",
            files=files,
            data=data,
            timeout=600  # 10 min timeout for large books
        )

        # An upstream crash page or proxy error is not JSON; report it as a
        # bad gateway instead of leaking a JSON-decoder message as a 500.
        try:
            result = resp.json()
        except ValueError:
            result = None
        if not isinstance(result, dict):
            logger.error("OCR service returned a non-JSON-object reply (HTTP %s)", resp.status_code)
            return jsonify({"error": "OCR service returned an invalid response"}), 502

        elapsed = time.time() - start_time

        if not result.get("success"):
            return jsonify(result), 422

        text = result.get("text", "")
        words = text.split() if text else []
        return jsonify({
            "success": True,
            "text": text,
            "word_count": len(words),
            "char_count": len(text) if text else 0,
            "filename": file.filename,
            "elapsed_seconds": round(elapsed, 2)
        })

    except requests.Timeout:
        return jsonify({"error": "OCR processing timed out. Try with fewer pages."}), 504
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("OCR error: %s", e)
        return jsonify({"error": str(e)}), 500

@app.route("/ocr/image", methods=["POST"])
@require_api_key
def ocr_image():
    """OCR a single image file.

    Expects ``multipart/form-data`` with a ``file`` field whose filename ends
    in one of the supported image extensions. The upload is streamed to the
    internal OCR service's ``/ocr_image`` endpoint.

    Returns:
        200 with the text, word/char counts, filename and elapsed seconds.
        400 when the upload is missing, unnamed, or has an unsupported
        extension.
        422 with the upstream payload when the upstream explicitly reports
        ``success`` as false (a reply without a ``success`` key is still
        treated as successful).
        502 when the upstream reply is not a JSON object.
        504 when the upstream does not answer within 120 seconds.
        500 with the exception message for any other failure.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files["file"]
    if not file.filename:
        return jsonify({"error": "Empty filename"}), 400

    allowed_ext = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"]
    # str.endswith accepts a tuple, so one call checks every extension.
    if not file.filename.lower().endswith(tuple(allowed_ext)):
        return jsonify({"error": f"Supported formats: {allowed_ext}"}), 400

    try:
        start_time = time.time()

        files = {"file": (file.filename, file.stream, file.content_type or "image/jpeg")}

        logger.info("Processing image: %s", file.filename)

        resp = requests.post(
            f"{OCR_SERVICE_URL}/ocr_image",
            files=files,
            timeout=120
        )

        # A non-JSON upstream reply is a gateway problem, not a client error.
        try:
            result = resp.json()
        except ValueError:
            result = None
        if not isinstance(result, dict):
            logger.error("OCR service returned a non-JSON-object reply (HTTP %s)", resp.status_code)
            return jsonify({"error": "OCR service returned an invalid response"}), 502

        elapsed = time.time() - start_time

        # Previously an upstream failure was reported to the client as
        # "success": True with empty text. Mirror the PDF endpoint instead.
        if not result.get("success", True):
            return jsonify(result), 422

        text = result.get("text", "")
        words = text.split() if text else []

        return jsonify({
            "success": True,
            "text": text,
            "word_count": len(words),
            "char_count": len(text) if text else 0,
            "filename": file.filename,
            "elapsed_seconds": round(elapsed, 2)
        })

    except requests.Timeout:
        return jsonify({"error": "OCR processing timed out."}), 504
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Image OCR error: %s", e)
        return jsonify({"error": str(e)}), 500

@app.route("/ocr/url", methods=["POST"])
@require_api_key
def ocr_url():
    """Capture and OCR a webpage.

    Expects a JSON object body with a ``url`` key holding an ``http://`` or
    ``https://`` URL, which is forwarded to the internal OCR service's
    ``/scan_url`` endpoint.

    Returns:
        200 with the upstream ``success`` flag (``False`` when absent), the
        text, word/char counts, the URL and elapsed seconds.
        400 when the body is not a JSON object, has no ``url``, or the URL is
        not an http(s) string.
        502 when the upstream reply is not a JSON object.
        504 when the upstream does not answer within 120 seconds.
        500 with the exception message for any other failure.
    """
    data = request.get_json(silent=True)
    # A JSON array/string/number body has no .get(); treat it as "no URL"
    # rather than letting an AttributeError escape as an unhandled 500.
    url = data.get("url") if isinstance(data, dict) else None

    if not url:
        return jsonify({"error": "No URL provided in JSON body"}), 400

    # Only web URLs are forwarded, so schemes such as file:// never reach the
    # internal capture service.
    # NOTE(review): http(s) URLs that point at internal hosts are still
    # accepted; blocking those needs host resolution and is not done here.
    if not isinstance(url, str) or not url.strip().lower().startswith(("http://", "https://")):
        return jsonify({"error": "URL must be a string starting with http:// or https://"}), 400
    url = url.strip()

    try:
        start_time = time.time()

        logger.info("Processing URL: %s", url)

        resp = requests.post(
            f"{OCR_SERVICE_URL}/scan_url",
            json={"url": url},
            timeout=120
        )

        # A non-JSON upstream reply is a gateway problem, not a client error.
        try:
            result = resp.json()
        except ValueError:
            result = None
        if not isinstance(result, dict):
            logger.error("OCR service returned a non-JSON-object reply (HTTP %s)", resp.status_code)
            return jsonify({"error": "OCR service returned an invalid response"}), 502

        elapsed = time.time() - start_time

        text = result.get("text", "")
        words = text.split() if text else []

        return jsonify({
            "success": result.get("success", False),
            "text": text,
            "word_count": len(words),
            "char_count": len(text) if text else 0,
            "url": url,
            "elapsed_seconds": round(elapsed, 2)
        })

    except requests.Timeout:
        return jsonify({"error": "OCR processing timed out."}), 504
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("URL OCR error: %s", e)
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    # Script entry point: listen on the loopback interface only, on the port
    # named by OCR_API_PORT (5004 when unset), with Flask debug mode off.
    port = int(os.getenv("OCR_API_PORT", 5004))
    logger.info(f"Starting public OCR API on port {port}")
    app.run(debug=False, port=port, host="127.0.0.1")
