#!/usr/bin/env python3
"""
DeepSeek OCR - Public Proxy API (stable)

Why this exists:
- Browsers upload PDFs/images to a public endpoint
- The public endpoint proxies work to the internal GPU OCR service (localhost:5003)
- The UI polls progress from the internal service via this proxy

Auth (enforced on the POST /ocr/* endpoints only; the GET endpoints are open):
- X-API-Key header (preferred)
- or ?api_key=... query param

Endpoints:
- GET  /health                         -> proxy internal /health
- GET  /ocr/status                     -> proxy internal /jobs
- GET  /ocr/progress/<job_id>          -> proxy internal /progress/<job_id>
- POST /ocr/pdf                        -> proxy internal /ocr (PDF)
- POST /ocr/image                      -> proxy internal /ocr_image (image)
- POST /ocr/url                        -> proxy internal /scan_url (url screenshot + OCR)
"""

from __future__ import annotations

import hmac
import os
import uuid
from functools import wraps
from pathlib import Path
from typing import Any, Dict, Tuple
from urllib.parse import quote

import requests
from flask import Flask, jsonify, request
from flask_cors import CORS

app = Flask(__name__)
# NOTE(review): CORS is opened to every origin here — confirm this is intended
# for a public endpoint that accepts an API key.
CORS(app, origins="*")

# Config (each value can be overridden through the environment)
# Base URL of the internal OCR service; a trailing slash is stripped so that
# request paths can be appended directly.
OCR_SERVICE_URL = os.environ.get("OCR_SERVICE_URL", "http://127.0.0.1:5003").rstrip("/")
# NOTE(review): the fallback key is hard-coded in source — set OCR_API_KEY
# explicitly in any real deployment.
OCR_API_KEY = os.environ.get("OCR_API_KEY", "deepseek-ocr-2024-secret-key")
# Timeout, in seconds, applied to the upstream PDF OCR request.
TIMEOUT_SECONDS = int(os.environ.get("OCR_API_TIMEOUT", "10800"))  # default 10800 s = 3 hours


def require_api_key(fn):
    """Decorate a view so it only runs when the request carries the API key.

    The key is read from the ``X-API-Key`` header first and falls back to the
    ``api_key`` query parameter. When the key is missing or does not match
    ``OCR_API_KEY`` the wrapped view is not called and a 401 JSON error is
    returned instead.
    """

    @wraps(fn)
    def wrapper(*args, **kwargs):
        api_key = request.headers.get("X-API-Key") or request.args.get("api_key")
        # Constant-time comparison so response timing does not reveal how much
        # of a guessed key matched. Both sides are encoded to bytes because
        # compare_digest() raises TypeError for non-ASCII str arguments.
        if not api_key or not hmac.compare_digest(
            api_key.encode("utf-8"), OCR_API_KEY.encode("utf-8")
        ):
            return jsonify({"error": "Invalid or missing API key"}), 401
        return fn(*args, **kwargs)

    return wrapper


def _proxy_json_get(path: str, timeout: int = 5) -> Tuple[Dict[str, Any], int]:
    """GET *path* on the internal OCR service and return ``(json_body, status)``.

    When the response body cannot be decoded as JSON, an error dict and status
    502 are returned instead. Errors raised by the request itself (connection
    failure, timeout) are not caught here and propagate to the caller.
    """
    upstream = requests.get(OCR_SERVICE_URL + path, timeout=timeout)
    try:
        body = upstream.json()
    except Exception:
        return {"error": "Invalid JSON from internal OCR service"}, 502
    return body, upstream.status_code


@app.route("/", methods=["GET"])
def index():
    """Return a JSON description of the service and the routes it exposes."""
    routes = {
        "/health": "Check service status",
        "/ocr/status": "List active OCR jobs (proxy internal /jobs)",
        "/ocr/progress/<job_id>": "Get OCR job progress (proxy internal /progress/<job_id>)",
        "/ocr/pdf": "OCR a PDF (POST multipart/form-data: file, max_pages?, job_id?)",
        "/ocr/image": "OCR an image (POST multipart/form-data: file)",
        "/ocr/url": "OCR a webpage screenshot (POST JSON: {url})",
    }
    description = {
        "service": "DeepSeek OCR API",
        "version": "2.1-proxy",
        "timeout_seconds": TIMEOUT_SECONDS,
        "internal_ocr_service": OCR_SERVICE_URL,
        "endpoints": routes,
        "auth": "X-API-Key header or ?api_key=",
    }
    return jsonify(description)


@app.route("/health", methods=["GET"])
def health():
    """Report whether the internal OCR service answers its ``/health`` check.

    Returns 200 with ``status: ok`` and the upstream payload when the internal
    service replies with 200. Any other upstream status, or any exception
    raised while querying it, yields a 503 error document.
    """
    try:
        upstream, code = _proxy_json_get("/health", timeout=5)
        if code == 200:
            report = {
                "status": "ok",
                "api_ready": True,
                "active_jobs": upstream.get("active_jobs", 0),
                "ocr_service": upstream,
            }
            return jsonify(report)
        degraded = {"status": "error", "api_ready": False, "ocr_service": upstream}
        return jsonify(degraded), 503
    except Exception as exc:
        failure = {"status": "error", "api_ready": False, "error": str(exc)}
        return jsonify(failure), 503


@app.route("/ocr/status", methods=["GET"])
def ocr_status():
    """List the internal service's jobs, reduced to the keys the UI reads.

    Returns ``active_jobs`` and ``jobs`` taken from the internal ``/jobs``
    response (defaulting to ``0`` and ``[]``). A non-200 upstream status gives
    a 502 carrying the upstream body; any exception gives a 500.
    """
    try:
        upstream, code = _proxy_json_get("/jobs", timeout=5)
        if code == 200:
            # Only these two keys are passed on; anything else upstream sends
            # is dropped.
            summary = {
                "active_jobs": upstream.get("active_jobs", 0),
                "jobs": upstream.get("jobs", []),
            }
            return jsonify(summary)
        problem = {"error": "Failed to query internal OCR jobs", "details": upstream}
        return jsonify(problem), 502
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500


@app.route("/ocr/progress/<job_id>", methods=["GET"])
def ocr_progress(job_id: str):
    """Relay the internal service's progress document for *job_id*.

    Args:
        job_id: Job identifier taken from the request path.

    Returns:
        The upstream JSON body together with the upstream status code; 400
        when *job_id* is a relative path segment (``.`` or ``..``); 500 when
        querying the internal service raises.
    """
    # job_id is caller-controlled. "." and ".." are unchanged by
    # percent-encoding and could be resolved as relative path segments, so
    # they are rejected outright.
    if job_id in {".", ".."}:
        return jsonify({"error": "Invalid job_id"}), 400

    # Percent-encode everything else so characters such as "?", "#" or "/"
    # cannot change which internal path or query is requested.
    safe_job_id = quote(job_id, safe="")
    try:
        data, status = _proxy_json_get(f"/progress/{safe_job_id}", timeout=5)
        return jsonify(data), status
    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route("/ocr/pdf", methods=["POST"])
@require_api_key
def ocr_pdf():
    """Forward an uploaded PDF to the internal ``/ocr`` endpoint.

    Expects multipart/form-data with the PDF in field ``file`` and optional
    form fields ``job_id`` (default: a fresh 8-character id) and ``max_pages``
    (default: 100).

    Returns:
        400 when the upload is missing, unnamed, or not named ``*.pdf``;
        504 when the upstream request times out; 500 on any other exception;
        otherwise the upstream payload with the upstream status code.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided. Use multipart/form-data with field 'file'"}), 400

    upload = request.files["file"]
    name = upload.filename
    if not name:
        return jsonify({"error": "Empty filename"}), 400
    if not name.lower().endswith(".pdf"):
        return jsonify({"error": "Only PDF files supported"}), 400

    job_id = request.form.get("job_id") or str(uuid.uuid4())[:8]
    page_limit = request.form.get("max_pages", 100, type=int)

    # Multipart parts and form fields handed on to the internal service.
    upstream_files = {"file": (name, upload.stream, "application/pdf")}
    upstream_form = {"max_pages": page_limit, "job_id": job_id}

    try:
        upstream = requests.post(
            f"{OCR_SERVICE_URL}/ocr",
            files=upstream_files,
            data=upstream_form,
            timeout=TIMEOUT_SECONDS,
        )
        if upstream.headers.get("content-type", "").startswith("application/json"):
            payload = upstream.json()
        else:
            # Non-JSON replies are wrapped so the client still receives JSON.
            payload = {"raw": upstream.text}
        # Guarantee the response names the job even if upstream omitted it.
        if isinstance(payload, dict):
            payload.setdefault("job_id", job_id)
        return jsonify(payload), upstream.status_code
    except requests.Timeout:
        return jsonify({"error": "OCR processing timed out", "job_id": job_id}), 504
    except Exception as exc:
        return jsonify({"error": str(exc), "job_id": job_id}), 500


@app.route("/ocr/image", methods=["POST"])
@require_api_key
def ocr_image():
    """Forward an uploaded image to the internal ``/ocr_image`` endpoint.

    Expects multipart/form-data with the image in field ``file``.

    Returns:
        400 when the upload is missing, unnamed, or has an extension outside
        the supported set; 504 when the upstream request times out; 500 on
        any other exception; otherwise the upstream payload with the upstream
        status code. A short ``job_id`` is added to dict payloads lacking one.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided. Use multipart/form-data with field 'file'"}), 400

    file = request.files["file"]
    if not file.filename:
        return jsonify({"error": "Empty filename"}), 400

    suffix = Path(file.filename).suffix.lower()
    allowed = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"}
    # A filename with no extension at all is let through; only a present but
    # unsupported extension is rejected.
    if suffix and suffix not in allowed:
        return jsonify({"error": f"Unsupported image type. Supported: {sorted(allowed)}"}), 400

    try:
        files = {"file": (file.filename, file.stream, file.content_type or "image/jpeg")}
        resp = requests.post(f"{OCR_SERVICE_URL}/ocr_image", files=files, timeout=300)
        if resp.headers.get("content-type", "").startswith("application/json"):
            payload = resp.json()
        else:
            # Non-JSON replies are wrapped so the client still receives JSON.
            payload = {"raw": resp.text}
        # Add a lightweight job_id for UI consistency
        if isinstance(payload, dict) and "job_id" not in payload:
            payload["job_id"] = str(uuid.uuid4())[:8]
        return jsonify(payload), resp.status_code
    except requests.Timeout:
        # Same status the PDF endpoint uses for an upstream timeout.
        return jsonify({"error": "OCR processing timed out"}), 504
    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route("/ocr/url", methods=["POST"])
@require_api_key
def ocr_url():
    """Ask the internal ``/scan_url`` endpoint to OCR the page at a given URL.

    The URL is read from the ``url`` key of a JSON object body, falling back
    to a ``url`` form field.

    Returns:
        400 when no URL is supplied; 504 when the upstream request times out;
        500 on any other exception; otherwise the upstream payload with the
        upstream status code. A short ``job_id`` is added to dict payloads
        lacking one.
    """
    data_in = request.get_json(silent=True)
    # A JSON body that is not an object (a list, string, number, ...) has no
    # "url" key to read; treat it as empty instead of failing on .get().
    if not isinstance(data_in, dict):
        data_in = {}
    url = data_in.get("url") or request.form.get("url")
    if not url:
        return jsonify({"error": "No URL provided"}), 400

    # NOTE(review): the URL is forwarded as-is; any scheme/host restriction is
    # presumably enforced by the internal service — confirm.
    try:
        resp = requests.post(f"{OCR_SERVICE_URL}/scan_url", json={"url": url}, timeout=300)
        if resp.headers.get("content-type", "").startswith("application/json"):
            payload = resp.json()
        else:
            # Non-JSON replies are wrapped so the client still receives JSON.
            payload = {"raw": resp.text}
        if isinstance(payload, dict) and "job_id" not in payload:
            payload["job_id"] = str(uuid.uuid4())[:8]
        return jsonify(payload), resp.status_code
    except requests.Timeout:
        # Same status the PDF endpoint uses for an upstream timeout.
        return jsonify({"error": "OCR processing timed out"}), 504
    except Exception as e:
        return jsonify({"error": str(e)}), 500

