
    Qi                    
   d Z ddlmZ ddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZ ddlZddlmZmZmZ ddlmZ  ee      Z eed	
       ej,                  j/                  dd      j1                  d      Zej,                  j/                  dd      Z eej,                  j/                  dd            Zd Zd$d%dZej?                  ddg      d        Z ej?                  ddg      d        Z!ej?                  ddg      d        Z"ej?                  ddg      d&d       Z#ej?                  ddg      ed               Z$ej?                  d dg      ed!               Z%ej?                  d"dg      ed#               Z&y)'a
  
DeepSeek OCR - Public Proxy API (stable)

Why this exists:
- Browsers upload PDFs/images to a public endpoint
- The public endpoint proxies work to the internal GPU OCR service (localhost:5003)
- The UI polls progress from the internal service via this proxy

Auth:
- X-API-Key header (preferred)
- or ?api_key=... query param

Endpoints:
- GET  /health                         -> proxy internal /health
- GET  /ocr/status                     -> proxy internal /jobs
- GET  /ocr/progress/<job_id>          -> proxy internal /progress/<job_id>
- POST /ocr/pdf                        -> proxy internal /ocr (PDF)
- POST /ocr/image                      -> proxy internal /ocr_image (image)
- POST /ocr/url                        -> proxy internal /scan_url (url screenshot + OCR)
    )annotationsNwraps)Path)AnyDictTuple)Flaskjsonifyrequest)CORS*)originsOCR_SERVICE_URLzhttp://127.0.0.1:5003/OCR_API_KEYzdeepseek-ocr-2024-secret-keyOCR_API_TIMEOUT10800c                .     t                fd       }|S )Nc                     t         j                  j                  d      xs t         j                  j                  d      }|r	|t        k7  rt        ddi      dfS  | i |S )Nz	X-API-Keyapi_keyerrorzInvalid or missing API keyi  )r   headersgetargsr   r   )r   kwargsr   fns       /var/www/html/leadgen/ocr_api.pywrapperz require_api_key.<locals>.wrapper-   s[    //%%k2Qgll6F6Fy6Q'[0G%ABCSHH4"6""    r   )r   r   s   ` r   require_api_keyr!   ,   s     
2Y# # Nr    c                    t        j                  t         |  |      }	 |j                         |j                  fS # t
        $ r	 ddidfcY S w xY w)Ntimeoutr   z&Invalid JSON from internal OCR service  )requestsr   r   jsonstatus_code	Exception)pathr$   resps      r   _proxy_json_getr,   7   s[    <<?+D62GDDHyy{D,,,, HABCGGHs   = AAGET)methodsc                 D    t        ddt        t        ddddddd	d
d      S )NzDeepSeek OCR APIz	2.1-proxyzCheck service statusz+List active OCR jobs (proxy internal /jobs)z8Get OCR job progress (proxy internal /progress/<job_id>)z?OCR a PDF (POST multipart/form-data: file, max_pages?, job_id?)z-OCR an image (POST multipart/form-data: file)z+OCR a webpage screenshot (POST JSON: {url}))/health/ocr/status/ocr/progress/<job_id>/ocr/pdf
/ocr/image/ocr/urlzX-API-Key header or ?api_key=)serviceversiontimeout_secondsinternal_ocr_service	endpointsauth)r   TIMEOUT_SECONDSr    r    r   indexr>   ?   s<    )".$31L*d]MI 4	
 r    r0   c                     	 t        dd      \  } }|dk7  rt        dd| d      dfS t        d	d
| j                  dd      | d      S # t        $ r$}t        ddt	        |      d      dfcY d }~S d }~ww xY w)Nr0      r#      r   F)status	api_readyocr_servicei  okTactive_jobsr   )rB   rC   rF   rD   )rB   rC   r   r,   r   r   r)   strdatarB   es      r   healthrL   T   s    V&y!<fS=gERVWXZ]]]!#xxq9#	
 	
  V'AOPRUUUVs"   %A A 	A5A0*A50A5r1   c                    	 t        dd      \  } }|dk7  rt        d| d      dfS t        | j                  dd	      | j                  d
g       d      S # t        $ r"}t        dt	        |      i      dfcY d }~S d }~ww xY w)Nz/jobsr@   r#   rA   z!Failed to query internal OCR jobs)r   detailsr%   rF   r   jobs)rF   rO   r     rG   rI   s      r   
ocr_statusrQ   f   s    /&w:fS=%HUYZ[]```#xxq9,
 	
  /Q()3../s"   $A -A 	B A;5B ;B r2   c                    	 t        d|  d      \  }}t        |      |fS # t        $ r"}t        dt        |      i      dfcY d }~S d }~ww xY w)Nz
/progress/r@   r#   r   rP   )r,   r   r)   rH   )job_idrJ   rB   rK   s       r   ocr_progressrT   w   sV    /&F8'<aHft}f$$ /Q()3../s   " 	AAAAr3   POSTc                     dt         j                  vrt        ddi      dfS t         j                  d   } | j                  st        ddi      dfS | j                  j	                         j                  d      st        ddi      dfS t         j                  j                  d      xs  t        t        j                               d d	 }t         j                  j                  d
dt              }d| j                  | j                  dfi}||d}	 t        j                  t         d||t               }|j"                  j                  dd      j%                  d      r|j'                         nd|j(                  i}t+        |t,              r	d|vr||d<   t        |      |j.                  fS # t        j0                  $ r t        d|d      dfcY S t2        $ r#}t        t        |      |d      dfcY d }~S d }~ww xY w)Nfiler   ;No file provided. Use multipart/form-data with field 'file'  Empty filenamez.pdfzOnly PDF files supportedrS      	max_pagesd   )typezapplication/pdf)r\   rS   z/ocr)filesrJ   r$   content-type application/jsonrawzOCR processing timed out)r   rS   i  rP   )r   r_   r   filenamelowerendswithformr   rH   uuiduuid4intstreamr&   postr   r<   r   
startswithr'   text
isinstancedictr(   Timeoutr)   )rW   rS   r\   r_   rJ   r+   payloadrK   s           r   ocr_pdfrs      s    W]]"!^_`beee== D==!123S88== ))&1!;<=sBB\\h'@3tzz|+<Ra+@F  c <I dmmT[[2CDEE"f5DA}}t$#	
 "&!1!1."!E!P!PQc!d$))+kprvr{r{j|gt$)@ &GHw!1!111 U!;vNOQTTT AQ6:;S@@As%   BF- -#G=G=G82G=8G=r4   c                 `   dt         j                  vrt        ddi      dfS t         j                  d   } | j                  st        ddi      dfS t	        | j                        j
                  j                         }h d}|r||vrt        ddt        |       i      dfS 	 d| j                  | j                  | j                  xs dfi}t        j                  t         d	|d
      }|j                  j                  dd      j                  d      r|j!                         nd|j"                  i}t%        |t&              r'd|vr#t)        t+        j,                               d d |d<   t        |      |j.                  fS # t0        $ r"}t        dt)        |      i      dfcY d }~S d }~ww xY w)NrW   r   rX   rY   rZ   >   .bmp.gif.jpg.png.jpeg.tiff.webpz#Unsupported image type. Supported: z
image/jpegz
/ocr_image,  )r_   r$   r`   ra   rb   rc   rS   r[   rP   )r   r_   r   rd   r   suffixre   sortedrk   content_typer&   rl   r   r   r   rm   r'   rn   ro   rp   rH   rh   ri   r(   r)   )rW   r}   allowedr_   r+   rr   rK   s          r   	ocr_imager      s    W]]"!^_`beee== D==!123S88$-- ''--/FIG&'#FvgFW!XYZ\___	/$--d6G6G6W<XY}}0
;5RUV!%!1!1."!E!P!PQc!d$))+kprvr{r{j|gt$)@ #DJJL 1"1 5GHw!1!111 /Q()3../s   #CF 	F-F("F-(F-r5   c                    t        j                  d      xs i } | j                  d      xs t         j                  j                  d      }|st	        ddi      dfS 	 t        j                  t         dd|id	      }|j                  j                  d
d      j                  d      r|j                         nd|j                  i}t        |t              r'd|vr#t        t        j                                d d |d<   t	        |      |j"                  fS # t$        $ r"}t	        dt        |      i      dfcY d }~S d }~ww xY w)NT)silenturlr   zNo URL providedrY   z	/scan_urlr|   )r'   r$   r`   ra   rb   rc   rS   r[   rP   )r   get_jsonr   rg   r   r&   rl   r   r   rm   r'   rn   ro   rp   rH   rh   ri   r(   r)   )data_inr   r+   rr   rK   s        r   ocr_urlr      s    d+1rG
++e

7 0 0 7C!234c99/}}0	:%WZ[!%!1!1."!E!P!PQc!d$))+kprvr{r{j|gt$)@ #DJJL 1"1 5GHw!1!111 /Q()3../s   B7D 	E D=7E=E)r@   )r*   rH   r$   rj   returnzTuple[Dict[str, Any], int])rS   rH   )'__doc__
__future__r   osrh   	functoolsr   pathlibr   typingr   r   r	   r&   flaskr
   r   r   
flask_corsr   __name__appenvironr   rstripr   r   rj   r<   r!   r,   router>   rL   rQ   rT   rs   r   r   r=   r    r   <module>r      s  * # 	    # #  ) ) Ho S#  **..!24KLSSTWXjjnn],JKbjjnn%6@AH 3  !( 9ug&V 'V" =5'*/ +/  #eW5/ 6/ :x(!A  )!AH <&*/  +/2 :x(/  )/r    