
    Qi$                       d dl mZmZmZmZmZ d dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZmZ d dlmZ d dlmZ d dlZd dlZd dlmZ d dlmZmZmZmZmZmZmZmZm Z m!Z!m"Z" d dl#m$Z$m%Z%m&Z& d d	l'm(Z(m)Z) d d
l*m+Z+  ee,      Z- e.e      e-j.                  d<   de-j.                  d<    e/ej`                  jc                  dd            Z2 e$e,      Z3 e       Z4 e
jj                         Z6 ee      dz  dz  Z7dZ8dZ9e!Z: e/ej`                  jc                  dd            Z; e/ej`                  jc                  dd            Z<i a= ej|                         Z?da@ e
j                         ZBd ZCd ZDi aE e
j                         ZF e
j                         ZG e
j                         ZHd ZId ZJdbdZKe8fdZLdcd ZM eI        d! ZNddd"eOd#eOfd$ZPd% ZQd& ZRded'ZSdfd(ZTd) ZUd* ZV eN         eP        d+ ZWd, ZXd- ZYd. ZZe-j                  d/      d0        Z\e-j                  d1d2g3      d4        Z]e-j                  d5d2g3      d6        Z^e-j                  d7d8g3      d9        Z_e-j                  d:d8g3      d;        Z`e-j                  d<d8g3      d=        Zae-j                  d>d8g3      d?        Zbe-j                  d@      dA        Zce-j                  dB      dC        Zde-j                  dD      dE        Zee-j                  dFd8g3      dG        Zfe-j                  dH      dI        Zge-j                  dJd2g3      dK        ZhdcdLZidgdMZjdN Zke-j                  dOd8g3      dP        Zle-j                  dQd2g3      dR        Zme-j                  dSdTd2g3      dU        Zne-j                  dVdTd2g3      dW        Zoe-j                  dXd8g3      dY        Zpe-j                  dZd2g3      d[        Zqe,d\k(  r_ erd]        erd^       ej`                  jc                  d_d      j                         j                         d`v Zue-j                  eeeua       yy)h    )Flaskrequestjsonifyrender_templatesend_from_directoryN)ThreadPoolExecutoras_completedsecure_filenamePath)	RAGSystem)PDF_DIRPROCESSED_DIRWEB_HOSTWEB_PORT	NOTES_DIRDOCUMENTS_DIRDIGESTS_DIRENRICHMENT_DEFAULT_THREADSENRICHMENT_MAX_THREADSENRICHMENT_STATUS_PATHENRICHMENT_LOG_PATH)
get_loggerlog_performancelog_error_with_context)FileUploadErrorValidationError)RAGAgentUPLOAD_FOLDERi   MAX_CONTENT_LENGTHMAX_CONCURRENT_UPLOADS   metadatazprocessing_status.jsoni  i,  )ENRICHMENT_STATUS_STALE_THRESHOLD_SECONDSi  #ENRICHMENT_STATUS_HEARTBEAT_SECONDS   Fc                      t         5  t        r
	 ddd       ydat        j                  t        d      } | j                          t        j                  d       ddd       y# 1 sw Y   yxY w)z9Start the processing worker thread if not already runningNTtargetdaemonz Processing worker thread started)processing_worker_lockprocessing_worker_running	threadingThreadprocessing_workerstart
app_loggerinfo)worker_threads    '/var/www/html/leadgen/airagagent/app.pystart_processing_workerr6   >   s`     
  <$< < %)!!((0A$O:;< < <s   A#AA##A,c                     t         j                  d       	 	 	 t        j                  d      } 	 t         j                  dt        j                                  |         t        j                          	 `# t        j
                  $ r, t        j                         rt         j                  d       Y w xY w# t        $ r"}t         j                  d|        Y d}~~d}~ww xY w# t        j                          w xY w# t        $ r7}t         j                  d|        t        j                  d	       Y d}~d}~ww xY w)
z?Worker thread that processes files from the queue one at a timez/Processing worker started, waiting for files...<   timeoutz5Processing worker idle for 60s, continuing to wait...z0Processing worker picked up a file, queue size: zProcessing worker error: Nz$Processing worker unexpected error:    )r2   r3   processing_queuegetqueueEmptyemptyqsize	Exceptionerror	task_donetimesleep)process_funces     r5   r0   r0   I   s    OOEF
	/33B3?-"RScSiSiSkRl mn !**,% 
 ;; #))+OO$[\	  B  #<QC!@AAB !**, 	CA3GHJJqMM	sd   A7 1B9 !D  7<B63D  5B66D  9	C$CC' C$$C' 'C==D   	E 	-D;;E c                  4   t         j                         r1	 t        t         d      5 } t        j                  |       addd       yyi ay# 1 sw Y   xY w# t        j                  t        t        f$ r$}t        j                  d|        i aY d}~yd}~ww xY w)z Load processing status from filerNz"Failed to load processing status: )PROCESSING_STATUS_FILEexistsopenjsonloadprocessing_statusJSONDecodeErrorIOErrorOSErrorr2   warningfrH   s     r5   load_processing_statusrW   i   s     $$&	#,c2 1a$(IIaL!1 1 1 1$$gw7 	#!CA3GH "	#s-   A A	A 	AA B3BBc                  &   	 t         j                  j                  dd       t        t         d      5 } t	        j
                  t        | d       ddd       y# 1 sw Y   yxY w# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)zSave processing status to fileTparentsexist_okw   indentNz"Failed to save processing status: )
rK   parentmkdirrM   rN   dumprP   rB   r2   rC   rU   s     r5   save_processing_statusrc   v   ~    C%%++D4+H(#. 	6!II'15	6 	6 	6 C=aSABBC4   1A% AA% A"A% "A% %	B.BB c                 B   t        j                          }t        |       }t        5  t        j	                  |i       j                         }|xs i }d|vr|j	                  d|      |d<   d|vrd|v r|d   |d<   d|vrd|v r|d   |d<   d|vrd|v r|d   |d<   ||d<   |j                  ||||d       |j                         D ]  \  }}	|dvs|	||<    t        ||j	                  d|      z
  d      |d	<   |t        |<   t                d
d
d
       y
# 1 sw Y   y
xY w)z=Update processing status for a file with persistent metadata.
started_at	file_size	client_iporiginal_namefilename)statusprogressmessageupdated)rh   ri   rj   rk   r]   elapsed_secondsN)
rE   r   status_lockrP   r=   copyupdateitemsroundrc   )
rl   rm   rn   ro   extranowsafe_filenameentrykeyvalues
             r5   update_processing_statusr}      sS   
))+C#H-M	 !!%%mR8==?u$"'))L#">E,e#u(<!&{!3E+e#u(<!&{!3E+%'Ou,D%*?%;E/")j 	
 	  ++- 	#JCSS"c
	# $)uyys/K)KQ#O +0-( 5! ! !s   B)D<DDc           	         t        j                          }d}t        5  t        t        j	                               D ]q  }t        j                  |i       }|j                  d      }|j                  d|j                  d|            }|dv sQ||z
  | kD  sZt        j                  |d       d}s |r
t                ddd       y# 1 sw Y   yxY w)zORemove stale completed/failed processing entries to keep the status file small.Frm   rp   rh   )	completedfailedNT)rE   rr   listrP   keysr=   poprc   )retention_secondsrx   removedrl   rz   rm   rp   s          r5   cleanup_processing_statusr      s    
))+CG	 	%.3356 	H%))(B7EYYx(Fii	599\3+GHG00cGmGX5X!%%h5	 "$	% 	% 	%s   A.CC&CCc                 x   |xs t        j                          }|j                         }| |d<   |j                  d      xs |j                  d      xs |}||d<   |j                  d|      }||d<   t        ||z
  d      |d<   ||z
  }t        |d      |d<   |j                  d      d	k(  xr	 |t        kD  |d
<   |S )z5Prepare a processing status entry for JSON responses.rl   rh   rp   last_updater]   rq   stale_secondsrm   
processingstale)rE   rs   r=   rv   !PROCESSING_STATUS_STALE_THRESHOLD)rl   rz   rx   
normalizedstartedr   	stale_ages          r5   normalize_processing_entryr      s    

CJ%Jznn\*NjnnY.GN3G&J|..G4K +J}$)#-$;J !k!I"'	1"5J$..2lBtyStGtJw    c            	         dddddt         i t        j                         d} t        j                         r0	 t	        t        d      5 }t        j                  |      addd       n| j                         at        j                  di        t        j                  dt                t        j                  d	t        j                                y# 1 sw Y   sxY w# t        $ r | j                         aY w xY w)
z Load enrichment status from fileidler   rf   )rm   rn   ro   files_processedtotal_filesthread_countactive_fileslast_activityrJ   Nr   r   r   )r   rE   ENRICHMENT_STATUS_FILErL   rM   rN   rO   enrichment_statusrB   rs   
setdefault)default_statusrV   s     r5   load_enrichment_statusr      s     2	N $$&	6,c2 1a$(IIaL!1
 +//1  4  1KL  $))+>1 1 	6 . 3 3 5	6s)   C CC CC C76C7force_resetreturnc                 l   t        j                          }t        5  t        j                  di        t        j                  dt               t        j                  d      xs t        j                  d      xs |}t        j                  d      xs i }|r?	 t        t        |      t        d |j                         D        t        |                  }t        d|t        |      z
        }t        |d      t        d	<   | r2t        j                  d
ddddi ||d       t                	 ddd       yt        j                  d      dk(  rE|t        kD  r<t        j                  ddt        |       di ||d       t                	 ddd       yddd       y# t        $ r Y w xY w# 1 sw Y   yxY w)zqReset stale enrichment state (e.g. service restarted mid-run).

    Returns True if the status was modified.
    r   r   r   rp   c              3   Z   K   | ]#  }t        |j                  d d      xs d       % yw)rp   r   N)floatr=   ).0vs     r5   	<genexpr>z,cleanup_enrichment_status.<locals>.<genexpr>   s%     XQquuY27a8Xs   )+)defaultg        r]   r   r   r   zEnrichment status cleared)rm   rn   ro   r   r   r   rp   r   NTrm   runningr   z$Enrichment stalled (no activity for zs). Please restart enrichment.)rm   ro   r   rp   r   F)rE   rr   r   r   r   r=   maxr   valuesrB   rv   rt   save_enrichment_statusr%   int)r   rx   r   r   r   s        r5   cleanup_enrichment_statusr      s   
 ))+C	 )$$^R8$$^5OP)--o>iBSBWBWXaBbifi(,,^<B #-(X,BUBUBWXbghubvw! Cu]';!;<-2=!-D/*$$ 6#$  "!$	& 	 #$=) )@   *i7MLu<u$$"A#mBTAUUst "!$&  #$S) ) )V ?  )V s8   A<F*>FAF*5AF*	F'$F*&F''F**F3c                  &   	 t         j                  j                  dd       t        t         d      5 } t	        j
                  t        | d       ddd       y# 1 sw Y   yxY w# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)zSave enrichment status to fileTrY   r\   r]   r^   Nz"Failed to save enrichment status: )
r   r`   ra   rM   rN   rb   r   rB   r2   rC   rU   s     r5   r   r     rd   re   c                    t        j                          | d|}	 t        j                  j                  dd       t        5  t        t        d      5 }|j                  t        j                  |      dz          ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)z2Append enrichment events to log file for auditing.)	timestampeventTrY   a
Nz&Failed to write enrichment log entry: )rE   r   r`   ra   enrichment_log_lockrM   writerN   dumpsrB   r2   debug)
event_typepayloadrz   rV   excs        r5   log_enrichment_eventr     s     YY[ E
I""(((E  	2)3/ 21

5)D012	2 	22 2	2 	2  IA#GHHIsM   'B$ B(B;BB$ B	BB!B$ !B$ $	C-C

Cc                 
   |t         j                  dt              }t        5  t         j	                  | ||||t        j
                         t        j
                         |d       t                ddd       y# 1 sw Y   yxY w)zUpdate enrichment statusNr   )rm   rn   ro   r   r   rp   r   r   )r   r=   r   rr   rt   rE   r   )rm   rn   ro   r   r   r   s         r5   update_enrichment_statusr   $  sr    (,,^=WX	 !   .&yy{!YY[(	"
 		 	 ! ! !s   AA99Bc                    t         5  t        j                  di       }|j                  | i       }|t        |      |d<   |t        |      |d<   |r||d<   t	        j                         |d<   t	        j                         t        d<   |r!t        j                  d      dk7  r	dt        d<   t                d d d        y # 1 sw Y   y xY w)	Nr   chunktotal_chunksro   rp   r   rm   r   )rr   r   r   r   rE   r=   r   )	file_namechunk_indexchunk_totalro   activerz   s         r5   update_enrichment_file_activityr   5  s    	 !"--nbA!!)R0" -E'N"$'$4E.!&E)99;i-1YY[/*'++H5B*3h' ! ! !s   B/B??Cc                     t         5  t        j                  di       }| |v r7|j                  | d        t	        j                         t        d<   t                d d d        y # 1 sw Y   y xY wNr   r   )rr   r   r   r   rE   r   )r   r   s     r5   clear_enrichment_file_activityr   F  sY    	 %"--nbAJJy$'15o."$% % %s   AA""A+c                      t         5  i t        d<   t        j                         t        d<   t                d d d        y # 1 sw Y   y xY wr   )rr   r   rE   r    r   r5   clear_all_enrichment_activityr   N  s<    	 !,..)-1YY[/* ! ! !s	   /?Ac                  h    t        t        d      st               t        _        t        j                  S )z5Get or create a thread-local RAGAgent for enrichment.	rag_agent)hasattrthread_local_agentr   r   r   r   r5   get_thread_rag_agentr   Y  s$    %{3'/z$'''r   c                     	 t        |       }t	        dt        |t                    S # t        t        f$ r	 t        cY S w xY w)z.Clamp requested thread count to allowed range.r;   )r   	TypeError
ValueErrorr   r   minr   )r|   counts     r5   sanitize_thread_countr   _  sC    *E
 q#e3455 z" *))*s   ' ??c                 ^    | rd| vry| j                  dd      d   j                         }|dv S )z"Check if file extension is allowed.Fr;   )pdftxt)rsplitlower)rl   exts     r5   allowed_filer   g  s8    s(*
//#q
!!
$
*
*
,C.  r   c                      t         5  t        d t        j                         D              cddd       S # 1 sw Y   yxY w)z5Return the number of files currently being processed.c              3   J   K   | ]  }|j                  d       dk(  sd  yw)rm   r   r;   N)r=   )r   rz   s     r5   r   z-get_active_processing_jobs.<locals>.<genexpr>q  s!     d		(@SWc@c1ds   ##N)rr   sumrP   r   r   r   r5   get_active_processing_jobsr   n  s6    	 ed"3":":"<dde e es   $5>/c                  .    t        dt        t              S )Nz
index.html)enrichment_default_threadsenrichment_max_threads)r   r   r   r   r   r5   indexr   s  s    #=5 r   z/uploadPOST)methodsc                      t        j                          t        j                  xs d	 t        j	                  d        dt        j
                  vr't        j                  d        t        ddi      dfS t        j
                  d   } | j                  dk(  r't        j                  d	        t        dd
i      dfS | rt        | j                        s4t        j                  d| j                   d        t        ddi      dfS t                t               }|t        k\  r6d| d}t        j                  d d|        t        ||t        d      dfS | j                  dt        j                         | j!                         | j                  d       t"        j$                  d   }|kD  r1t        j                  d d        t        dd|dz   di      dfS t'        | j                        t        j(                  j+                  t"        j$                  d         t        j(                  j-                        }|rcd }	 t/        d      5 }t1        j2                  |j5                               j7                         }d d d        | j                  d       | j5                         }t1        j2                  |      j7                         }	| j                  d       ||	k(  rt        j	                  d d        d}
	 t>        5  tA        tB        d!      rgtB        jD                  rWtA        tB        jD                  d"      r=tB        jD                  jF                  r#tI        tB        jD                  jF                        }
d d d        t        d$ d%d&|
d'      d(fS t        j	                  d) d*       dd+l&m'}  |       }|jQ                         }|v rtS        tT              d,z  d-z  }|j-                         r_	 t/        |d.      5 }tW        jX                  |      }d d d        v r|= t/        |d/      5 }tW        jZ                  ||d01       d d d        tS        tT              d"z  tS              j\                   d2z  }|j-                         r|j_                          t        j	                  d3 d4 d5        | ja                         fd6}tc        d7dd8d9| j                  d:;       td        jg                  |       ti                t        j	                  d< d=td        jk                                 t        j	                  d< d>       t        d$ d?d7dd@      dAfS # 1 sw Y   xY w# t8        t:        f$ r%}t        j=                  d|        d }Y d }~*d }~ww xY w# 1 sw Y   @xY w# tJ        $ r#}t        j=                  d#|        Y d }~jd }~ww xY w# 1 sw Y   xY w# 1 sw Y   xY w#  Y xY w# tJ        $ rR}t        j                          z
  }tm        t        |dB|dCdD       t        ddEto        |       i      dFfcY d }~S d }~ww xY w)GNunknownzFile upload request from filez&Upload request missing file part from rC   zNo file part  rf   z(Upload request with empty filename from zNo selected filezInvalid file type attempt: z from z<File type not allowed. Only PDF and text files are accepted.z!The processing queue already has zN active file(s). Please wait for current uploads to finish before adding more.zUpload throttled for : )rC   active_uploadsmax_uploadsi  r   r!   zFile too large: z bytes from z File too large. Maximum size is    MBi  r    rbz'Could not read existing file for hash: zDuplicate file detected: z (same content hash)vector_store	documentszCould not get document count: zFile "z9" already exists with identical content. Skipping upload.T)ro   	duplicatetotal_documents   zReplacing existing file: z (different content)PDFProcessorr$   processed_files.jsonrJ   r\   r]   r^   _chunks.jsonzSaving file:  (z bytes) from c                     	 t         dddddi       t        j                  d         t         ddd	dd
i       ddlm}   |        }|j                  t                    }|t        j                  d  d       t         dddddi       t        t              dz  t               j                   dz  }|j                         rM	 t        |d      5 } j                  |      }ddd       t        j                  dt        |       d         n,t        j                  d  d       t         dddddi       y|rt        |      dk(  rt               j                   j#                         }|d k(  }|r,t        j                  d!  d"       t         dd#d$dd%i       n-t        j%                  d&  d'       t         ddd(d)|d*       y	 ddl}	d+}
d,}d}d}d-t(        dd  d.t+        t-        j,                                }t         dd/d0| d1d2|d3       t        d4      5 }d5 |d6fi}|d7d8}	 t        j                  d9  d:| d1       |	j/                  |
||d;<      }|j0                  d=k(  r"|j3                         j5                  d>      rd?}nt        j%                  d@       d}ddd       |2t        d4      5 }d5 |d6fi}|	j/                  ||d;C      }dD}ddd       |j0                  d=k(  r|j3                         }|j5                  d>      r|j5                  dE      r|dE   }t        j                  dFt        |       dG         dH}g }t7        dt        |      |      D ]>  }||||z    }|j9                         s|j;                  || t        |      dI|dJdK       @ |rt        j                  dLt        |       dM         t        t              dz  t               j                   dz  }|j<                  j?                  dIdIN       	 t        |dO      5 }ddl} |j@                  ||dPQ       ddd       t        j                  dRt        |       dS|        t         ddUdV| dWt        |       dXdYt        |      |dZ       n%t        j%                  d[  d\|jB                          |rt        |      dk(  r,t        j                  d^  d_       t         ddd`ddai       yt        j                  dbt        |       dM         t         ddcddt        |       dedft        |      dg       t        j                  dht        |       di       t         ddjdkddli       tE        tF        dm      rtF        jH                  s(t        j                  dn       t         dddoddpi       ytF        jK                          t        j                  dqt        |       di       t         ddrdst        |       dtdut        |      dg       	 tL        5  tF        jH                  jO                  |       tF        jH                  jQ                          ddd       d}	 tE        tF        jH                  d      r=tF        jH                  jR                  r#t        tF        jH                  jR                        }t-        j,                         !z
  }tW        t        dz| |{       t        j                  d|  d}t        |       d~tY        |dP       d       t         dd7dt        |       dtY        |dP       ddt        |      tY        |dP      d       y# 1 sw Y   xY w# t        $ rJ}t        j                  d|        t         dddt        |       dt        |      d       Y d}~yd}~ww xY w# t        $ r$}t        j%                  dA| dB       Y d}~Pd}~ww xY w# 1 sw Y   ZxY w# 1 sw Y   3xY w# 1 sw Y   xY w# t        $ r#}t        j%                  dT|        Y d}~d}~ww xY w# t        $ r&}t        j%                  d]  d\|        Y d}~d}~ww xY w# 1 sw Y    xY w# t        $ rJ}t        j                  dv|        t         dddwt        |       dxt        |      d       Y d}~yd}~ww xY w# t        $ r#}t        jU                  dy|        Y d}~Fd}~ww xY w# t        $ rD}t        |      }t[        t        |d dd         t         ddd| d|d       Y d}~yd}~ww xY w)z*Process uploaded file in background threadr   
   zStarting file processing...stageinitializingrw   z*[Background] Step 1: Extracting text from r#   zExtracting text from PDF...extract_textr   r   Nz[Background] File z+ already processed, loading existing chunksr'   zLoading existing chunks...loading_existing_chunksr   r   rJ   z[Background] Loaded z existing chunks for z-[Background] Failed to load existing chunks: r   z Failed to load existing chunks: load_existing_chunks_failed)r   rC   z, was marked as processed but no chunks foundz,File marked as processed but no chunks foundmissing_chunks.pdfz*[Background] Normal extraction failed for z, trying OCR fallback...   z%Text extraction failed, trying OCR...ocr_fallbackz[Background] Text file z4 produced no chunks - file may be empty or too shortzOFile is empty or contains insufficient content (minimum ~100 characters needed)text_too_short)r   	file_typezhttp://127.0.0.1:5003/ocrzhttp://127.0.0.1:5002/ocrrag__   zStarting OCR (job: z)...ocr_starting)r   
ocr_job_idr   r   zapplication/pdfd   )job_id	max_pagesz%[Background] Trying DeepSeek-OCR for z
 (job_id: i0*  )filesdatar:   r   successDeepSeekz5[Background] DeepSeek-OCR failed, trying Tesseract...z![Background] DeepSeek-OCR error: z, trying Tesseract...)r  r:   	Tesseracttextz[Background] OCR extracted z chars from i  T)ocrocr_service)contentr  sourcechunk_idr$   z[Background] OCR created z chunks from rY   r\   r]   r^   z[Background] Saved z OCR chunks to z,[Background] Failed to save chunks to file: (   zOCR (z) extracted z chunks. Processing...ocr_complete)r   chunk_countr  z[Background] OCR failed for r   z$[Background] OCR fallback error for z)[Background] Failed to extract text from z& even with OCR. File may be corrupted.z7Failed to extract text. File may be corrupted or empty.extract_text_failedz[Background] Step 2: Extracted 2   z
Extracted z" chunks. Adding to vector store...chunks_ready)r   r   z[Background] Step 3: Adding z raw chunks to vector storer8   z Adding chunks to vector store...vector_store_preparer   z)[Background] Vector store not initializedVector store not initializedvector_store_missingz[Background] Step 4: Adding U   zAdding z chunks to vector store...vector_store_writez2[Background] Error adding chunks to vector store: zError adding to vector store: vector_store_errorz+[Background] Could not get document count: file_upload_processing)rl   ri   r   rj   z3[Background] Successfully processed uploaded file: r   z	 chunks, zs)r   z Successfully processed! Created z chunks in s)r   r   duration_secondsfile_processing)	operationrl   rj   z Error processing uploaded file: zError: ).r}   r2   r3   pdf_processorr   process_pdfr   r   stemrL   rM   rO   lenrB   rC   strsuffixr   rT   requestsry   r   rE   poststatus_coderN   r=   rangestripappendr`   ra   rb   r  r   
rag_systemr   initialize_modelvector_store_lockadd_documents
save_indexr   r   r   rv   r   )"r   	processorchunks
chunk_filerV   rN   rH   file_extis_pdfr5  deepseek_urltesseract_urlocr_responseocr_service_usedr  pdf_filer  ocr_data_formds_errorocr_dataocr_text
chunk_sizei
chunk_text
save_error	ocr_error	doc_countduration	error_msgrj   	file_pathri   rl   
start_times"                                r5   process_file_asyncz'upload_file.<locals>.process_file_async  sq
   ^( 1"N3 "LXJ WX( 1"N3 7(N	"..tI? >OO&8
Bm$no, $4&(AB "&m!4{!BXH[H[G\\hEi!iJ!((*#!%j#!6 6!)216&OO.B3v;-Odemdn,op #((+=hZGs)tu0$$J#*,<"= V!1#H~44::<H%/F"*TU]T^^v(wx0$(C#*N"; #**-DXJ  OC  ,D  E0$$m,<8"T Wk' (C(C'++/( (,M#2,>+?qTYY[AQ@R%S
 1$(1*TB,:*"U ")T2 xh%+hBS-T$UE7APS,TM	x *2WX`Waaklvkww{0| }/7}}\QV]jty}/z#/#;#;s#B|GXGXGZG^G^_hGi7A$4$.$6$69n$p37Lx (/!%i!6 ?()/(HFW1X(Y/7}}]RWaf}/g3> 0?
 (33s:'3'8'8':H'||I68<<;O+3F+; *2McRZm_\hiqhr0s t .2
)+).q#h-)L 	!+A19!AjL1IJ'1'7'7'9(.7A4>6>8;F@DUe8f7* )+	!+ $*$.OO6OPSTZP[}\ijris4t$u 26m1D{1RX\]eXfXkXkWllxUy1yJ$.$5$5$;$;DSW$;$X%x-1*c-B )Ka,7,5DIIfa,J)K )3:McRXk]Zijtiu8v(w %=(0(4(**/0@/AcRXk]Zp(q8FWZ[aWb  tD  /E%& '..1MhZWYZfZkZkYl/mn
 "S[A%5"((+TU]T^  _E  *F  G0$$U#*,A"B "A#f+m\d[e fg(  V-OP$23v;O ">s6{mKf gh( 6"$:; z>:*BYBY$$%PQ,  6&(>? ++-">s6{mKf gh( c&k]*DE$8VU* ="//==fE"//::<= 	Xz66DI`I`IjIj$'
(?(?(I(I$J	  99;3
,Dh'/(1.7(1	3 "UV^U__abeflbmannwx}  G  IJ  yK  xL  LN  !O  P(6s6{m;uU]_`OaNbbcd!,'*6{,1(A,>
C6 6  ) 	#&,,/\]^\_-`a4 ( ( !"B3q6( K0MX[\]X^&_ #	#` $- x * 2 25VW_V``u3v w wxx x ? ?@)K )K ,5 %x(2(:(:=ijtiu;v(w(w%x % k"**-QRZQ[[]^g]h+ijjkv= = ! 	$$'YZ[Y\%]^,  8QA(<s1vN 	  ! X$$'RSTRU%VWWX.  F	&"3Xab6xjA
 )i[)$,yA s  Cd. ^ ^.,^ ,d. Bd. Aa* 5`A9_$<a* `!0B*a* Ba* 0`; <`.,`; Aa* ;d. Cd. Ad. :b)  >b>b) d. 	Ac?  B d. ^^ 	_!A _d. _!!d. $	`-`````a* !`+&a* .`83`; ;	a'a"a* "a''a* *	b3bd. bd. b&!b) )	c<2A c72d. 7c<<d. ?	d+d& d. &d++d. .	e;7:e66e;r   z%File uploaded, starting processing...queued)r   rh   ri   rj   rk   r  File z( added to processing queue, queue size: z( saved, processing started in backgroundz4" uploaded successfully. Processing in background...)ro   rm   rl   ri   rn      file_uploadr.  rj   rT  z#Unexpected error during file uploadzUpload failed:   )8rE   r   remote_addrr2   r3   r  rT   r   rl   r   r   r   r"   seekosSEEK_ENDtellappconfigr   pathjoinrL   rM   hashlibmd5read	hexdigestrR   rS   r   r=  r   r;  r   r   r2  rB   r/  r   get_processed_filesr   r   rN   rO   rb   r1  unlinksaver}   r<   putr6   rA   r   r3  )r   active_jobsro   max_sizefile_existsexisting_hashrV   rH   new_file_contentnew_file_hashrS  r   r@  processed_filesmetadata_filer$   rB  rX  rT  rj   rV  ri   rl   rW  s                      @@@@@r5   upload_filerx  {  sH   J##0yIvC3I;?@&!G	{STG^45s::}}V$==B!I)UVG%7893>>
 <6!<T]]O6R[Q\]^G%cdegjjj 	"#02003K= AP Q  !6ykG9MN "-5  	  			!R[[!IIK			!::23x!1)LTUG'GU^H_G``b%cdegjjj"4==1GGLLO!<hG	 ggnnY/ M%)T* Fa$+KK$9$C$C$EMF IIaL#yy{#KK(89CCEMIIaL-";H:EY Z[	K* S":~>:CZCZ&z'>'>LQ[QhQhQrQr,/
0G0G0Q0Q,R	S !'z1jk!%'0   	  ";H:EY Z[6(N	"+"?"?"A.$($7*$DG]$]M$++-!!%mS!9 8Q+/99Q<8'83$,X$6!%mS!9 AQ $		(Aa @A
 "&m!4{!BXH[H[G\\hEi!iJ!((*"))+-zI;mI;WX		)`	 `	F	 	!3!(&&!%	
 	/0!%z)QRbRhRhRjQklm 	%z)QRSz)]^" "
   	eF FW% %  #J1#!NO $%S S ! K$$'EaS%IJJK(8 8A A! T
  C99;+'iXV1	

 ?3q6(!;<=sBBCs  AX2  AX2 	A
X2 AX2 'B	X2 1A*X2 V (2VV "A5X2 W! A8WW! X2 3AX2 X* X5X* X)X* 1DX2 VV W,WX2 WX2 WW! !	X*XX2 XX2 XX* X'"X* *X/,X2 2	Z;AZZZz/askc            	         t        j                          } t        j                  j                  dt        j                  xs d      }	 t        j
                  d      xs i }d|vrt        ddi      dfS t        |j                  dd            j                         }|st        dd	i      dfS t        t        d
      rt        j                  s:	 t        d       t        j                         }dt        _        t        d| d       t        j                  |      }t!        |t              rt        |g d      dfS t        |      dfS # t        $ r3}t        d|        t        ddt        |       i      dfcY d }~S d }~ww xY w# t        $ rR}t        j                          | z
  }t#        t$        |d||dd       t        ddt        |       i      dfcY d }~S d }~ww xY w)NzX-Forwarded-Forrf   TsilentquestionrC   zQuestion is requiredr   zQuestion cannot be empty_initializedz'Initializing RAG system for question...zSystem ready with z
 documentszError initializing RAG system: zSystem initialization failed: r^  )answersourcesr   question_processingr]  z)Unexpected error in ask_question endpointzError processing question: )rE   r   headersr=   r_  get_jsonr   r3  r9  r   r;  r}  printsetup_systemrB   search_and_answer
isinstancer   r2   )	rW  rj   r  r|  rS  init_eresultrH   rT  s	            r5   ask_questionr  8  s   J##$5w7J7J7PbQI#Ot,2T!G%;<=sBBtxx
B/0668G%?@A3FF z>2*:Q:Q_?@&335	*.
'*9+Z@A --h7fc"f<=sBB6?C''  _7x@A+I#f+)WXY[^^^_  O99;+/iU]^7	

 #>s1vh!GHI3NNOsa   ,F
 59F
 / F
 9E 	4F
 >F
 	F(F<F=F
 FF
 
	G%AG G% G%z/statusGETc                  P   	 d} d}	 t        t        d      rt        j                  rt        5  t        t        d      rgt        j                  rWt        t        j                  d      r=t        j                  j
                  r#t        t        j                  j
                        } t        j                  d u}d d d        no	 t        t              dz  }|dz  }|j                         rGt        |d      5 }t        j                  |      }t        |t              rt        |      nd} d d d        t        t'        j(                  d
            }t        t'        j(                  d            }t+        ||z   d d      }	g }
i }t        t              dz  dz  }|j                         r+	 t        |d      5 }t        j                  |      }d d d        |	D ]  }|j,                  |j/                         j0                  t3        |j/                         j0                  dz  d      |j/                         j4                  |j,                  |v |j,                  |v r,|j7                  |j,                  i       j7                  dd      ndd}|
j9                  |        t;        | t        |      t        |      t        |	      ||	D cg c]  }|j,                   c}|
d      dfS # 1 sw Y   xY w# 1 sw Y   xY w#  Y xY w# t         $ r#}t"        j%                  d	|        Y d }~d }~ww xY w# 1 sw Y   ixY w#  Y qxY wc c}w # t         $ rR}t"        j=                  d|        dd l} |j@                          t;        ddtC        |       i      dfcY d }~S d }~ww xY w)Nr   Fr}  r   r   
embeddingszdocuments_metadata.jsonrJ   zCould not get doc count: *.pdf*.txtc                 6    | j                         j                  S N)statst_mtime)xs    r5   <lambda>zget_status.<locals>.<lambda>  s    @Q@Q r   T)r{   reverser$   r   r   r]   r   )namesizesize_mbuploaded	processedrA  )r   	pdf_files	txt_filesr   model_loaded	file_listr  r   zStatus check failed: rC   r^  )"r   r;  r}  r=  r   r   r2  r   r   r   rL   rM   rN   rO   r  r   rB   r2   r   r   globsortedr  r  st_sizerv   r  r=   r:  r   rC   	traceback	print_excr3  )rS  r  embeddings_dirrw  rV   r$   rH   r  r  	all_filesr  rv  rV  	file_infor  s                  r5   
get_statusr  a  sJ   DI 		z>2z7N7N& Dz>:z?V?V":#:#:KHZMdMdMnMn(+J,C,C,M,M(NI#-#7#7t#CL	D D%)-%8<%GN$25N$NM$++-!-5 ['+yy|H9CHd9SHYZI[ g./	g./	9y06Q[_`	 	]+j8;QQ!-- 3&*iilO3
 # 		(I!!(00 !1!9!9[!I1M%NN,55&^^>[d[i[im|[|/--innbAEEmUVW  CDI Y'		( (YYy>(*34Q!&&4
   	]D D[ [ 	8<=	3 3& 5  I045	#8Q!ABCSHH	Is   M
 &L B
K7	L 3K9 43K,'K9 /A7M
 'L= 3L0	L= C/M
  MM
 K)$L ,K61K9 9K>;L 	L-
L("M
 (L--M
 0L:5L= =M?M
 
	N%AN N% N%z/reranker-statusc                      	 ddl m}  t         |              dfS # t        $ r t        ddd      dfcY S t        $ r#}t        dt        |      d      dfcY d}~S d}~ww xY w)	z!Get status of the reranker moduler   )get_reranker_statusr   FzReranker module not available)enabledrC   r^  N)rerankerr  r   ImportErrorrB   r3  )_get_statusrH   s     r5   r  r    sh    A?{}%s** Z53RSTVYYY A53q6:;S@@As    A A AA A z/ocr-statusc                  D   ddl } 	 i }g }	 | j                  dd      }|j                  dk(  r|j                         }	 | j                  d	d      }|j                  dk(  r |j                         j                  d
g       }t        |t        |      ||j                  d      dk(  rdndd      dfS # t        $ r}t        |      dd}Y d}~d}~ww xY w#  Y YxY w# t        $ r:}t        j                  d|        t        dt        |      i      dfcY d}~S d}~ww xY w)zCGet OCR service status and active jobs - for RAG system integrationr   Nzhttp://127.0.0.1:5003/health   r9   r   unavailable)rC   rm   zhttp://127.0.0.1:5003/jobsjobsrm   okzOCR service is readyzOCR service unavailable)r  rp  r  ro   zOCR status check failed: rC   r^  )
r5  r=   r7  rN   rB   r3  r   r2  r2   rC   )r5  
ocr_healthocr_jobshealth_resprH   	jobs_resps         r5   get_ocr_statusr    s7    /
	D",,'Eq,QK&&#-(--/
	 %A1MI$$+$>>+//; %x=1;1IT1Q-Wp	
 
  	  	D#&q6]CJ	D	  /4QC89Q()3../sX   C 2B1 AC  0C 1	C:CC CC CC 	D%/DDDz/ocr-progress/<job_id>c                 @   ddl }	 |j                  d|  d      }|j                  dk(  rt        |j	                               dfS t        d| d      d	fS # t
        $ r>}t        j                  d
|  d|        t        t        |      | d      dfcY d}~S d}~ww xY w)zDGet progress of a specific OCR job - proxies to internal OCR servicer   Nzhttp://127.0.0.1:5003/progress/r  r9   r   zJob not found)rC   r    zOCR progress check failed for r   r^  )	r5  r=   r7  r   rN   rB   r2   rC   r3  )r  r5  resprH   s       r5   get_ocr_progressr    s     A||=fXFPQ|Rs"499;',,_GH#MM A9&A3GHQ6:;S@@As#   ?A A 	B3BBBz/static/<path:filename>c                     t        d|       S )Nstatic)r   )rl   s    r5   serve_staticr    s    x22r   z/pdf/<path:filename>c                     ddl m}  ||       }t        |z  }|j                         r6|j                  j                         dk(  rt        t        t              |      S t        ddi      dfS )zServe PDF files for viewingr   r
   r  rC   zPDF not foundr  )	werkzeug.utilsr   r   rL   r4  r   r   r3  r   )rl   r   ry   pdf_paths       r5   	serve_pdfr    s_     /#H-M&HX__224>"3w<??G_-.33r   z/chunks/<path:filename>c                    ddl m} ddl}ddlm}  ||       } |t
              dz   ||      j                   dz  }|j                         rC	 t        |d      5 } |j                  |      }ddd       t        d|t              |d	      d
fS t        ddi      dfS # 1 sw Y   3xY w# t        $ r%}t        ddt        |       i      dfcY d}~S d}~ww xY w)z"Get chunks for a specific PDF filer   r
   Nr   r   r   rJ   T)r  rl   r   rA  r   rC   zError reading chunks: r^  zChunks not found for this filer  )r  r   rN   pathlibr   r   r1  rL   rM   rO   r   r2  rB   r3  )	rl   r   rN   r   ry   rB  rV   rA  rH   s	            r5   
get_chunksr    s     /#H-Mm${2]8K8P8P7QQ]5^^J
	Nj#& &!"1&) #F 	 
   !ABCSHH& &  	NG'=c!fX%FGH#MM	Ns6   B, B ."B,  B)%B, ,	C5CCCz/processing-statusc                     t                t                t        j                         } | i i d}t        5  t        j                         D ]U  \  }}t        |||       }|j                  d      dk(  r	||d   |<   1| |j                  d|       z
  t        k  sN||d   |<   W 	 ddd       t        |d         |d	<   t        |d         |d
<   t        |      dfS # 1 sw Y   8xY w)z9Return snapshot of all processing jobs (active + recent).)r   r   recent)rx   rm   r   r   r   r  Nactive_countrecent_countr   )rW   r   rE   rr   rP   ru   r   r=   #PROCESSING_STATUS_RETENTION_SECONDSr2  r   )rx   snapshotrl   rz   r   s        r5   list_processing_statusesr    s     
))+CH
 
 >0668 	>OHe3HeMJ~~h'<7/9"8,s;;?bb3=HX&x0	>>  #8H#56H^"8H#56H^8c!!> >s   A"C
CC#z"/processing-status/<path:filename>c           	         t        |       }t                t                t        j	                  |      }|rt        t        ||            dfS t        t              dz  dz  }|j                         rU	 t        |d      5 }t        j                  |      }ddd       |v r%t        ddd||   j	                  d	d
      d      dfS 	 t        dd
dd      dfS # 1 sw Y   DxY w#  Y !xY w)z Get processing status for a filer   r$   r   rJ   Nr   r  zFile already processedr   r   )rm   rn   ro   rA  r   zNo processing status found)rm   rn   ro   r  )r   rW   r   rP   r=   r   r   r   r   rL   rM   rN   rO   )rl   ry   rz   rw  rV   rv  s         r5   get_processing_statusr  #  s    $H-M !!-0E1-GH#MM ]+j8;QQ!-- 3&*iilO3 O3""-$'#;"1-"@"D"D]TU"V	$ 
   4 3
  	 	3 3s$   4C%  C0C% C"C% %C)z/enrichc            
      |   	 t        j                  d      xs i } | j                  d      }t        |      }t	                t                t        j                  d      dk(  rSt        ddt        j                  dd      t        j                  d	d
      t        j                  dt              d      dfS t        j                  t        |fd      }|j                          t        dd|d      dfS # t        $ r=}t        j                  d|        t        ddt!        |       i      dfcY d}~S d}~ww xY w)z%Trigger enrichment for new files onlyTrz  threadsrm   r   z>Enrichment is already running. Please wait for it to complete.rn   r   ro   rf   r   )rC   rm   rn   ro   r   i  )r*   argsr+   z0Enrichment started. Processing new files only...r   )ro   rm   r   r[  zError starting enrichment: rC   zFailed to start enrichment: r^  N)r   r  r=   r   r   r   r   r   r   r.   r/   enrich_documents_asyncr1   rB   r2   rC   r3  )r  requested_threadsr   enrichment_threadrH   s        r5   enrich_documentsr  F  sD   Pt,2 HHY/,->? 	 !#  *i7Y#-11*a@,00B? 1 5 5nF` a    &,,4JR^Q`imn!I(
  	 	  P6qc:;#?Ax!HIJCOOPs$   B4C5 7=C5 5	D;>2D60D;6D;c           
      ~   | t         } t        |       } 	 t        d|        t        ddddd|        t	        t        j                  d            }t        |      }|dk(  rt        dd	d
dd|        t        dd       yi }t        j                         r<t        j                  d      D ]$  }	 |j                  j                  dd      }d||<   & g }i }t        t              dz  dz  }|j                         r+	 t        |d      5 }	t        j                   |	      }ddd       |D ]3  }
|
j                  }|
j"                  |v s||vs#|j%                  |
       5 |st        dd	d|||        t        dd       yt        |      }t        d|D 	cg c]  }	|	j"                   c}	       t        ddd| d|  dd||        t'        t(        d      rt(        j*                  s	 t'        t(        d      rt(        j,                  s&t.        j1                  d       t        d dd!dd|        yt2        5  t(        j,                  j5                          t(        j7                          dt(        _        ddd       g }d}|D ]y  }
	 t=        |
| $      }|j%                  |       |d)z  }t?        d||z  d*z  z         }t        d||jA                  d+      xs d,|
j"                   |||        	 tC        |d-          { |D cg c](  }|jA                  d.      s|jA                  d/      s'|* }}tE        d0 |D              }t        |      }|rt        dd1d2|||        tG        |d)3      D ]  \  }}	 tI        |d-   d4       tK        |d-   |d/          t        d5|d-   t        |d/   xs g       6       t?        d1|t        |      z  d7z  z         }t        d|d8|d-    |||        tC        |d-          d|d/<    nt        dd<d=|||        |dkD  r
d>| d?| d@}n||z
  }||k(  rdA}n	dB| dC| dD}t        dd	||||        tM                t        d||||E       t.        jO                  dF| dG| dH| dI       y#  Y xY w# 1 sw Y   gxY w#  Y oxY wc c}	w # 1 sw Y   /xY w# t8        $ r>}t.        j1                  d"|        t        d dd#t;        |       dd|        Y d}~yd}~ww xY w# t8        $ r[}t.        j1                  d%|
j"                   d&|        |
j"                  d'd%|
j"                   d&t;        |       ddd(}Y d}~d}~ww xY w# t8        $ r Y w xY wc c}w # t8        $ rC}t.        j1                  d9|d-    d&|        t        d:|d-   t;        |      ;       Y d}~d}~ww xY w# tC        |d-          d|d/<   w xY w# t8        $ r^}t.        j1                  dJ|        t        dKt;        |      L       tM                t        d ddMt;        |       dd|        Y d}~yd}~ww xY w)NzMEnrich documents in background - only process files without cards (threaded).Nrun_started)r  r   r   z*Checking for files that need enrichment...r  r   r  zNo PDF files foundrun_completedzNo files to processro   z*_cards.json_cardsrf   Tr$   r   rJ   z%All files already have enriched cardszAll files already enrichedfiles_queued)r  r  zFound z file(s) to enrich. Starting (z thread(s))...r}  r   z)Vector store not available for enrichmentr   r%  z2Failed to initialize vector store for enrichment: z$Vector store initialization failed: )r   Error enriching r   Fr   r  ro   enriched_chunksr   r;   P   ro   	Finished r   r  r  c              3   X   K   | ]"  }t        |j                  d       xs g        $ yw)r  N)r2  r=   )r   rJ   s     r5   r   z)enrich_documents_async.<locals>.<genexpr>  s$     $cQS/@)A)GR%H$cs   (*Z   z-Updating vector store with enriched chunks...)r1   zIndexing enriched chunksvector_update)r   rA  r   zIndexed enriched chunks for zVector store update failed for vector_update_failedr   rC   _   z5No enriched chunks generated. Vector store unchanged.zEnrichment complete! Processed z file(s) with z total chunks.zNEnrichment completed but no files generated new cards. Check logs for details.z"Enrichment completed with issues. z succeeded, z	 skipped.)enrichedtotalrA  ro   zEnrichment completed: r   z files enriched, z total chunkszError in enrichment process: 
run_failed)rC   zEnrichment failed: )(r   r   r   r   r   r   r  r2  r   rL   r1  replacer   r   rM   rN   rO   r  r:  r   r;  r}  r   r2   rC   r=  load_existing_indexr<  rB   r3  process_file_for_enrichmentr   r=   r   r   	enumerater   %apply_enriched_chunks_to_vector_storer   r3   )r   r  r   existing_cards	card_filesource_stemfiles_to_enrichrv  rw  rV   rI  	file_stemtotal_files_to_processrH   resultsr   r  rn   rJ   enriched_resultstotal_chunks_processedenriched_countidxro   skipped_counts                            r5   r  r  h  s   1(6Lnb]LA A/[]^`acop g./	)n!$[#7KQPQS_` :OP &^^N; 	"+.."8"82"FK26N;/ ]+j8;QQ!-- 3&*iilO3
 " 	1H I}}/I^4S&&x0	1
 $[#7^`kmx  {G  H :VW!$_!5^O3TqAFF3TU +,,J<.Xfg"	
 z>2*:Q:Qz>:*BYBY$$%PQ,Xq:XZ[]^`lm ' 3++??A//1.2J+3  ( 	H
4XLY NN6"q O12H HBNNOH$

9%D9X]]O)D&.vf~>;	D (/`!!%%	2BquuM^G_A``!$$cRb$c!c-.$?&  ))9C 5V53F6NLfg9&.&QbJcd(vf~VYZ`arZsZywyVz{"2s3C/D)D(J#JKH,! 6vf~6FG'.$ 36&>B04F,-)5, %G& A77G~VlUmm{|G2^CM 66j>~>Nl[hZiirs c7<RTjlxy%'_~Mc  mC  MT  	U00@BXAYYj  lB  kC  CP  Q  	Ri3 3 4U&3 3    #UVWUX!YZ(18\]`ab]c\d6eghjkmyz    #3HMM?"QC!HI$MM$!1(--3q6(K'+#$6   a6 ! d$$'Fvf~FVVXYZX[%\]()?fVn\_`a\bccd 36&>B04F,-2  b8<=\Q8%' 10CCF8.LaQRT`aa	bso  A(W >.W -!Q9+W :R RR $W W 	2W <W R
$?W $AR( *R( 0>R.R( 6	W  S2AW (U6W <U)U)%U))AW .A6U.$BW 9Q>;W RR RW R% R( (	S/14S*%W *S//W 2	U;AUW UW 	U&"W %U&&W .	V:78V5/V=5V::V==WW 	X<AX77X<c                      j                   ddddd}	 t         j                   d       t         j                   dz  }|j	                         sFd	 j                    d
|d<   t        d j                          t         j                   |d          |S t        |d      5 }t        j                  |      }ddd       rt        |t              sFd j                    d|d<   t        d j                          t         j                   |d          |S t        |      }t         j                   d|d| d       t        d j                   ||       t               }t        j                          fd}t        j                  |d      }	|	j!                           fd}
	 |j#                  ||
|      }j%                          |r|j'                  d|t        |      d j                    dt        |       dd       t         j                   t        |      t        |      d        t        d! j                   t        |      "       |S d# j                    d|d<   t         j                   |d          t        d$ j                          	 |S # 1 sw Y   xY w# j%                          w xY w# t(        $ rr}d% j                    d&t+        |       |d<   t+        |      |d'<   t         j                   |d          t        d( j                   t+        |      )       Y d}~|S d}~ww xY w)*zWorker function to build enriched chunks for a single PDF.
    
    Args:
        pdf_file: Path object for the PDF file
        thread_count: Number of threads to use for chunk-level parallel processing
    Frf   Nr   r  zLoading chunksr  r   zNo chunks found for z, skipping.ro   file_missing_chunks)r   rJ   z Invalid or empty chunk data for r   file_invalid_chunksz&Generating knowledge cards (starting, z thread(s))r   r   ro   file_started)r   r   r   c                      j                  t              s-	 t         j                         j                  t              s,y y # t        $ r Y #w xY wr  )waitr&   r   r  rB   )rI  
stop_events   r5   	heartbeatz.process_file_for_enrichment.<locals>.heartbeatC  sD     oo&IJ3HMMB !oo&IJ ! s   A 	AATr)   c           
      F    t        j                  | |d|  d| d       y )NzGenerating knowledge cards (r   )r  )r   r  )currentr  rI  s     r5   progress_callbackz6process_file_for_enrichment.<locals>.progress_callbackO  s*    +#!6wiqqI	r   )r  r   r  r   z enriched chunks))r  r  r   ro   zCards ready for indexingfile_completed)r   r  z!No enriched chunks generated for file_no_cardsr  r   rC   
file_errorr  )r  r   r   r1  rL   r   rM   rN   rO   r  r   r2  r   r.   Eventr/   r1   build_enriched_chunkssetrt   rB   r3  )rI  r   r  rB  rV   chunks_datar   agentr  heartbeat_threadr  r  rH   r  s   `            @r5   r  r    s    FFM'?OP"l%CC
  ""6x}}o[ QF9 !6X]]K+HMM6)CTUM*c" 	'a))A,K	' *[$"?"B8==/QR SF9 !6X]]K+HMM6)CTUM+&'1R]  jP  Q]  P^  ^i  hj  	k^(--[gst$&__&
	 %++9TJ 		#99+Yj  zF9  GONNMM#2"?3&x}}oRO8L7MM^_	  ,002	 !!1WZ[jWkl M #DHMM?RS TF9+HMM6)CTU x}}E M}	' 	'L NN&  M.x}}oRAxHya&w'vi?PQ\SVLLMMs_   BK K %J!;AK BK -J. BK AK !J+&K .K  K 	L>A'L99L>c                    |st         j                  d|  d       y	 t         j                  d|  dt        |       d       t        5  t
        j                  j                  | |       t
        j                  j                          ddd       t         j                  d|         y# 1 sw Y   "xY w# t        $ r!}t         j                  d|  d	|         d}~ww xY w)
z
    Safely replace existing vector store entries for a source with enriched chunks.
    
    This method uses replace_source_documents which rebuilds the index to ensure
    consistency and prevent index corruption (index/document count mismatch).
    z No enriched chunks provided for z, skipping vector store updateNzUpdating vector store for z with z enriched chunksz&Successfully updated vector store for z Error updating vector store for r   )r2   rT   r3   r2  r=  r;  r   replace_source_documentsr?  rB   rC   )source_namer  rH   s      r5   r  r  t  s     =k]Jhij4[MOH\G]]mno
  	1##<<[/Z##..0	1
 	@NO	1 	1  ;K=1#NOs/   +B5 	?B) B5 )B2.B5 5	C>CCz/enrichment-statusc                      t                t                t        j                  d      dk(  r6t        j                  d      r!t        j                  di       } | r	dt        d<   t	        t              dfS )zGet current enrichment statusrm   r   r   r   r   )r   r   r   r=   r   )r   s    r5   get_enrichment_statusr    sb     X&&05F5J5J>5Z(,,^R@*3h'$%s**r   z/enrichment-resetc                  X    t                t        d       t        ddit              dfS )z/Force-clear enrichment status (admin recovery).T)r   r  r   )r   r   r   r   r   r   r5   reset_enrichment_statusr    s-     $/It9'89:C??r   z/delete/<path:filename>DELETEc                 4   	 t        |       }t        |z  }|j                         st        ddi      dfS t        j                  d|        t        5  	 t        t        d      rft        j                  rVt        j                  j                  |       t        j                  j                          t        j                  d| d       d	d	d	       t        | d
z  t        | dz  t         | dz  g}g }|D ]j  }|j                         s	 |j#                          |j%                  t'        |j(                               t        j                  d|j(                          l t+        t,              dz  dz  }|j                         rx	 t/        |d      5 }t1        j2                  |      }	d	d	d	       |	v rI|	|= t/        |d      5 }t1        j4                  |	|d       d	d	d	       t        j                  d| d       	 |j#                          |j%                  |       t        j                  d|        t        dd| d|d      d fS # t        $ r#}t        j                  d|        Y d	}~d	}~ww xY w# 1 sw Y   xY w# t        $ r&}t        j                  d| d|        Y d	}~d	}~ww xY w# 1 sw Y   xY w# 1 sw Y   xY w# t        $ r"}t        j                  d|        Y d	}~d	}~ww xY w# t        $ r=}t        j7                  d|        t        ddt'        |       i      dfcY d	}~S d	}~ww xY w# t        $ r=}t        j7                  d!|        t        dd"t'        |       i      dfcY d	}~S d	}~ww xY w)#z7Delete a PDF/TXT file and all associated processed datarC   zFile not foundr  zDeleting file: r   zRemoved z from vector storez"Error removing from vector store: Nr   _cards.json_digest.jsonzDeleted processed file: Error deleting r   r$   r   rJ   r\   r]   r^   z from processed files metadatazError updating metadata: zDeleted source file: zError deleting source file: zFailed to delete source file: r^  TrZ  z and associated data deleted)r  ro   deleted_filesr   zError deleting file: zFailed to delete file: )r   r   rL   r   r2   r3   r=  r   r;  r   _remove_source_from_indexr?  rB   rT   r   r   r   rm  r:  r3  r  r   r   rM   rN   rO   rb   rC   )
rl   ry   rV  rH   processed_pathsr  	proc_pathrw  rV   rv  s
             r5   delete_filer!    s   CK'1m+	!G%567<</-9:  	MM:~6:;R;R++EEmT++668OOh}o=O$PQ	M }o\::=/55]O<88
 ( 	KI!K$$&!((Y^^)<=OO&>y~~>N$OP	K ]+j8;QQ!	D-- 3&*iilO3 O3'6mS1 @Q		/1Q?@OOh}o=[$\]
	V  /OO3M?CD
 }o-IJ*
  	 	W  M""%Gs#KLLM	M 	M. ! K&&2aS'IJJK3 3@ @  D""%>qc#BCCD  	V;A3?@G'Ec!fX%NOPRUUU	V  K045#:3q6(!CDEsJJKs  2M M JA6IAM AJ%'M K K/K 
K#!K 9L >M 	I?I:4J:I??JJM 	J>J93M 9J>>M KK KK 	L#L ;M  LM 	M2M	MM 	MM 	N2NNNz/delete-allc            	         	 t         j                  d       t        t        j                  d            } t        t        j                  d            }| |z   }d}g }|D ]  }	 t        |j                        }t        5  	 t        t        d      r/t        j                  rt        j                  j                  |       ddd       t        | d	z  t        | d
z  t         | dz  g}|D ]#  }	|	j#                         s|	j%                          % |j%                          |j'                  |       |dz  } t)        t*              dz  dz  }
|
j#                         r,	 t-        |
d      5 }t/        j0                  i |       ddd       t        5  	 t        t        d      rt        j                  rg t        j                  _        g t        j                  _        g t        j                  _        t        j                  j8                  Lt        j                  j8                  j:                  }t=        j>                  |      t        j                  _        t        j                  jA                          t         j                  d       ddd       tC        dd| d||d      dfS # t        $ r&}t         j                  d| d|        Y d}~d}~ww xY w# 1 sw Y   xY w# t        $ r0}t         j                  d|j                   d|        Y d}~d}~ww xY w# 1 sw Y   xY w# t        $ r#}t         j                  d|        Y d}~d}~ww xY w# t        $ r"}t         j                  d|        Y d}~d}~ww xY w# 1 sw Y   xY w# t        $ r=}t         jE                  d|        tC        ddtG        |       i      dfcY d}~S d}~ww xY w)z3Delete all PDFs/TXTs and rebuild empty vector storez.Deleting all files and rebuilding vector storer  r  r   r   zError removing z from vector store: Nr   r  r  r;   r  r   r$   r   r\   zError clearing metadata: zRebuilt empty vector storezError rebuilding vector store: TzDeleted z% files and rebuilt empty vector store)r  ro   r  deleted_countr   zError deleting all files: rC   zFailed to delete files: r^  )$r2   r3   r   r   r  r   r  r=  r   r;  r   r  rB   rT   r   r   r   rL   rm  r:  r   r   rM   rN   rb   r   searchable_texts_rawsearchable_texts_lowerr   dfaissIndexFlatIPr?  r   rC   r3  )r  r  r  r#  r  rV  ry   rH   r  r   rw  rV   	dimensions                r5   delete_all_filesr*    s   LLHI g./	g./		)	 # 	LIL /	 ? ' ee":~>:CZCZ&33MMm\e "}o\$BB=/ ==]O<"@@# "1 +I '')!((*+
   "$$]3"5	L@ ]+j8;QQ!D-- %IIb!$%  	JJ:~6:;R;R8:J++5CEJ++@EGJ++B!..44@$.$;$;$A$A$C$C	8=8I8I)8T
//5++668OO$@A	J !-0UV**	
 
  	] % e"**_]OK_`a_b+cdde	e e.  L""_Y^^4DBqc#JKKL% % D""%>qc#BCCD  J""%DQC#HIIJ	J 	J*  L5aS9:#;CF8!DEFKKLs
  AN "K'=K??J(>A K'?8K'7'N L0 +L#L0 
N NC8M
N (	K1KKKKK$	K''	L 0%LN L  N #L-(L0 0	M9MN MN 	N
(N NN

NNN 	O #2OO O z/reprocessing-statusc            
         t        t              dz  dz  } | j                         r8	 t        | d      5 }t	        j
                  |      }ddd       t              dfS t        ddd	d
dddd      dfS # 1 sw Y   +xY w# t        j                  t        t        f$ r"}t        j                  d|        Y d}~\d}~ww xY w)zGet current reprocessing statusr$   zreprocessing_status.jsonrJ   Nr   z$Failed to load reprocessing status: r   r   zNo reprocessing in progressrf   )rm   rn   ro   current_filer   r   chunks_created)r   r   rL   rM   rN   rO   r   rQ   rR   rS   r2   r   )status_filerV   rm   rH   s       r5   get_reprocessing_statusr/  =  s     }%
25OOK	Ik3' &11&6?C'' 0  	 & & $$gw7 	ICA3GHH	Is.   A? A3	A? 3A<8A? ?B?B::B?z/extract-entitiesc                     	 t        j                         } | rd| vrt        ddd      dfS ddl}| d   }i g i dd}g d	}|D ]K  }|j	                  ||j                               }|s&|j                  d
      j                         |d   d<    n g d}g }|D ]&  }	|	|j                         v s|j                  |	       ( |r||d<   g d}
g d}|j                         t        fd|
D              rd|d<   nt        fd|D              rd|d<   nd|d<   t        j                  d|        t        d|ddd      S # t        $ r;}t        j                  d|        t        dt        |      d      dfcY d}~S d}~ww xY w)z
    Extract user entities from conversation using simple pattern matching
    
    POST body:
    {
        "user_message": "I am a project manager...",
        "assistant_response": "Great! I can help..." (optional)
    }
    user_messageFzuser_message required)r  rC   r   r   N)demographics	interestssurvey_preferencesexpertise_level)z1I (?:am|work as) (?:a |an )?([a-z\s]+?)(?:\.|,|$)z%I\'m (?:a |an )?([a-z\s]+?)(?:\.|,|$)z3(?:my job|profession|role) is ([a-z\s]+?)(?:\.|,|$)r;   r2  
profession)
cannabisbusinessfinance	marketing
compliancewellnesshealthgrowingretailcultivationr3  )expertexperiencedsenioradvancedzyears of)znew tobeginnerzjust startedlearningc              3   &   K   | ]  }|v  
 y wr  r   r   phrasemessage_lowers     r5   r   z,extract_entities_endpoint.<locals>.<genexpr>  s     D6v&D   rA  r5  c              3   &   K   | ]  }|v  
 y wr  r   rH  s     r5   r   z,extract_entities_endpoint.<locals>.<genexpr>  s     HV=(HrK  rE  intermediatezExtracted entities: Tsimple_patternsg333333?)r  entitiesmethod
confidencezEntity extraction failed: r^  )r   r  r   researchr   groupr9  r:  anyr2   r3   rB   rC   r3  )r  rR  r1  rO  profession_patternspatternmatchinterest_keywordsfound_interestskeywordexpert_phrasesbeginner_phrasesrH   rJ  s                @r5   extract_entities_endpointr^  V  s   H!~T1 0   
 	N+"$#	

 + 	GIIg|'9'9';<E9>Q9M9M9O(6		

 ( 	0G,,,..&&w/	0 $3H[! UM$**,D^DD*2H&'H7GHH*4H&'*8H&'.xj9: '	
  	  5aS9:V
   	s0   )E ;E )AE /BE 	F0F;FF__main__zStarting Flask application...z/RAG system will be initialized on first request	RAG_DEBUG)1trueyeson)hostportr   )r   rf   Nr  )F)r   rf   r   r   N)NNN)r;   )wflaskr   r   r   r   r   ra  rE   rN   rh  r.   r>   concurrent.futuresr   r	   r  r   r  r   r  r'  mainr   re  r   r   r   r   r   r   r   r   r   r   r   logging_configr   r   r   
exceptionsr   r   mistral_integrationr   __name__rd  r3  r   environr=   r"   r2   r;  localr   rK   r  r   r   r%   r&   rP   Queuer<   r-   Lockr,   r6   r0   r   rr   r   r=  rW   rc   r}   r   r   r   boolr   r   r   r   r   r   r   r   r   r   r   router   rx  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r*  r/  r^  r  r9  r   r   runr   r   r5   <module>ru     s8   O O 	      ? *        O N 7 (Ho "'l

? #3

  RZZ^^,DbIJ  !
 [
$Y__&  m,z9<TT &* #$' !/  -0

?jlo0p,q )&)"**..9^`b*c&d #  5;;= ! ') 	<6  inn$inn& "INN$ C!@ 1T %   ?214 1D 1fCI!"!"%!    (6!e
 3  9vh'zC (zCx 6F8$&O %&OP 9ug&EI 'EIP w/A 0A =5'*/ +/@ #eW5A 6A $%3 &3 !"4 #4 $%I &I0 %1" 2". /0  1 D 9vh'P (PBsbjUn8 %1
+ 2
+ 1@ 2@ $x.@AEK BEKN =8V"45NL 6NL` !E73 40 1R 2Rj z	
)*	
;< JJNN;+11399;?YYEGGG6 r   