
    *i3                     Z   d dl mZmZmZmZmZ d dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZmZ d dlmZ d dlmZ d dlZd dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZm Z m!Z! d d	l"m#Z#m$Z$m%Z% d d
l&m'Z'm(Z( d dl)m*Z*  ee+      Z,ejZ                  j]                  d      Z/e/s e0d      d Z1 e2e      e,jf                  d<   de,jf                  d<    e4ejZ                  j]                  dd            Z5 e#e+      Z6 e       Z7 e
jp                         Z9 ee      dz  dz  Z:dZ;dZ<e Z=i a>i a? e
j                         ZA e
j                         ZB e
j                         ZCd ZDd ZEdPdZFe;fdZGdQdZH eD        d ZId ZJd ZKdRd ZLdSd!ZMd" ZNd# ZO eI        d$ ZPd% ZQd& ZRd' ZSe,j                  d(      d)        ZUe,j                  d*d+g,      e1d-               ZVe,j                  d.d+g,      e1d/               ZWe,j                  d0d+g,      e1d1               ZXd2 ZYd3 ZZd4 Z[e,j                  d5d6g,      d7        Z\e,j                  d8      d9        Z]e,j                  d:      d;        Z^e,j                  d<      d=        Z_e,j                  d>d6g,      d?        Z`e,j                  d@      dA        Zae,j                  dBd+g,      dC        ZbdQdDZcdTdEZddF Zee,j                  dGd6g,      dH        Zfe,j                  dId6g,      dJ        Zge+dKk(  r% ehdL        ehdM       e,j                  eedNO       yy)U    )Flaskrequestjsonifyrender_templatesend_from_directoryN)ThreadPoolExecutoras_completedsecure_filenamePathwraps)	RAGSystem)
PDF_DIRPROCESSED_DIRWEB_HOSTWEB_PORT	NOTES_DIRDOCUMENTS_DIRENRICHMENT_DEFAULT_THREADSENRICHMENT_MAX_THREADSENRICHMENT_STATUS_PATHENRICHMENT_LOG_PATH)
get_loggerlog_performancelog_error_with_context)FileUploadErrorValidationError)RAGAgentFLASK_API_KEYz-FLASK_API_KEY environment variable is not setc                 .     t                fd       }|S )zCDecorator to require API key authentication for protected endpointsc                      t         j                  j                  d      xs t         j                  j                  d      }|r|j                  d      r|dd  }|r	|t        k7  rt        ddi      dfS  | i |S )Nz	X-API-KeyAuthorizationzBearer    errorz(Unauthorized: Invalid or missing API keyi  )r   headersget
startswithr!   r   )argskwargsapi_keyfs      +/var/www/html/eventheodds/airagagent/app.pydecorated_functionz+require_api_key.<locals>.decorated_function&   sw    //%%k2Zgoo6I6I/6Zw)))4abkG']2G%OPQSVVV$!&!!    r   )r-   r/   s   ` r.   require_api_keyr1   $   s!    
1X" " r0   UPLOAD_FOLDERi   MAX_CONTENT_LENGTHMAX_CONCURRENT_UPLOADS   metadatazprocessing_status.jsoni  i,  c                  4   t         j                         r1	 t        t         d      5 } t        j                  |       addd       yyi ay# 1 sw Y   xY w# t        j                  t        t        f$ r$}t        j                  d|        i aY d}~yd}~ww xY w)z Load processing status from filerNz"Failed to load processing status: )PROCESSING_STATUS_FILEexistsopenjsonloadprocessing_statusJSONDecodeErrorIOErrorOSError
app_loggerwarningr-   es     r.   load_processing_statusrF   J   s     $$&	#,c2 1a$(IIaL!1 1 1 1$$gw7 	#!CA3GH "	#s-   A A	A 	AA B3BBc                  &   	 t         j                  j                  dd       t        t         d      5 } t	        j
                  t        | d       ddd       y# 1 sw Y   yxY w# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)zSave processing status to fileTparentsexist_okw   indentNz"Failed to save processing status: )
r9   parentmkdirr;   r<   dumpr>   	ExceptionrB   r&   rD   s     r.   save_processing_statusrS   W   ~    C%%++D4+H(#. 	6!II'15	6 	6 	6 C=aSABBC4   1A% AA% A"A% "A% %	B.BBc                 B   t        j                          }t        |       }t        5  t        j	                  |i       j                         }|xs i }d|vr|j	                  d|      |d<   d|vrd|v r|d   |d<   d|vrd|v r|d   |d<   d|vrd|v r|d   |d<   ||d<   |j                  ||||d       |j                         D ]  \  }}	|dvs|	||<    t        ||j	                  d|      z
  d      |d	<   |t        |<   t                d
d
d
       y
# 1 sw Y   y
xY w)z=Update processing status for a file with persistent metadata.
started_at	file_size	client_iporiginal_namefilename)statusprogressmessageupdated)rW   rX   rY   rZ   rL   elapsed_secondsN)
timer   status_lockr>   r(   copyupdateitemsroundrS   )
r[   r\   r]   r^   extranowsafe_filenameentrykeyvalues
             r.   update_processing_statusrm   `   sS   
))+C#H-M	 !!%%mR8==?u$"'))L#">E,e#u(<!&{!3E+e#u(<!&{!3E+%'Ou,D%*?%;E/")j 	
 	  ++- 	#JCSS"c
	# $)uyys/K)KQ#O +0-( 5! ! !s   B)D<DDc           	         t        j                          }d}t        5  t        t        j	                               D ]q  }t        j                  |i       }|j                  d      }|j                  d|j                  d|            }|dv sQ||z
  | kD  sZt        j                  |d       d}s |r
t                ddd       y# 1 sw Y   yxY w)zORemove stale completed/failed processing entries to keep the status file small.Fr\   r_   rW   )	completedfailedNT)ra   rb   listr>   keysr(   poprS   )retention_secondsrh   removedr[   rj   r\   r_   s          r.   cleanup_processing_statusrv      s    
))+CG	 	%.3356 	H%))(B7EYYx(Fii	599\3+GHG00cGmGX5X!%%h5	 "$	% 	% 	%s   A.CC&CCc                 x   |xs t        j                          }|j                         }| |d<   |j                  d      xs |j                  d      xs |}||d<   |j                  d|      }||d<   t        ||z
  d      |d<   ||z
  }t        |d      |d<   |j                  d      d	k(  xr	 |t        kD  |d
<   |S )z5Prepare a processing status entry for JSON responses.r[   rW   r_   last_updaterL   r`   stale_secondsr\   
processingstale)ra   rc   r(   rf   !PROCESSING_STATUS_STALE_THRESHOLD)r[   rj   rh   
normalizedstartedrx   	stale_ages          r.   normalize_processing_entryr      s    

CJ%Jznn\*NjnnY.GN3G&J|..G4K +J}$)#-$;J !k!I"'	1"5J$..2lBtyStGtJwr0   c            	         dddddt         i t        j                         d} t        j                         r0	 t	        t        d      5 }t        j                  |      addd       n| j                         at        j                  di        t        j                  dt                t        j                  d	t        j                                y# 1 sw Y   sxY w# t        $ r | j                         aY w xY w)
z Load enrichment status from fileidler    )r\   r]   r^   files_processedtotal_filesthread_countactive_fileslast_activityr8   Nr   r   r   )r   ra   ENRICHMENT_STATUS_FILEr:   r;   r<   r=   enrichment_statusrR   rc   
setdefault)default_statusr-   s     r.   load_enrichment_statusr      s     2	N $$&	6,c2 1a$(IIaL!1
 +//1  4  1KL  $))+>1 1 	6 . 3 3 5	6s)   C CC CC C76C7c                  &   	 t         j                  j                  dd       t        t         d      5 } t	        j
                  t        | d       ddd       y# 1 sw Y   yxY w# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)zSave enrichment status to fileTrH   rK   rL   rM   Nz"Failed to save enrichment status: )
r   rO   rP   r;   r<   rQ   r   rR   rB   r&   rD   s     r.   save_enrichment_statusr      rT   rU   c                    t        j                          | d|}	 t        j                  j                  dd       t        5  t        t        d      5 }|j                  t        j                  |      dz          ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)z2Append enrichment events to log file for auditing.)	timestampeventTrH   a
Nz&Failed to write enrichment log entry: )ra   r   rO   rP   enrichment_log_lockr;   writer<   dumpsrR   rB   debug)
event_typepayloadrj   r-   excs        r.   log_enrichment_eventr      s     YY[ E
I""(((E  	2)3/ 21

5)D012	2 	22 2	2 	2  IA#GHHIsM   'B$ B(B;BB$ B	BB!B$ !B$ $	C-C

Cc                 
   |t         j                  dt              }t        5  t         j	                  | ||||t        j
                         t        j
                         |d       t                ddd       y# 1 sw Y   yxY w)zUpdate enrichment statusNr   )r\   r]   r^   r   r   r_   r   r   )r   r(   r   rb   rd   ra   r   )r\   r]   r^   r   r   r   s         r.   update_enrichment_statusr      sr    (,,^=WX	 !   .&yy{!YY[(	"
 		 	 ! ! !s   AA99Bc                    t         5  t        j                  di       }|j                  | i       }|t        |      |d<   |t        |      |d<   |r||d<   t	        j                         |d<   t	        j                         t        d<   |r!t        j                  d      dk7  r	dt        d<   t                d d d        y # 1 sw Y   y xY w)	Nr   chunktotal_chunksr^   r_   r   r\   running)rb   r   r   intra   r(   r   )	file_namechunk_indexchunk_totalr^   activerj   s         r.   update_enrichment_file_activityr      s    	 !"--nbA!!)R0" -E'N"$'$4E.!&E)99;i-1YY[/*'++H5B*3h' ! ! !s   B/B??Cc                     t         5  t        j                  di       }| |v r7|j                  | d        t	        j                         t        d<   t                d d d        y # 1 sw Y   y xY wNr   r   )rb   r   r   rs   ra   r   )r   r   s     r.   clear_enrichment_file_activityr      sY    	 %"--nbAJJy$'15o."$% % %s   AA""A+c                      t         5  i t        d<   t        j                         t        d<   t                d d d        y # 1 sw Y   y xY wr   )rb   r   ra   r    r0   r.   clear_all_enrichment_activityr      s<    	 !,..)-1YY[/* ! ! !s	   /?Ac                  h    t        t        d      st               t        _        t        j                  S )z5Get or create a thread-local RAGAgent for enrichment.	rag_agent)hasattrthread_local_agentr    r   r   r0   r.   get_thread_rag_agentr     s$    %{3'/z$'''r0   c                     	 t        |       }t	        dt        |t                    S # t        t        f$ r	 t        cY S w xY w)z.Clamp requested thread count to allowed range.   )r   	TypeError
ValueErrorr   maxminr   )rl   counts     r.   sanitize_thread_countr     sC    *E
 q#e3455 z" *))*s   ' ??c                 ^    | rd| vry| j                  dd      d   j                         }|dv S )z"Check if file extension is allowed.Fr   )pdftxt)rsplitlower)r[   exts     r.   allowed_filer     s8    s(*
//#q
!!
$
*
*
,C.  r0   c                      t         5  t        d t        j                         D              cddd       S # 1 sw Y   yxY w)z5Return the number of files currently being processed.c              3   J   K   | ]  }|j                  d       dk(  sd  yw)r\   rz   r   Nr(   ).0rj   s     r.   	<genexpr>z-get_active_processing_jobs.<locals>.<genexpr>  s!     d		(@SWc@c1ds   ##N)rb   sumr>   valuesr   r0   r.   get_active_processing_jobsr     s6    	 ed"3":":"<dde e es   $5>/c                  .    t        dt        t              S )Nz
index.html)enrichment_default_threadsenrichment_max_threads)r   r   r   r   r0   r.   indexr     s    #=5 r0   z/uploadPOST)methodsc                     t        j                          t        j                  xs d	 t        j	                  d        dt        j
                  vr't        j                  d        t        ddi      dfS t        j
                  d   } | j                  dk(  r't        j                  d	        t        dd
i      dfS | rt        | j                        s4t        j                  d| j                   d        t        ddi      dfS t                t               }|t        k\  r6d| d}t        j                  d d|        t        ||t        d      dfS | j                  dt        j                         | j!                         | j                  d       t"        j$                  d   }|kD  r1t        j                  d d        t        dd|dz   di      dfS t'        | j                        t        j(                  j+                  t"        j$                  d         t        j(                  j-                        }|rcd }	 t/        d      5 }t1        j2                  |j5                               j7                         }d d d        | j                  d       | j5                         }t1        j2                  |      j7                         }	| j                  d       ||	k(  rt        j	                  d d        d}
	 t>        5  tA        tB        d!      rgtB        jD                  rWtA        tB        jD                  d"      r=tB        jD                  jF                  r#tI        tB        jD                  jF                        }
d d d        t        d$ d%d&|
d'      d(fS t        j	                  d) d*       dd+l&m'}  |       }|jQ                         }|v rtS        tT              d,z  d-z  }|j-                         r_	 t/        |d.      5 }tW        jX                  |      }d d d        v r|= t/        |d/      5 }tW        jZ                  ||d01       d d d        tS        tT              d"z  tS              j\                   d2z  }|j-                         r|j_                          t        j	                  d3 d4 d5        | ja                         fd6}tc        d7dd8d9| j                  d:;       te        jf                  |d&<      }|ji                          t        j	                  d= d>       t        d$ d?d7dd@      dAfS # 1 sw Y   xY w# t8        t:        f$ r%}t        j=                  d|        d }Y d }~d }~ww xY w# 1 sw Y   xY w# tJ        $ r#}t        j=                  d#|        Y d }~Ed }~ww xY w# 1 sw Y   xY w# 1 sw Y   xY w#  Y xY w# tJ        $ rR}t        j                          z
  }tk        t        |dB|dCdD       t        ddEtm        |       i      dFfcY d }~S d }~ww xY w)GNunknownzFile upload request from filez&Upload request missing file part from r&   zNo file part  r   z(Upload request with empty filename from zNo selected filezInvalid file type attempt: z from z<File type not allowed. Only PDF and text files are accepted.z!The processing queue already has zN active file(s). Please wait for current uploads to finish before adding more.zUpload throttled for : )r&   active_uploadsmax_uploadsi  r   r3   zFile too large: z bytes from z File too large. Maximum size is    MBi  r2   rbz'Could not read existing file for hash: zDuplicate file detected: z (same content hash)vector_store	documentszCould not get document count: zFile "z9" already exists with identical content. Skipping upload.T)r^   	duplicatetotal_documents   zReplacing existing file: z (different content)PDFProcessorr6   processed_files.jsonr8   rK   rL   rM   _chunks.jsonzSaving file:  (z bytes) from c                  v
   	 t        dddddi       t        j                  d        t        ddd	dd
i       ddlm}   |        }|j                  t        
            }|t        j                  d d       t        dddddi       t        t              dz  t              j                   dz  }|j                         rP	 t        |d      5 }t        j                  |      }ddd       t        j                  dt        |       d        n,t        j                  d d       t        dddddi       y|rt        |      dk(  r,t        j                  d  d!       t        ddd"dd#i       yt        j                  d$t        |       d%        t        dd&d't        |       d(d)t        |      d*       t        j                  d+t        |       d,       t        dd-d.dd/i       t#        t$        d0      rt$        j&                  s(t        j                  d1       t        ddd2dd3i       yt$        j)                          t        j                  d4t        |       d,       t        dd5d6t        |       d7d8t        |      d*       	 t*        5  t$        j&                  j-                  |       t$        j&                  j/                          ddd       d}	 t#        t$        j&                  d      r=t$        j&                  j0                  r#t        t$        j&                  j0                        }t5        j4                         z
  }t7        t        d=||	>       t        j                  d? d@t        |       dAt9        |dB       dC       t        dDdEdFt        |       dGt9        |dB       dHdDt        |      t9        |dB      dI       y# 1 sw Y   xY w# t        $ rJ}t        j                  d|        t        dddt!        |       dt!        |      d       Y d}~yd}~ww xY w# 1 sw Y   hxY w# t        $ rJ}t        j                  d9|        t        ddd:t!        |       d;t!        |      d       Y d}~yd}~ww xY w# t        $ r#}t        j3                  d<|        Y d}~d}~ww xY w# t        $ rD}t!        |      }t;        t        |dJ	dKdL        t        dddM| d|d       Y d}~yd}~ww xY w)Nz*Process uploaded file in background threadrz   
   zStarting file processing...stageinitializingrg   z*[Background] Step 1: Extracting text from    zExtracting text from PDF...extract_textr   r   Nz[Background] File z+ already processed, loading existing chunks   zLoading existing chunks...loading_existing_chunksr   r   r8   z[Background] Loaded z existing chunks for z-[Background] Failed to load existing chunks: rp   z Failed to load existing chunks: load_existing_chunks_failed)r   r&   z, was marked as processed but no chunks foundz,File marked as processed but no chunks foundmissing_chunksz)[Background] Failed to extract text from z!. File may be corrupted or empty.z7Failed to extract text. File may be corrupted or empty.extract_text_failedz[Background] Step 2: Extracted z chunks from 2   z
Extracted z" chunks. Adding to vector store...chunks_ready)r   chunk_countz[Background] Step 3: Adding z raw chunks to vector store<   z Adding chunks to vector store...vector_store_preparer   z)[Background] Vector store not initializedVector store not initializedvector_store_missingz[Background] Step 4: Adding U   zAdding z chunks to vector store...vector_store_writez2[Background] Error adding chunks to vector store: zError adding to vector store: vector_store_errorz+[Background] Could not get document count: file_upload_processing)r[   rX   r   rY   z3[Background] Successfully processed uploaded file: r   z	 chunks, rL   zs)ro   d   z Successfully processed! Created z chunks in s)r   r   duration_secondsfile_processing)	operationr[   rY   z Error processing uploaded file: zError: )rm   rB   infopdf_processorr   process_pdfr   r   stemr:   r;   r<   r=   lenrR   r&   strr   
rag_systemr   initialize_modelvector_store_lockadd_documents
save_indexr   r   ra   r   rf   r   )r   	processorchunks
chunk_filer-   rE   	doc_countduration	error_msgrY   	file_pathrX   r[   
start_times            r.   process_file_asyncz'upload_file.<locals>.process_file_async  s   j( 1"N3 "LXJ WX( 1"N3 7(N	"..tI? >OO&8
Bm$no, $4&(AB "&m!4{!BXH[H[G\\hEi!iJ!((*#!%j#!6 6!)-16&OO.B3v;-Odemdn,op #((+=hZGs)tu0$$J#*,<"= V!1$$'PQYPZZ{%|},  Q&(=> "A#f+m\d[e fg(  V-OP$23v;O ">s6{mKf gh( 6"$:; z>:*BYBY$$%PQ,  6&(>? ++-">s6{mKf gh( c&k]*DE$8VU* ="//==fE"//::<= 	Xz66DI`I`IjIj$'
(?(?(I(I$J	  99;3
,Dh'/(1.7(1	3 "UV^U__abeflbmannwx}  G  IJ  yK  xL  LN  !O  P(6s6{m;uU]_`OaNbbcd!,'*6{,1(A,>
[6 6  ) 	#&,,/\]^\_-`a4 ( ( !"B3q6( K0MX[\]X^&_ #	#X= = ! 	$$'YZ[Y\%]^,  8QA(<s1vN 	  ! X$$'RSTRU%VWWX.  F	&"3Xab6xjA
 )i[)$,yA s   CS+ P O61,P ,S+ 
;S+ CS+ AS+ /Q& 5>Q3Q& ;S+ >AR< B S+ 6P ;P 	QA QS+ QS+ Q#Q& &	R9/A R4/S+ 4R99S+ <	S(S#S+ #S((S+ +	T84:T33T8rz   z%File uploaded, starting processing...queued)r   rW   rX   rY   rZ   r   targetdaemonzFile z( saved, processing started in backgroundz4" uploaded successfully. Processing in background...)r^   r\   r[   rX   r]      file_uploadr  rY   r  z#Unexpected error during file uploadzUpload failed:   )7ra   r   remote_addrrB   r  filesrC   r   r[   r   rv   r   r4   seekosSEEK_ENDtellappconfigr   pathjoinr:   r;   hashlibmd5read	hexdigestr@   rA   r   r  r   r	  r   r   r  rR   r  r   get_processed_filesr   r   r<   r=   rQ   r  unlinksaverm   	threadingThreadstartr   r  )r   active_jobsr^   max_sizefile_existsexisting_hashr-   rE   new_file_contentnew_file_hashr  r   r  processed_filesmetadata_filer6   r  r  processing_threadr  rY   r  rX   r[   r  s                       @@@@@r.   upload_filer<  '  s0    J##0yIAC3I;?@&!G	{STG^45s::}}V$==B!I)UVG%7893>>
 <6!<T]]O6R[Q\]^G%cdegjjj 	"#02003K= AP Q  !6ykG9MN "-5  	  			!R[[!IIK			!::23x!1)LTUG'GU^H_G``b%cdegjjj"4==1GGLLO!<hG	 ggnnY/ M%)T* Fa$+KK$9$C$C$EMF IIaL#yy{#KK(89CCEMIIaL-";H:EY Z[	K* S":~>:CZCZ&z'>'>LQ[QhQhQrQr,/
0G0G0Q0Q,R	S !'z1jk!%'0   	  ";H:EY Z[6(N	"+"?"?"A.$($7*$DG]$]M$++-!!%mS!9 8Q+/99Q<8'83$,X$6!%mS!9 AQ $		(Aa @A
 "&m!4{!BXH[H[G\\hEi!iJ!((*"))+-zI;mI;WX		)l	 l	^ 	!3!(&&!%	
 &,,4FtT! 	%z)QRSz)]^" "
   	{F FW% %  #J1#!NO $%S S ! K$$'EaS%IJJK(8 8A A! j  C99;+'iXV1	

 ?3q6(!;<=sBBCs  AX  AX 	A
X AX 'B	X 1A*X U8 (2U+U8 "A5X V< A8V/V< X 3AX X W+5X W8)X 1C9X +U50U8 8V,V'!X 'V,,X /V94V< <	W(W#X #W((X +W50X 8X=X X
X 	Y(AY#Y(#Y(z/askc            	         t        j                          } 	 t        j                         }|rd|vrt        ddi      dfS |d   j	                         }|st        ddi      dfS t        t        d      rt        j                  s:	 t        d       t        j                         }dt        _        t        d	| d
       |j                  dd      }|j                  d      }|j                  dd      }|j                  dd      }|j                  dd      }	|s|s|	s|r|r|j                  dg       }
|
s4|r2ddlm}  |       }|j                  |d      }|D cg c]  }|d   	 }
}t        j                   j#                  |
||      }t$        j'                  ||      t$        j)                  |      d}n\t        j+                  |||      }t$        j-                  ||      t$        j)                  |      d}nt        j/                  ||      }t1        |t              rt        |g d      dfS t        |      dfS # t        $ r3}t        d|        t        ddt        |       i      dfcY d }~S d }~ww xY wc c}w # t        $ rV}t        j                          | z
  }t3        t4        |dt6        |d d!       t        dd"t        |       i      dfcY d }~S d }~ww xY w)#Nquestionr&   zQuestion is requiredr   zQuestion cannot be empty_initializedz'Initializing RAG system for question...TzSystem ready with z
 documentszError initializing RAG system: zSystem initialization failed: r  kr5   domain_filteruse_keyword_focusedFextract_keywordsexpand_querykeywordsr   )get_keyword_managerr   )max_keywordskeyword)r@  domain)answersources)r@  r   question_processingr  z)Unexpected error in ask_question endpointzError processing question: )ra   r   get_jsonr   stripr   r	  r?  printsetup_systemrR   r  r(   keyword_managerrF  rC  r   keyword_focused_searchself_format_keyword_results_format_sources_from_resultsenhanced_search_format_enhanced_resultssearch_and_answer
isinstancer   rB   rY   )r  datar>  r  init_er@  rA  rB  rC  rD  rE  rF  km	extractedkwresultrE   r  s                     r.   ask_questionr`  p  s    JEO!z-G%;<=sBB
#))+G%?@A3FF z>2*:Q:Q_?@&335	*.
'*9+Z@A HHS!1"hh'<eD88$6>xx6 "2lm"88J3$4C,.B " 3 3H2 3 NI8AB"9BHB#00GGTU^kGl #::68L#@@H $33H-3X #;;FHM#@@H  11(a1@Ffc"f<=sBB6?C''Y  _7x@A+I#f+)WXY[^^^_*  C2  O99;+/iU]^7	

 #>s1vh!GHI3NNOsm   (J #J # J 9I  =BJ I?&CJ 3J  	I<	(I71I<2J 7I<<J 	K#AKK#K#z/add-insightc                  |   	 t        j                         } | rd| vrt        ddi      dfS | d   }| j                  di       }| j                  dd      }dt	        t        j
                                d	t        |      d
z   |i ||dt        j
                         ddd}t        j                  j                  |g       t        j                  dt        |       d       t        dd|d   d      S # t        $ r<}t        j                  dt        |       d       t        ddi      dfcY d}~S d}~ww xY w)zLAdd successful strategy insights to the RAG vector store for global learningcontentr&   zContent is requiredr   r6   sourceapistrategy_insight__i'  strategy_insight)rc  typer   g      ?)idrb  r6   scorez,Added strategy insight to RAG vector store: z charsTz&Strategy insight added to vector storeri  )successr^   chunk_idzError adding insight: )exc_infozFailed to add insightr  N)r   rM  r   r(   r   ra   hashr	  r   r  rB   r  r  rR   r&   r  )rZ  rb  r6   rc  insight_chunkrE   s         r.   add_insightrp    sS   #@!y,G%:;<cAAy/88J+(E* &c$))+&6%7qg9N8OP *!YY[	 

 	--}o>Fs7|nTZ[\?%d+
  	  @1#a&:TJ!89:C??@s#   (C6 C
C6 6	D;?1D60D;6D;c           
      *   | sd| S g }| dd D ]W  }|j                  dd      }|j                  dg       }|s*|j                  |dd D cg c]  }|d    d	|d
    d c}       Y |r ddj                  t        |dd              S d| S c c}w )z9Format keyword-focused search results into answer format.#No relevant information found for: N   rb  r   keyword_matchesrL   rH  r   matchesz	 matches)z Found relevant information for: , r5   zSearch completed for: )r(   extendr(  set)resultsr>  rt  r_  rb  ru  ms          r.   rT  rT    s    4XJ?? O"1+ f**Y+**.3""X_`babXc#dSTq|nBq|nI$N#de	f 1$))CPRQR@S<T2U1VWW'z22 $es   B
c                 2   | sd| S t        d | dd D              }t               }| dd D ]L  }|j                  di       }|j                  dg       D ]#  }|j                  |j                  dd             % N |rd	j	                  |      nd}d
| d| dS )z2Format enhanced search results into answer format.rr  c              3   `   K   | ]&  }|j                  d i       j                  dd       ( yw)keyword_analysistotal_matchesr   Nr   )r   r_  s     r.   r   z+_format_enhanced_results.<locals>.<genexpr>  s+     mW]

#5r:>>PQRms   ,.Nrs  r}  matched_keywordsrI  generalrv  zFound relevant information in z	 domain (z keyword matches))r   rx  r(   addr(  )ry  r>  r~  domainsr_  analysismatch
domain_strs           r.   rW  rW    s    4XJ?? mahikjkalmmMeG"1+ 8::0"5\\"4b9 	8EKK		(I67	88
 (/7#IJ+J<yO`aar0   c                    g }t        | dd       D ]  \  }}|j                  di       }|j                  dd|dz          t        |j                  dd            d	kD  r|j                  dd      dd	 d
z   n|j                  dd      |j                  dd      d}d|v r8|d   }t        |j                  dg             |d<   |j                  dd      |d<   |j                  |        |S )z)Format search results into source format.Nr5   r6   rc  z	Document r   rb  r   r   z...rj  g        )rc  previewrj  r}  r  rt  keyword_score)	enumerater(   r  append)ry  rK  ir_  r6   rc  r  s          r.   rU  rU    s   Gwr{+ 	6::j"-ll8y1->?BEfjjQZ\^F_B`cfBfvzz)R0#6>lrlvlv  xA  CE  mFZZ-
 '01H(+HLL9KR,P(QF$%&.ll?C&HF?#v  Nr0   z/statusGETc                  P   	 d} d}	 t        t        d      rt        j                  rt        5  t        t        d      rgt        j                  rWt        t        j                  d      r=t        j                  j
                  r#t        t        j                  j
                        } t        j                  d u}d d d        no	 t        t              dz  }|dz  }|j                         rGt        |d      5 }t        j                  |      }t        |t              rt        |      nd} d d d        t        t'        j(                  d
            }t        t'        j(                  d            }t+        ||z   d d      }	g }
i }t        t              dz  dz  }|j                         r+	 t        |d      5 }t        j                  |      }d d d        |	D ]  }|j,                  |j/                         j0                  t3        |j/                         j0                  dz  d      |j/                         j4                  |j,                  |v |j,                  |v r,|j7                  |j,                  i       j7                  dd      ndd}|
j9                  |        t;        | t        |      t        |      t        |	      ||	D cg c]  }|j,                   c}|
d      dfS # 1 sw Y   xY w# 1 sw Y   xY w#  Y xY w# t         $ r#}t"        j%                  d	|        Y d }~d }~ww xY w# 1 sw Y   ixY w#  Y qxY wc c}w # t         $ rR}t"        j=                  d|        dd l} |j@                          t;        ddtC        |       i      dfcY d }~S d }~ww xY w)Nr   Fr?  r   r   
embeddingszdocuments_metadata.jsonr8   zCould not get doc count: *.pdfz*.txtc                 6    | j                         j                  S N)statst_mtime)xs    r.   <lambda>zget_status.<locals>.<lambda><  s    @Q@Q r0   T)rk   reverser6   r   r   rL   r   )namesizesize_mbuploaded	processedr  )r   	pdf_files	txt_filesr   model_loaded	file_listr   r   zStatus check failed: r&   r  )"r   r	  r?  r  r   r   r  r   r   r   r:   r;   r<   r=   rY  rq   rR   rB   r   r   globsortedr  r  st_sizerf   r  r(   r  r   r&   	traceback	print_excr  )r  r  embeddings_dirr:  r-   r6   rE   r  r  	all_filesr  r9  r  	file_infor  s                  r.   
get_statusr    sJ   DI 		z>2z7N7N& Dz>:z?V?V":#:#:KHZMdMdMnMn(+J,C,C,M,M(NI#-#7#7t#CL	D D%)-%8<%GN$25N$NM$++-!-5 ['+yy|H9CHd9SHYZI[ g./	g./	9y06Q[_`	 	]+j8;QQ!-- 3&*iilO3
 # 		(I!!(00 !1!9!9[!I1M%NN,55&^^>[d[i[im|[|/--innbAEEmUVW  CDI Y'		( (YYy>(*34Q!&&4
   	]D D[ [ 	8<=	3 3& 5  I045	#8Q!ABCSHH	Is   M
 &L B
K7	L 3K9 43K,'K9 /A7M
 'L= 3L0	L= C/M
  MM
 K)$L ,K61K9 9K>;L 	L-
L("M
 (L--M
 0L:5L= =M?M
 
	N%AN N% N%z/static/<path:filename>c                     t        d|       S )Nstatic)r   )r[   s    r.   serve_staticr  c  s    x22r0   z/pdf/<path:filename>c                     ddl m}  ||       }t        |z  }|j                         r6|j                  j                         dk(  rt        t        t              |      S t        ddi      dfS )zServe PDF files for viewingr   r
   z.pdfr&   zPDF not found  )	werkzeug.utilsr   r   r:   suffixr   r   r  r   )r[   r   ri   pdf_paths       r.   	serve_pdfr  g  s_     /#H-M&HX__224>"3w<??G_-.33r0   z/chunks/<path:filename>c                    ddl m} ddl}ddlm}  ||       } |t
              dz   ||      j                   dz  }|j                         rC	 t        |d      5 } |j                  |      }ddd       t        d|t              |d	      d
fS t        ddi      dfS # 1 sw Y   3xY w# t        $ r%}t        ddt        |       i      dfcY d}~S d}~ww xY w)z"Get chunks for a specific PDF filer   r
   Nr   r   r   r8   T)rk  r[   r   r  r   r&   zError reading chunks: r  zChunks not found for this filer  )r  r   r<   pathlibr   r   r  r:   r;   r=   r   r  rR   r  )	r[   r   r<   r   ri   r  r-   r  rE   s	            r.   
get_chunksr  q  s     /#H-Mm${2]8K8P8P7QQ]5^^J
	Nj#& &!"1&) #F 	 
   !ABCSHH& &  	NG'=c!fX%FGH#MM	Ns6   B, B ."B,  B)%B, ,	C5CCCz/processing-statusc                     t                t                t        j                         } | i i d}t        5  t        j                         D ]U  \  }}t        |||       }|j                  d      dk(  r	||d   |<   1| |j                  d|       z
  t        k  sN||d   |<   W 	 ddd       t        |d         |d	<   t        |d         |d
<   t        |      dfS # 1 sw Y   8xY w)z9Return snapshot of all processing jobs (active + recent).)r   r   recent)rh   r\   rz   r   rx   r  Nactive_countrecent_countr   )rF   rv   ra   rb   r>   re   r   r(   #PROCESSING_STATUS_RETENTION_SECONDSr  r   )rh   snapshotr[   rj   r}   s        r.   list_processing_statusesr    s     
))+CH
 
 >0668 	>OHe3HeMJ~~h'<7/9"8,s;;?bb3=HX&x0	>>  #8H#56H^"8H#56H^8c!!> >s   A"C
CC#z"/processing-status/<path:filename>c           	         t        |       }t                t                t        j	                  |      }|rt        t        ||            dfS t        t              dz  dz  }|j                         rU	 t        |d      5 }t        j                  |      }ddd       |v r%t        ddd||   j	                  d	d
      d      dfS 	 t        dd
dd      dfS # 1 sw Y   DxY w#  Y !xY w)z Get processing status for a filer   r6   r   r8   Nro   r   zFile already processedr   r   )r\   r]   r^   r  r   zNo processing status found)r\   r]   r^   r  )r   rF   rv   r>   r(   r   r   r   r   r:   r;   r<   r=   )r[   ri   rj   r:  r-   r9  s         r.   get_processing_statusr    s    $H-M !!-0E1-GH#MM ]+j8;QQ!-- 3&*iilO3 O3""-$'#;"1-"@"D"D]TU"V	$ 
   4 3
  	 	3 3s$   4C%  C0C% C"C% %C)z/enrichc            
      h   	 t        j                  d      xs i } | j                  d      }t        |      }t	                t
        j                  d      dk(  rSt        ddt
        j                  dd      t
        j                  d	d
      t
        j                  dt              d      dfS t        j                  t        |fd      }|j                          t        dd|d      dfS # t        $ r=}t        j                  d|        t        ddt        |       i      dfcY d}~S d}~ww xY w)z%Trigger enrichment for new files onlyT)silentthreadsr\   r   z>Enrichment is already running. Please wait for it to complete.r]   r   r^   r   r   )r&   r\   r]   r^   r   i  )r  r*   r  z0Enrichment started. Processing new files only...r~   )r^   r\   r   r  zError starting enrichment: r&   zFailed to start enrichment: r  N)r   rM  r(   r   r   r   r   r   r0  r1  enrich_documents_asyncr2  rR   rB   r&   r  )rZ  requested_threadsr   enrichment_threadrE   s        r.   enrich_documentsr    s>   Pt,2 HHY/,->? 	   *i7Y#-11*a@,00B? 1 5 5nF` a    &,,4JR^Q`imn!I(
  	 	  P6qc:;#?Ax!HIJCOOPs$   B*C+ -=C+ +	D142D,&D1,D1c           
      ~   | t         } t        |       } 	 t        d|        t        ddddd|        t	        t        j                  d            }t        |      }|dk(  rt        dd	d
dd|        t        dd       yi }t        j                         r<t        j                  d      D ]$  }	 |j                  j                  dd      }d||<   & g }i }t        t              dz  dz  }|j                         r+	 t        |d      5 }	t        j                   |	      }ddd       |D ]3  }
|
j                  }|
j"                  |v s||vs#|j%                  |
       5 |st        dd	d|||        t        dd       yt        |      }t        d|D 	cg c]  }	|	j"                   c}	       t        ddd| d|  dd||        t'        t(        d      rt(        j*                  s	 t'        t(        d      rt(        j,                  s&t.        j1                  d       t        d dd!dd|        yt2        5  t(        j,                  j5                          t(        j7                          dt(        _        ddd       g }d}|D ]  }
	 t=        |
| $      }|j%                  |       |j?                  d)      stA        |d*          |d+z  }tC        d||z  d,z  z         }t        d||j?                  d-      xs d.|
j"                   |||         |D cg c](  }|j?                  d)      s|j?                  d/      s'|* }}tE        d0 |D              }t        |      }|rt        dd1d2|||        tG        |d+3      D ]  \  }}	 tI        |d*   d4       tK        |d*   |d/          t        d5|d*   t        |d/   xs g       6       tC        d1|t        |      z  d7z  z         }t        d|d8|d*    |||        tA        |d*          d|d/<    nt        dd<d=|||        |dkD  r
d>| d?| d@}n||z
  }||k(  rdA}n	dB| dC| dD}t        dd	||||        tM                t        d||||E       t.        jO                  dF| dG| dH| dI       y#  Y xY w# 1 sw Y   wxY w#  Y xY wc c}	w # 1 sw Y   ?xY w# t8        $ r>}t.        j1                  d"|        t        d dd#t;        |       dd|        Y d}~yd}~ww xY w# t8        $ r[}t.        j1                  d%|
j"                   d&|        |
j"                  d'd%|
j"                   d&t;        |       ddd(}Y d}~d}~ww xY wc c}w # t8        $ rC}t.        j1                  d9|d*    d&|        t        d:|d*   t;        |      ;       Y d}~d}~ww xY w# tA        |d*          d|d/<   w xY w# t8        $ r^}t.        j1                  dJ|        t        dKt;        |      L       tM                t        d ddMt;        |       dd|        Y d}~yd}~ww xY w)NzMEnrich documents in background - only process files without cards (threaded).Nrun_started)r  r   r   z*Checking for files that need enrichment...r  ro   r   zNo PDF files foundrun_completedzNo files to processr^   z*_cards.json_cardsr   Tr6   r   r8   z%All files already have enriched cardszAll files already enrichedfiles_queued)r   r5   zFound z file(s) to enrich. Starting (z thread(s))...r?  r   z)Vector store not available for enrichmentrp   r   z2Failed to initialize vector store for enrichment: z$Vector store initialization failed: )r   Error enriching r   Fr   rk  r^   enriched_chunksr   rk  r   r   P   r^   	Finished r  c              3   X   K   | ]"  }t        |j                  d       xs g        $ yw)r  N)r  r(   )r   r8   s     r.   r   z)enrich_documents_async.<locals>.<genexpr>X  s$     $cQS/@)A)GR%H$cs   (*Z   z-Updating vector store with enriched chunks...)r2  zIndexing enriched chunksvector_update)r   r  r   zIndexed enriched chunks for zVector store update failed for vector_update_failedr   r&   _   z5No enriched chunks generated. Vector store unchanged.zEnrichment complete! Processed z file(s) with z total chunks.zNEnrichment completed but no files generated new cards. Check logs for details.z"Enrichment completed with issues. z succeeded, z	 skipped.)enrichedtotalr  r^   zEnrichment completed: r   z files enriched, z total chunkszError in enrichment process: 
run_failed)r&   zEnrichment failed: )(r   r   r   r   rq   r   r  r  r   r:   r  replacer   r   r;   r<   r=   r  r  r   r	  r?  r   rB   r&   r  load_existing_indexr
  rR   r  process_file_for_enrichmentr(   r   r   r   r  r   %apply_enriched_chunks_to_vector_storer   r  )r   r  r   existing_cards	card_filesource_stemfiles_to_enrichr9  r:  r-   pdf_file	file_stemtotal_files_to_processrE   ry  r   r_  r]   r8   enriched_resultstotal_chunks_processedenriched_countidxr^   skipped_counts                            r.   r  r    s   1(6Ljb]LA A/[]^`acop g./	)n!$[#7KQPQS_` :OP &^^N; 	"+.."8"82"FK26N;/ ]+j8;QQ!-- 3&*iilO3
 " 	1H I}}/I^4S&&x0	1
 $[#7^`kmx  {G  H :VW!$_!5^O3TqAFF3TU +,,J<.Xfg"	
 z>2*:Q:Qz>:*BYBY$$%PQ,Xq:XZ[]^`lm ' 3++??A//1.2J+3  ( 	H
4XLY NN6"::i(.vf~>q O12H HBNNOH$

9%D9X]]O)D&)	< (/`!!%%	2BquuM^G_A``!$$cRb$c!c-.$?&  ))9C 5V53F6NLfg9&.&QbJcd(vf~VYZ`arZsZywyVz{"2s3C/D)D(J#JKH,! 6vf~6FG'.$ 36&>B04F,-)5, %G& A77G~VlUmm{|G2^CM 66j>~>Nl[hZiirs c7<RTjlxy%'_~Mc  mC  MT  	U00@BXAYYj  lB  kC  CP  Q  	Ra3 3 4U&3 3    #UVWUX!YZ(18\]`ab]c\d6eghjkmyz    #3HMM?"QC!HI$MM$!1(--3q6(K'+#$6 a6 ! d$$'Fvf~FVVXYZX[%\]()?fVn\_`a\bccd 36&>B04F,-2  b8<=\Q8%' 10CCF8.LaQRT`aa	bsK  A(W >.W -!R	+W :R RR $W W 	2W <W R&
$?W $AR8 *R8 0>R+.R8 6	W  TA?W U)#U)5U)9AW >A6U.4BW 	RW RR R# W +R50R8 8	S?4S:5W :S??W 	U&AU!W !U&&W .	V:78V5/V=5V::V==WW 	X<AX77X<c                      j                   ddddd}	 t         j                   d       t         j                   dz  }|j	                         sFd	 j                    d
|d<   t        d j                          t         j                   |d          |S t        |d      5 }t        j                  |      }ddd       rt        |t              sFd j                    d|d<   t        d j                          t         j                   |d          |S t        |      }t         j                   d|d| d       t        d j                   ||       t               }t        j                         fd}t        j                  |d      }	|	j!                           fd}
	 |j#                  ||
|      }j%                          |r|j'                  d|t        |      d j                    dt        |       dd       t         j                   t        |      t        |      d        t        d! j                   t        |      "       |S d# j                    d|d<   t         j                   |d          t        d$ j                          	 |S # 1 sw Y   xY w# j%                          w xY w# t(        $ rr}d% j                    d&t+        |       |d<   t+        |      |d'<   t         j                   |d          t        d( j                   t+        |      )       Y d}~|S d}~ww xY w)*zWorker function to build enriched chunks for a single PDF.
    
    Args:
        pdf_file: Path object for the PDF file
        thread_count: Number of threads to use for chunk-level parallel processing
    Fr   Nr   r  zLoading chunksr  r   zNo chunks found for z, skipping.r^   file_missing_chunks)r   r8   z Invalid or empty chunk data for r   file_invalid_chunksz&Generating knowledge cards (starting, z thread(s))r   r   r^   file_started)r   r   r   c                  P     j                  d      s	  j                  d      sy y )Nr   )wait)
stop_events   r.   	heartbeatz.process_file_for_enrichment.<locals>.heartbeat  s      oob) !oob)r0   Tr  c           
      F    t        j                  | |d|  d| d       y )NzGenerating knowledge cards (r   )r  )r   r  )currentr  r  s     r.   progress_callbackz6process_file_for_enrichment.<locals>.progress_callback  s*    +#!6wiqqI	r0   )r  r   r  r   z enriched chunks))rk  r  r   r^   zCards ready for indexingfile_completed)r   r  z!No enriched chunks generated for file_no_cardsr  r   r&   
file_errorr  )r  r   r   r  r:   r   r;   r<   r=   rY  rq   r  r   r0  Eventr1  r2  build_enriched_chunksrx  rd   rR   r  )r  r   r_  r  r-   chunks_datar   agentr  heartbeat_threadr  r  rE   r  s   `            @r.   r  r    s    FBM'?OP"l%CC
  ""6x}}o[ QF9 !6X]]K+HMM6)CTUM*c" 	'a))A,K	' *[$"?"B8==/QR SF9 !6X]]K+HMM6)CTUM+&'1R]  jP  Q]  P^  ^i  hj  	k^(--[gst$&__&
	
 %++9TJ 		#99+Yj  zF9  GONNMM#2"?3&x}}oRO8L7MM^_	  ,002	 !!1WZ[jWkl M #DHMM?RS TF9+HMM6)CTU x}}E Mu	' 	'D NN&  M.x}}oRAxHya&w'vi?PQ\SVLLMMs_   BK K %J ;AK BK ,J-  BK AK  J*%K -J??K 	L=A'L88L=c                    |st         j                  d|  d       y	 t         j                  d|  dt        |       d       t        5  t
        j                  j                  | |       t
        j                  j                          ddd       t         j                  d|         y# 1 sw Y   "xY w# t        $ r!}t         j                  d|  d	|         d}~ww xY w)
z
    Safely replace existing vector store entries for a source with enriched chunks.
    
    This method uses replace_source_documents which rebuilds the index to ensure
    consistency and prevent index corruption (index/document count mismatch).
    z No enriched chunks provided for z, skipping vector store updateNzUpdating vector store for z with z enriched chunksz&Successfully updated vector store for z Error updating vector store for r   )rB   rC   r  r  r  r	  r   replace_source_documentsr  rR   r&   )source_namer  rE   s      r.   r  r    s     =k]Jhij4[MOH\G]]mno
  	1##<<[/Z##..0	1
 	@NO	1 	1  ;K=1#NOs/   +B5 	?B) B5 )B2.B5 5	C>CCz/enrichment-statusc                      t                t        j                  d      dk(  r6t        j                  d      r!t        j                  di       } | r	dt        d<   t        t              dfS )zGet current enrichment statusr\   r   r   r   r   )r   r   r(   r   )r   s    r.   get_enrichment_statusr    s\     X&&05F5J5J>5Z(,,^R@*3h'$%s**r0   z/reprocessing-statusc            
         t        t              dz  dz  } | j                         r8	 t        | d      5 }t	        j
                  |      }ddd       t              dfS t        ddd	d
dddd      dfS # 1 sw Y   +xY w# t        j                  t        t        f$ r"}t        j                  d|        Y d}~\d}~ww xY w)zGet current reprocessing statusr6   zreprocessing_status.jsonr8   Nr   z$Failed to load reprocessing status: r   r   zNo reprocessing in progressr   )r\   r]   r^   current_filer   r   chunks_created)r   r   r:   r;   r<   r=   r   r?   r@   rA   rB   r   )status_filer-   r\   rE   s       r.   get_reprocessing_statusr    s     }%
25OOK	k3' &11&6?C'' 0  	 & & $$gw7 	CA3GH	s.   A? A3	A? 3A<8A? ?B?B::B?__main__zStarting Flask application...z/RAG system will be initialized on first requestT)hostportr   )r   r   Nr  )r   r   r   r   N)NNN)r   )jflaskr   r   r   r   r   r"  ra   r<   r)  r0  concurrent.futuresr   r	   r  r   r  r   r  	functoolsr   airagagent.mainr   airagagent.configr   r   r   r   r   r   r   r   r   r   airagagent.logging_configr   r   r   airagagent.exceptionsr   r   airagagent.mistral_integrationr    __name__r%  environr(   r!   r   r1   r  r&  r   r4   rB   r	  localr   r9   r  r|   r   r>   r   Lockrb   r   r  rF   rS   rm   rv   r   r   r   r   r   r   r   r   r   r   r   r   router   r<  r`  rp  rT  rW  rU  r  r  r  r  r  r  r  r  r  r  r  r  rO  runr   r0   r.   <module>r     s   O O 	     ? *    %   Z Y B 3Ho 

/
D
EE "'l

? #3

  RZZ^^,DaHI  !
 [
$Y__&  m,z9<TT &* #$' !/   inn$inn& "INN$ C!@ 1T %   ?2CI!"!"%!  (6!e
 3  9vh'EC  (ECN
 6F8$GO  %GOR >F8,%@  -%@N3$b * 9ug&EI 'EIN $%3 &3 !"4 #4 $%I &I0 %1" 2". /0  1 D 9vh'P (P@obbQf8 %1+ 2+ !E73 4* z	
)*	
;<GGG5 r0   