
    ANCi8G                         d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZ d dlmZmZmZ d dlmZmZmZmZmZ d d	lmZmZmZ  G d
 d      Zd Ze dk(  r e        yy)    N)Path)PDFProcessor)VectorStore)RAGAgent)PDF_DIRDOCUMENTS_DIRDEFAULT_RETRIEVAL_KMIN_CHUNK_SCORE)enhance_retrieved_contextsynthesize_answerstrip_reasoning)RAGSystemErrorVectorStoreError
ModelErrorDocumentProcessingErrorValidationError)
get_loggerlog_performancelog_error_with_contextc                   >    e Zd Zd Zd Zd ZdefdZd
dZd Z	d	 Z
y)	RAGSystemc                     t        t              | _        t               | _        t               | _        d | _        d| _        y )NF)	r   __name__loggerr   pdf_processorr   vector_store	rag_agent_initializedselfs    (/var/www/html/leadgen/airagagent/main.py__init__zRAGSystem.__init__   s1     *)^'M!    c                 <    | j                   t               | _         yy)z6Initialize RAG agent (called on demand to save memory)N)r   r   r   s    r!   initialize_modelzRAGSystem.initialize_model   s     >>!%ZDN "r#   c           	         | j                   j                  d       t        j                         }	 | j                  j	                          | j                          | j                          | j                  j                         }|r| j                   j                  dt        |       d       | j                          | j                  j                  |       | j                  j                          | j                   j                  dt        |       d       | j                  j                  rt        | j                  j                        nd}t        j                         |z
  }t        | j                   d|||rt        |      nd       | j                   j                  d	| d
       |S # t        $ r }t!        | j                   |ddid        d}~ww xY w)zInitialize the RAG systemz!RAG System initialization startedzProcessing z new document chunkszSuccessfully processed 0 raw chunks (enrichment available via UI button)r   system_setup)total_documentsnew_chunks_processedz)RAG System initialized successfully with z
 documents	operationsetup_systemzFailed to initialize RAG systemN)r   infotimer   load_existing_indexensure_enriched_indexsync_chunk_files_to_indexr   process_all_newlenr%   add_documents
save_index	documentsr   	Exceptionr   )r    
start_time
new_chunks
total_docssetup_durationes         r!   r,   zRAGSystem.setup_system!   s   <=YY[
"	113&&(
 **, ++;;=J   ;s:.??S!TU%%'!!//
;!!,,.  #:3z?:KK{!|}=A=N=N=X=XT..889^_J!YY[:5NDKK*4BLs:RSU KKHT^_` 	"4;;K3PRst	s   F F2 2	G;GGreturnc           
         	 t        t        t              j                  d            }|syt	               }| j
                  j                  xs g D ];  }|j                  di       xs i j                  d      }|s+|j                  |       = g }d}|D ]  }	 t        |dd      5 }t        j                  |      }	ddd       t        	t               r|	sD|	d   }
d}t        |
t              r8|
j                  di       xs i j                  d      xs |
j                  d      }|s||v r|	D cg c]H  }t        |t              s|j                  d	      s&t        |j                  di       t              sG|J }}|s|j                  |       |j                  |       |d
z  } |r`| j                   j#                  dt%        |       d| d       | j
                  j'                  |       | j
                  j)                          |S # 1 sw Y   XxY w# t        $ r Y w xY wc c}w # t        $ r(}| j                   j+                  d|        Y d}~yd}~ww xY w)z
        Ensure every stored *_chunks.json file is represented in the vector index.
        Returns number of sources added.
        *_chunks.jsonr   metadatasourcerzutf-8)encodingNcontent   zSync: adding z chunks from z# missing source(s) into FAISS indexz-Sync: failed to sync chunk files into index: )listr   r   globsetr   r6   getaddopenjsonloadr7   
isinstancedictextendr   r-   r3   r4   r5   warning)r    chunk_filesexisting_sourcesdocsrcchunks_to_addsources_added
chunk_filefdatafirstrA   cvalidr<   s                  r!   r1   z#RAGSystem.sync_chunk_files_to_indexK   sh   
4	tM277HIK"u((228b .wwz2.4"99(C$((-.
 MM) #
j#@ ,A#yy|,
 "$-T QeT*#ii
B7=2BB8LcPUPYPYZbPcF-- %)  BqJq$,?AEE)DTYcdedidijtvxdy{  ZA  B  B$$U+ $$V,"9#<   =]1C0DMR_Q`  aD  "E  F!!//>!!,,.  C, ,  $ B  	KK"OPQs ST	s   )H5 AH5 ;H5 H 'H=H A.H5 3H0	H0 H0<H0 BH5 H	H  	H-)H5 ,H--H5 5	I&>I!!I&Nc                    t        j                          }|t        |      dd nd}| j                  j                  d| |rt	        t        |            dkD  rdnd        |t
        }|s | j                  j                  d       dg d	S t        |t              s,| j                  j                  d
t        |              dg d	S |j                         }|s | j                  j                  d       dg d	S | j                  j                  s | j                  j                  d       dg d	S 	 | j                          	 | j                  j                  ||      }| j                  j!                  dt	        |       d       | j                  j                  dt	        |       d       t#        |dd d      D ]  \  }}|j%                  dd      }	|j%                  dd      }
|j%                  dd      }|j%                  di       j%                  dd       }| j                  j                  d!| d"|dd#  d$|	d%d&|
d%d'|d%d(        t'        j(                  d)|j+                               D ch c]  }t	        |      d*kD  r| c}d+t,        ffd,}g }|D ]<  }|j%                  dd      t.        k\  } ||      d-k\  }|s|s,|j1                  |       > |rt| j                  j                  d.t	        |       d/t.         d(       |dd0 D cg c]  }|j%                  dd      d% }}| j                  j                  d1|        |}nl|rj| j                  j                  d2t.         d3       |dd* D cg c]  }|j%                  dd      d% }}| j                  j                  d4|        |dd* }|s&| j                  j                  d;|dd         d<g d	S 	 | j8                  j;                  |      }| j                  j                  d=t	        |       d>       t#        |dd0 d      D ]M  \  }}| j                  j                  d?| d"|j%                  dd       dd#  d@|j%                  dd      d%       O d}	 | j                  j                  dA       t=        ||      }t?        ||      }tA        |      }|rdC|xs dv rd| j                  j                  dD       | j8                  jC                  ||E      }| j8                  jE                  |||      }|rtA        |      n|}t        j                          |z
  }tG        | j                  dF|t	        |      t	        |      |rt	        |      ndGH       | j                  j                  dI|dJdK       ||dLdMS # t        $ r8}t        | j                  |d|dd d       dt        |       g d	cY d}~S d}~ww xY wc c}w c c}w c c}w # t2        $ rD}| j                  j                  d5|j4                          d6|j4                   g d	cY d}~S d}~wt6        $ r9}t        | j                  |d7|dd d       d8|j4                   g d	cY d}~S d}~wt        $ r8}t        | j                  |d9|dd d       d:t        |       g d	cY d}~S d}~ww xY w# t        $ r+}| j                  j                  dB|        d}Y d}~6d}~ww xY w# t        $ r8}t        | j                  |dN|dd d       dOt        |       g d	cY d}~S d}~ww xY w)Pz1Search for relevant documents and generate answerNd   NonezProcessing question: z... zEmpty question providedzPlease provide a question.)answersourceszInvalid question type: zQuestion must be a string.zQuestion is only whitespacez%Please provide a meaningful question.z+Search attempted but no documents availablezANo documents available in the system. Please add some PDFs first.model_initialization)r+   questionzError initializing model: )kzFound z search resultsz
Retrieved z chunks with scores:
   rE   scoreg        vector_scorekeyword_scorer@   rA   Unknownz  Chunk : 2   z - Combined: .3fz
 (Vector: z, Keyword: )\b\w+\b   r=   c                 x   | j                  di       xs i j                  dd      xs d}|syt        j                  ddt        |      t        j                        }|j                  dd      j                  d	d      j                         }t        t        j                  d
|            }t        |z        S )Nr@   rA   ra   r   z\.(pdf|txt)$)flags_ -rp   )
rI   resubstr
IGNORECASEreplacelowerrH   findallr3   )	candidaterU   cleaned
src_tokensquery_tokenss       r!   source_overlapz3RAGSystem.search_and_answer.<locals>.source_overlap   s     }}Z4:??"MSQS&&"c#hbmmT!//#s3;;CEKKM J!@A
<*455r#      zFiltered to z high-quality chunks (score >=    zFiltered chunk scores: zAll chunks below threshold z, using top 3 anywayzTop 3 scores: zQuery validation failed: zInvalid search query: vector_searchzVector store error: searchzUnexpected search error: z*No relevant documents found for question: z.No relevant documents found for your question.zGenerated answer with z sourcesz	  Source z
 - Score: z%Applying grounded answer synthesis...zSynthesis failed: z$No sufficiently relevant informationz5Falling back to extraction-based answer generation...)re   question_answerr   )question_lengthresults_countanswer_lengthz"Successfully answered question in z.2fsT)rb   rc   enhancedanswer_generationzError generating answer: )$r.   ry   r   r-   r3   r	   rQ   rN   typestripr   r6   r%   r7   r   r   debug	enumeraterI   rw   r}   r|   intr
   appendr   messager   r   prepare_source_summariesr   r   r   format_contextgenerate_answerr   )r    re   rf   r8   question_previewr<   search_resultsiresultrh   ri   rj   rA   tr   filtered_resultsrB   score_oktitle_match
score_listtop3_scoresrc   final_answerenhanced_contextsynthesized_answercontextrb   durationr   s                               @r!   search_and_answerzRAGSystem.search_and_answer   s   YY[
2:2F3x=#.F01A0BHY\]`ai]jYknqYq5wyBz{| 9#A KK 9:6  (C(KK"9$x.9I JK6  >>#KK =>A 
   **KK MN] 	!!#@	!..55h!5DNKKs>':&;?KL KKz#n*=)>>RST&~cr':A> S	6

7C0%zz.#> &

?C @J377)L  8A3b]5QT+U_`lmp_qq|  ~K  LO  }P  PQ  "R  SS ::j(..2BCq6A: L
6S 6  "# /55#./A,Q/14{$++A.	/    <4D0E/FFefuevvw!xyDTUWVWDXYqw!4S 9:Y
Y  #::,!GH!1##&A/ARRf$ghESTVUVEWX!%%"5c!:;XX##n[M$BC!/!3( KKI(SWTW.IYZ[J *	nn==nMGKK5c'l^8LM&wr{A6 B	6  9QCr&**Xy2QRUSU2V1WWabhblblmtvybz{~a  "A  BB  L$  !HI#<X~#V %6xAQ%R"./AB  #IlN`^`#a  !XY..77QY7Z77>Z:@v6fyy{Z/HDKK):H*-h-(+N(;=IL(9qR
 KKA(3qQR&"  c  	"4;;AWemnrores3tu6s1vh? 	.. Z Y  	KK";AII; GH2199+>    	"4;;^fgkhk^l3mn0<   	"4;;W_`dadWe3fg5c!fX> 	8  $##&8$<=#$0  	"4;;ATbjkolobp3qr5c!fX> 	s   2V2 D3X 6W6>X AX W;2AX X %X *B$\ >[ C$\ 2	W3;-W.(W3.W36X 	[9Y[[.Z[[-[[[	\ \=\ \\ 	]-]]]c                 `   | j                   j                  rt        | j                   j                        dkD  ri }	 t        | j                   j                        dkD  r)| j                   j                  d   j                  di       }|j                  d      s|j                  d      ry| j                  j                  dt        | j                   j                         d       yt        t        t              j                  d	            }|syt        d
       	 | j                  |       y# t        t
        f$ r*}| j                  j                  d|        i }Y d}~d}~ww xY w# t        $ r}t        d|j                            d}~wt"        $ r}t        d|         d}~ww xY w)zMEnsure the vector store uses enriched knowledge cards rather than raw chunks.r   r@   z"Could not access sample metadata: Nsummarycard_idzVector store has zB documents (not enriched). Skipping enrichment for faster startup.r?   z0Rebuilding knowledge index from stored chunks...zVector store rebuild failed: z!Unexpected error during rebuild: )r   r6   r3   rI   
IndexErrorAttributeErrorr   r   r-   rF   r   r   rG   print!_rebuild_vector_store_from_chunksr   r   r7   )r    sample_metadatar<   rR   s       r!   r0   zRAGSystem.ensure_enriched_index-  s    &&3t/@/@/J/J+Ka+O !O%t((223a7&*&7&7&A&A!&D&H&HUW&XO ""9-1D1DY1O   #4S9J9J9T9T5U4V  WY  "Z  [ 4.33ODE@A	22;?' / %!!$Fqc"JK"$%(   	1!))=> 	5aS9:	s<   AD/ E+ /E(> E##E(+	F-4FF-F((F-c                 `   g }|D ]N  }	 t        |d      5 }t        j                  |      }t        |t              r|j                  |       ddd       P |st        d       yt        dt        |       d       	 t               | _
        | j                          |rN| j                  j                  |       | j                  j                          t        dt        |       d	       yy# 1 sw Y   xY w# t        $ r}t        d| d|        Y d}~d}~ww xY w# t        $ r}t        d
|j                           d}~wt         $ r}t        d|j                           d}~wt"        $ r}t        d|j                           d}~wt        $ r}t        d|         d}~ww xY w)zLRebuild the vector store using knowledge cards generated from stored chunks.rB   Nz#Warning: Failed to load chunk file rl   z'No chunk data available for rebuilding.u      • Loaded z stored chunksu!      • Rebuilt vector store with r'   u      • Vector store error: u      • Model error: u"      • Document processing error: u1      • Unexpected error rebuilding vector store: )rK   rL   rM   rN   rF   rP   r7   r   r3   r   r   r%   r4   r5   r   r   r   r   )r    rR   
all_chunksrX   rY   rZ   r<   s          r!   r   z+RAGSystem._rebuild_vector_store_from_chunksS  s   
% 	OJO*c* 0a99Q<D!$-"))$/0	O ;<s:/~>?	 +D!!#!!//
;!!,,.9#j/9JJz{| %0 0  O;J<r!MNNO$   	/		{;< 	(45& 	6qyykBC 	EaSIJ	sk   C97C-C9=A.D! -C6	2C99	DDD!	F-*EF-E((F-4FF-F((F-)N)r   
__module____qualname__r"   r%   r,   r   r1   r   r0   r    r#   r!   r   r      s1    "((T93 9veN$L&r#   r   c            	      N   t               } | j                         }|dk(  rt        d       yt        d|        	 	 t        d      j	                         }|j                         dv ry|s1t        d       | j                  |      }t        d|d	    d
       |d   rOt        d       t        |d   d      D ]2  \  }}t        | d|d    d|d   dd       t        d|d    d
       4 # t        $ r t        d       Y yt        $ r}t        d|        Y d}~2d}~ww xY w)z"Command line interface for testingr   zDNo documents available. Please add PDFs to the pdf_directory folder.Nz"
Total documents in vector store: z*
Enter your question (or 'quit' to exit): )quitexitqz"Searching and generating answer...z	
Answer: rb   
rc   zSources:rE   z. rA   z	 (Score: rh   rn   ro   z   Preview: previewz
Exiting...zError: )
r   r,   r   inputr   r|   r   r   KeyboardInterruptr7   )
rag_system	doc_countre   r   r   rA   r<   s          r!   mainr   {  sO   J '')IA~TU	/	{
;< 	!JKQQSH~~#886711(;FJvh/034i j!!*6)+<a!@ @IAvQCr&"2!39VG_S<QQRSTL	):(;2>?@! ( ! 	.! 	!GA3-  	!s*   +C. (C. +BC. .D$D$DD$__main__)!rL   r.   rw   pathlibr   r   r   r   r   mistral_integrationr   configr   r   r	   r
   answer_synthesisr   r   r   
exceptionsr   r   r   r   r   logging_configr   r   r   r   r   r   r   r#   r!   <module>r      s`      	  & $ ( O O Z Z  O Nf fP&!P zF r#   