
    #iD                         d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZ d dlmZmZmZmZmZ d d	lmZmZmZ  G d
 d      Zd Z e!dk(  r e         yy)    N)Path)ListDictAnyOptionalTuple)PDFProcessor)VectorStore)RAGAgent)PDF_DIRDOCUMENTS_DIRDEFAULT_RETRIEVAL_KMIN_CHUNK_SCORE)RAGSystemErrorVectorStoreError
ModelErrorDocumentProcessingErrorValidationError)
get_loggerlog_performancelog_error_with_contextc                   X    e Zd Zd Zd Zd ZddZd Zd Zde	d	e
e	   fd
Zde	d	efdZy)	RAGSystemc                     t        t              | _        t               | _        t               | _        d | _        d| _        y )NF)	r   __name__loggerr	   pdf_processorr
   vector_store	rag_agent_initializedselfs    )/var/www/html/backtest/airagagent/main.py__init__zRAGSystem.__init__   s1     *)^'M!    c                 <    | j                   t               | _         yy)z6Initialize RAG agent (called on demand to save memory)N)r   r   r!   s    r#   initialize_modelzRAGSystem.initialize_model   s     >>!%ZDN "r%   c           	         | j                   j                  d       t        j                         }	 | j                  j	                          | j                          | j                  j                         }|r| j                   j                  dt        |       d       | j                          | j                  j                  |       | j                  j                          | j                   j                  dt        |       d       | j                  j                  rt        | j                  j                        nd}t        j                         |z
  }t        | j                   d|||rt        |      nd       | j                   j                  d	| d
       |S # t        $ r }t        | j                   |ddid        d}~ww xY w)zInitialize the RAG systemz!RAG System initialization startedzProcessing z new document chunkszSuccessfully processed 0 raw chunks (enrichment available via UI button)r   system_setup)total_documentsnew_chunks_processedz)RAG System initialized successfully with z
 documents	operationsetup_systemzFailed to initialize RAG systemN)r   infotimer   load_existing_indexensure_enriched_indexr   process_all_newlenr'   add_documents
save_index	documentsr   	Exceptionr   )r"   
start_time
new_chunks
total_docssetup_durationes         r#   r.   zRAGSystem.setup_system    s|   <=YY[
	113&&( ++;;=J   ;s:.??S!TU%%'!!//
;!!,,.  #:3z?:KK{!|}=A=N=N=X=XT..889^_J!YY[:5NDKK*4BLs:RSU KKHT^_` 	"4;;K3PRst	s   E0F" "	G+GGNc                 (   t        j                          }|t        |      dd nd}| j                  j                  d| |rt	        t        |            dkD  rdnd        |t
        }|s | j                  j                  d       dg d	S t        |t              s,| j                  j                  d
t        |              dg d	S |j                         }|s | j                  j                  d       dg d	S | j                  j                  s | j                  j                  d       dg d	S 	 | j                          | j                  |      }| j                  j                  d|        	 | j!                  |      r| j                  j"                  j%                  |d      }|D cg c]  }|d   	 }	}| j                  j'                  |	||      }
| j                  j                  dt	        |	       d       n9| j                  j)                  |||      }
| j                  j                  d       | j                  j+                  dt	        |
       d       | j                  j+                  dt	        |
       d       | j                  j                  dt	        |
       d       t-        |
dd d      D ]  \  }}|j/                  d d!      }|j/                  d"d!      }|j/                  d#d!      }|j/                  d$i       j/                  d%d&      }| j                  j                  d'| d(|dd)  d*|d+d,|d+d-|d+d.        |
D cg c]  }|j/                  d d!      t0        k\  s|  }}|rt| j                  j                  d/t	        |       d0t0         d.       |dd1 D cg c]  }|j/                  d d!      d+ }}| j                  j                  d2|        |}
nl|
rj| j                  j                  d3t0         d4       |
dd5 D cg c]  }|j/                  d d!      d+ }}| j                  j                  d6|        |
dd5 }
|
s&| j                  j                  d=|dd         d>g d	S 	 | j                  j+                  d?t	        |
       d       | j8                  j;                  |
|@      }| j                  j+                  dAt	        |       dB       | j8                  j=                  |||
      }| j8                  j?                  |
      }| j                  j                  dCt	        |       dD       t-        |dd1 d      D ]M  \  }}| j                  j                  dE| d(|j/                  d%d&      dd)  dF|j/                  d d!      d+       O t        j                          |z
  }tA        | j                  dG|t	        |      t	        |
      |rt	        |      ndHI       | j                  j                  dJ|dKdL       ||d	S # t        $ r8}t        | j                  |d|dd d       dt        |       g d	cY d}~S d}~ww xY wc c}w c c}w c c}w c c}w # t2        $ rD}| j                  j                  d7|j4                          d8|j4                   g d	cY d}~S d}~wt6        $ r9}t        | j                  |d9|dd d       d:|j4                   g d	cY d}~S d}~wt        $ r8}t        | j                  |d;|dd d       d<t        |       g d	cY d}~S d}~ww xY w# t        $ r8}t        | j                  |dM|dd d       dNt        |       g d	cY d}~S d}~ww xY w)Oz1Search for relevant documents and generate answerNd   NonezProcessing question: z... zEmpty question providedzPlease provide a question.)answersourceszInvalid question type: zQuestion must be a string.zQuestion is only whitespacez%Please provide a meaningful question.z+Search attempted but no documents availablezANo documents available in the system. Please add some PDFs first.model_initialization)r-   questionzError initializing model: zDetected domain: 
   max_keywordskeyword)kdomainz Used keyword-focused search for z	 keywordszUsed enhanced general searchzFound z search resultsz
Retrieved z chunks with scores:   scoreg        vector_scorekeyword_scoremetadatasourceUnknownz  Chunk : 2   z - Combined: .3fz
 (Vector: z, Keyword: )zFiltered to z high-quality chunks (score >=    zFiltered chunk scores: zAll chunks below threshold z, using top 3 anyway   zTop 3 scores: zQuery validation failed: zInvalid search query: vector_searchzVector store error: searchzUnexpected search error: z*No relevant documents found for question: z.No relevant documents found for your question.zFormatting context from )rE   zContext length: z characterszGenerated answer with z sourcesz	  Source z
 - Score: question_answerr   )question_lengthresults_countanswer_lengthz"Successfully answered question in z.2fsanswer_generationzError generating answer: )!r0   strr   r/   r4   r   warning
isinstancetypestripr   r7   r'   r8   r   _detect_domain_from_question"_should_use_keyword_focused_searchkeyword_managerextract_keywordskeyword_focused_searchenhanced_searchdebug	enumerategetr   r   messager   r   format_contextgenerate_answerprepare_source_summariesr   )r"   rE   rJ   r9   question_previewr=   rK   keywordskwkeyword_listsearch_resultsiresultrM   rN   rO   rQ   rfiltered_results
score_listtop3_scorescontextrB   rC   durations                            r#   search_and_answerzRAGSystem.search_and_answerE   s   YY[
2:2F3x=#.F01A0BHY\]`ai]jYknqYq5wyBz{| 9#A KK 9:6  (C(KK"9$x.9I JK6  >>#KK =>A 
   **KK MN] 	!!# 228<,VH567	66x@,,<<MMhegMh8@A"9AA!%!2!2!I!I,Z[dj!I!k  #CCDUCVV_!`a "&!2!2!B!B8qY_!B!`  !?@KKs>':&;?KLKKs>':&;?KL KKz#n*=)>>RST&~cr':A> S	6

7C0%zz.#> &

?C @J377)L  8A3b]5QT+U_`lmp_qq|  ~K  LO  }P  PQ  "R  SS ,:daQUU7C=PTc=cdd  <4D0E/FFefuevvw!xyDTUWVWDXYqw!4S 9:Y
Y  #::,!GH!1##&A/ARRf$ghESTVUVEWX!%%"5c!:;XX##n[M$BC!/!3( KKI(SWTW.IYZ[J 	KK 8^9L8M_]^nn33NX3VGKK 0WkJK^^33GX~VFnn==nMGKK5c'l^8LM&wr{A6 B	6  9QCr&**Xy2QRUSU2V1WWabhblblmtvybz{~a  "A  BB yy{Z/HDKK):H*-h-(+N(;7=F1F
 KKA(3qQR " }  	"4;;AWemnrores3tu6s1vh? 	   B*  e Z Y  	KK";AII; GH2199+>    	"4;;^fgkhk^l3mn0<   	"4;;W_`dadWe3fg5c!fX> 	J  	"4;;ATbjkolobp3qr5c!fX> 	s   1X) 1<Z -Y-9FZ Y26Y2::Z 4Y7AZ Y<9%Z F!] )	Y*2-Y%Y*%Y*-Z 	]
9[	]	].\	]	]-]]]	^-^^^c                 `   | j                   j                  rt        | j                   j                        dkD  ri }	 t        | j                   j                        dkD  r)| j                   j                  d   j                  di       }|j                  d      s|j                  d      ry| j                  j                  dt        | j                   j                         d       yt        t        t              j                  d	            }|syt        d
       	 | j                  |       y# t        t
        f$ r*}| j                  j                  d|        i }Y d}~d}~ww xY w# t        $ r}t        d|j                            d}~wt"        $ r}t        d|         d}~ww xY w)zMEnsure the vector store uses enriched knowledge cards rather than raw chunks.r   rP   z"Could not access sample metadata: Nsummarycard_idzVector store has zB documents (not enriched). Skipping enrichment for faster startup.z*_chunks.jsonz0Rebuilding knowledge index from stored chunks...zVector store rebuild failed: z!Unexpected error during rebuild: )r   r7   r4   rn   
IndexErrorAttributeErrorr   rl   r/   listr   r   globprint!_rebuild_vector_store_from_chunksr   ro   r8   )r"   sample_metadatar=   chunk_filess       r#   r2   zRAGSystem.ensure_enriched_index   s    &&3t/@/@/J/J+Ka+O !O%t((223a7&*&7&7&A&A!&D&H&HUW&XO ""9-1D1DY1O   #4S9J9J9T9T5U4V  WY  "Z  [ 4.33ODE@A	22;?' / %!!$Fqc"JK"$%(   	1!))=> 	5aS9:	s<   AD/ E+ /E(> E##E(+	F-4FF-F((F-c                 `   g }|D ]N  }	 t        |d      5 }t        j                  |      }t        |t              r|j                  |       ddd       P |st        d       yt        dt        |       d       	 t               | _
        | j                          |rN| j                  j                  |       | j                  j                          t        dt        |       d	       yy# 1 sw Y   xY w# t        $ r}t        d| d|        Y d}~d}~ww xY w# t        $ r}t        d
|j                           d}~wt         $ r}t        d|j                           d}~wt"        $ r}t        d|j                           d}~wt        $ r}t        d|         d}~ww xY w)zLRebuild the vector store using knowledge cards generated from stored chunks.rz   Nz#Warning: Failed to load chunk file rS   z'No chunk data available for rebuilding.u      • Loaded z stored chunksu!      • Rebuilt vector store with r)   u      • Vector store error: u      • Model error: u"      • Document processing error: u1      • Unexpected error rebuilding vector store: )openjsonloadrc   r   extendr8   r   r4   r
   r   r'   r5   r6   r   ro   r   r   )r"   r   
all_chunks
chunk_filefdatar=   s          r#   r   z+RAGSystem._rebuild_vector_store_from_chunks   s   
% 	OJO*c* 0a99Q<D!$-"))$/0	O ;<s:/~>?	 +D!!#!!//
;!!,,.9#j/9JJz{| %0 0  O;J<r!MNNO$   	/		{;< 	(45& 	6qyykBC 	EaSIJ	sk   C97C-C9=A.D! -C6	2C99	DDD!	F-*EF-E((F-4FF-F((F-rE   returnc                    |j                         g dg dg dg dd}i }|j                         D ]$  \  }}t        fd|D              }|dkD  s |||<   & |r)t        |j                         d 	      }|d
   d
k\  r|d   S y)z8Detect the domain from the question for targeted search.)footballsoccer
basketballbaseballhockeytennisgolfnflnbamlbnhlzpremier leaguezla liga
bundesligaoddsspread	moneylinez
over/underparlayzsports betting)bitcoinbtcethereumethcryptocurrencycrypto
blockchainminingstakingdefinftaltcointokenwalletexchange)stockequitysharesnasdaqnysedividendearningsipooptionsfuturestrading	investingz
market cap)forexcurrencyfxzeur/usdzgbp/usdzusd/jpypipleveragezcarry tradezcentral bankfedecbzinterest rate)sportsr   stocksr   c              3   ,   K   | ]  }|v sd   yw)rL   N ).0patternquestion_lowers     r#   	<genexpr>z9RAGSystem._detect_domain_from_question.<locals>.<genexpr>B  s     OgW5NOs   	r   c                     | d   S )NrL   r   )xs    r#   <lambda>z8RAGSystem._detect_domain_from_question.<locals>.<lambda>H  s
    1Q4 r%   )keyrL   N)loweritemssummax)	r"   rE   domain_patternsdomain_scoresrK   patternsrM   best_domainr   s	           @r#   rf   z&RAGSystem._detect_domain_from_question&  s    !)

*  / 5 5 7 	.FHOOOEqy(-f%	. m113HK1~""1~%r%   c                    | j                   j                  j                  d      }|D cg c]  }|d   dkD  s| }}t        |      dk\  }t	        fddD              }t        j                               dkD  }|xs |xs |S c c}w )	z5Determine if query should use keyword-focused search.   rG   rM   gffffff?rX   c              3   B   K   | ]  }|j                         v   y wN)r   )r   termrE   s     r#   r   z?RAGSystem._should_use_keyword_focused_search.<locals>.<genexpr>Z  s!      $TX^^%55 s   )
zkelly criterionzbollinger bandsrsimacd	fibonaccir   zround robin	arbitragehedger      )r   rh   ri   r4   anysplit)r"   rE   rt   ru   high_relevance_keywordskeyword_heavyspecific_terms
long_querys    `      r#   rg   z,RAGSystem._should_use_keyword_focused_searchN  s     $$44EEh]_E`08"N"BwK#<M2"N"N 349  C
   )*R/
<<*< #Os
   B	B	r   )r   
__module____qualname__r$   r'   r.   r   r2   r   ra   r   rf   boolrg   r   r%   r#   r   r      sP    "(#JQf$L&P&S &Xc] &P=3 =4 =r%   r   c            	      N   t               } | j                         }|dk(  rt        d       yt        d|        	 	 t        d      j	                         }|j                         dv ry|s1t        d       | j                  |      }t        d|d	    d
       |d   rOt        d       t        |d   d      D ]2  \  }}t        | d|d    d|d   dd       t        d|d    d
       4 # t        $ r t        d       Y yt        $ r}t        d|        Y d}~2d}~ww xY w)z"Command line interface for testingr   zDNo documents available. Please add PDFs to the pdf_directory folder.Nz"
Total documents in vector store: z*
Enter your question (or 'quit' to exit): )quitexitqz"Searching and generating answer...z	
Answer: rB   
rC   zSources:rL   z. rQ   z	 (Score: rM   rU   rV   z   Preview: previewz
Exiting...zError: )
r   r.   r   inputre   r   r   rm   KeyboardInterruptr8   )
rag_system	doc_countrE   ry   rx   rQ   r=   s          r#   mainr   b  sO   J '')IA~TU	/	{
;< 	!JKQQSH~~#886711(;FJvh/034i j!!*6)+<a!@ @IAvQCr&"2!39VG_S<QQRSTL	):(;2>?@! ( ! 	.! 	!GA3-  	!s*   +C. (C. +BC. .D$D$DD$__main__)"r   r0   pathlibr   typingr   r   r   r   r   airagagent.pdf_processorr	   airagagent.vector_storer
   airagagent.mistral_integrationr   airagagent.configr   r   r   r   airagagent.exceptionsr   r   r   r   r   airagagent.logging_configr   r   r   r   r   r   r   r%   r#   <module>r     s]       3 3 1 / 3 Z Z  Z YN= N=`
&!P zF r%   