
    #i$              	       2   S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJ	r	  S SK
Jr  S SKJrJrJrJrJr  S SKJrJrJrJrJrJrJrJrJrJrJrJr  S SKJ r   S SK!J"r"J#r#J$r$J%r%J&r&J'r'  1 S	kr(1 S
kr)Sr*Sr+/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQS.r, " S S5      r-g)    )SentenceTransformerN)Path)fuzz)ListDictAnyOptionalTuple)EMBEDDINGS_DIRHYBRID_SEARCH_OVERSAMPLEHYBRID_VECTOR_WEIGHTHYBRID_KEYWORD_WEIGHTHYBRID_MIN_KEYWORD_SCOREHYBRID_BM25_WEIGHTMAX_CHUNKS_PER_SOURCETECHNICAL_TERMSSYNONYMSTRADING_KEYWORD_TAXONOMYTRADING_SYNONYMSDOMAIN_KEYWORD_MAPPING)get_keyword_manager)VectorStoreErrorIndexLoadErrorIndexSaveErrorEmbeddingErrorValidationErrorResourceError>N   alsobeenbotheachevenfromgivehavehelpintolikemakemanymoremostmuchneedonlyontoshowsomesuchtellthanthatthemtheythisuponverywantwerewhenwillwithyouraboutafterbasedbeingcouldgimmeguidemightothershallstepstheirtherethesethosewherewhichwhilewouldanswerbeforedetailguidespleaseshouldwithinbecausebetweendetailsexplainlookingproviderelatedsummarythroughdescribequestion	according	regarding	summaries	summarizeinformation>   gbudpotbookbudsgramgrowhempweedbooksganjagramsrH   plantflowerrX   manualplantsflowersgrowingharvestmanualscannabisdocument	cultivate	documents	marijuanacultivation      ?g      ?)
hydroponichydroponicshydro	soil-less	soil lesssoillesszwater cultureznutrient solutionznutrient tankzebb and flow	reservoirzdeep water culturedwczdrip systemzflood and drainsoilless mixsoilless mixture)	r   r   r   r   zperlite mixz	coco coirrockwoolzexpanded clayz	grow cube)medicalmedicine	medicinalpatientdoctor	physiciantherapeutictherapy	treatmentdosagedoseclinicalsymptom	condition	diagnosisprescriptionwellnesshealth
healthcarezpain reliefzanxiety reliefzanti-inflammatory)finance	financialfundingcapitalloancreditdebt
investmentinvestorroizreturn on investmentz	cash flowprofitprofitabilityrevenueincomemarginbudget)legallawlawsstatute
regulation
regulatory
compliancelicense	licensingpermit
permittingdecriminalizelegalizationcourtpolicy	ordinance)businessentrepreneurentrepreneurshipstartupcompanycorporationmarket	marketingsalesbranding	franchisezsupply chain	wholesaleretailcustomerclientstrategyscaling
operations
management)zmental health
psychologyr   
counseling	wellbeingstressanxietyptsd
depressionmindfulness
meditation)history
historical	chronicletimelineeracenturyancientzmodern history	historiancivilizationheritagelegacyanthropologyarchaeologyculturezcultural historyrecords)r   zsoilless growingr   r   r   r   r   r   c                   F   \ rS rSrS"S jrS rS rS rS rS r	S	 r
S
 rS#S jrS$S\S\S\\   S\\\\4      4S jjrS$S\\   S\S\\   S\\\\4      4S jjrS\S\S\4S jrS\S\S\\   S\4S jrS\S\S\4S jrS rS rS rS rS rS%S jrS&S\4S  jjrS!r g)'VectorStoreg   Nc                     SSK Jn  [        5       U l        Uc  Un[	        U5      U l        S U l        / U l        / U l        / U l	        / U l
        SU l        [        S-  U l        [        S-  U l        g )Nr   )EMBEDDING_MODEL        zfaiss_index.indexzdocuments_metadata.json)airagagent.configr   r   keyword_managerr   modelindexr   searchable_texts_rawsearchable_texts_lowerdoc_lengthsavg_doc_lengthr   
index_filemetadata_file)self
model_namer   s      9/var/www/html/leadgen/backtest/airagagent/vector_store.py__init__VectorStore.__init__h   sr    5  34(J(4

$&!&(#!(+>>+.GG    c                    U R                   R                  5       (       d  gU R                  R                  5       (       a   [        R                  " [        U R                   5      5      U l        [        U R                  SSS9 n[        R                  " U5      U l
        SSS5        U R                  5         [        S[        U R                  5       S35        gSSKJn  US-  nUR                  5       (       a   [        S5        [        R                  " [        U R                   5      5      U l        SSKn[        US5       nUR                  U5      U l
        SSS5        U R)                  5         U R                  5         UR+                  5         [        S[        U R                  5       S35        gg! , (       d  f       GN= f! [        R                   a0  n[        [        U R                  5      S	[        U5      S
.5      eSnAf[          a0  n[        [        U R                   5      S[        U5      S
.5      eSnAff = f! , (       d  f       N= f! [          a&  n[        [        U5      S[        U5      S
.5      eSnAff = f)z&Load existing FAISS index and metadataFrutf-8encodingNu   ✓ Loaded existing index with z
 documentsTjson_decode
error_typeoriginal_error
faiss_loadr   )r   zdocuments_metadata.pklz'Migrating from pickle to JSON format...rbu   ✓ Migrated index with z documents to JSON formatpickle_migration)r   existsr   faiss
read_indexstrr   openjsonloadr   _prepare_searchable_textsprintlenJSONDecodeErrorr   	Exceptionconfigr   picklesave_metadataunlink)r   fer   old_pickle_filer  s         r  load_existing_indexVectorStore.load_existing_indexy   s   %%''$$&&"--c$//.BC
$,,cGD%)YYq\DN E..07DNN8K7LJWX 	*(+CC!!##?@"--c$//.BC
/40A%+[[^DN 1 ""$..0&&(0T^^1D0EE^_` O ED
 '' $**+#0CFK   $(#/3q6J  10  $(#5QP so   AG
 F8 :G
 <A	I I	AI 8
GG
 
I+H		I+II	
II 
J
$!JJ
c                 h   U(       d  g[        U[        5      (       d  [        SS[        U5      05      e[	        U5       HN  u  p#[        U[
        5      (       d  [        SU S3U[        U5      S.5      eSU;  d  M>  [        SU S3S	U05      e   U Vs/ s H  o3S   PM	     nn U R                  R                  US
SSSS9nU R                  c/  [        R                  " UR                  S   5      U l        / U l        ["        R$                  " U5      R'                  S5      n[        R(                  " U5        U R                  R+                  U5        U R                   R-                  U5        U HX  nU R/                  U5      n	U R0                  R3                  U	5        U R4                  R3                  U	R7                  5       5        MZ     [9        S[        U5       S35        gs  snf ! [         a:  nU(       a  US   SS S-   OSn[        U[        U5      [        U5      S.5      eSnAff = f)z%Add new documents to the vector storeNzChunks must be a listreceived_typezChunk z must be a dictionary)chunk_index
chunk_typecontentz missing 'content' keyr)      TF
batch_sizeshow_progress_barconvert_to_numpynormalize_embeddingsr      z... )r  r.     float32u
   ✓ Added z chunks to vector store)
isinstancelistr   type	enumeratedictr   encoder  r   r  r  r   r  IndexFlatIPshaper   nparrayastypenormalize_L2addextend_build_searchable_textr   appendr   lowerr  )
r   chunksichunkcontents
embeddingsr#  problematic_contentembeddings_npsearchable_texts
             r  add_documentsVectorStore.add_documents   s   &$''!"9OTRX\;Z[[ "&)HAeT**%qc1F&GYZjnotjuIvww%%qc1G&H=Z[J\]]	 * 399&)$&9	o**"&!%%* + J ::**:+;+;A+>?DJDN ,33I>=)

}% 	f%E"99%@O%%,,_=''../D/D/FG 
 	
3v;-'>?@C :  	o?G(1+ds"3e";R !4Q_bck_l6mnn	os   G('G- -
H175H,,H1c                 R   U R                   c  [        SSS05      e [        R                  " U R                   [	        U R
                  5      5        U R                  5         [        S5        g! [         a/  n[        [	        U R
                  5      S[	        U5      05      eSnAff = f)z!Save the FAISS index and metadataNz No index to save - index is Noneindex_statenoneu   ✓ Saved vector store indexr  )	r   r   r  write_indexr  r   r   r  r  )r   r#  s     r  
save_indexVectorStore.save_index   s    :: !CmU[E\]]	Sdjj#doo*>? 01 	S T__!58H#a&7QRR	Ss   AA- -
B&7*B!!B&c                      [        U R                  SSS9 n[        R                  " U R                  USSS9  SSS5        g! , (       d  f       g= f! [
         a0  n[        [        U R                  5      S[        U5      S	.5      eSnAff = f)
z#Save document metadata to JSON filewr  r	  F   )ensure_asciiindentN	json_saver  )r  r   r  dumpr   r  r   r  )r   r"  r#  s      r  r   VectorStore.save_metadata   st    	qd((#@A		$..!%J A@@ 	q T%7%7!8hklmhn:opp	qs3   A "AA 
AA A 
B+BBc           	      r   U(       d  U R                  U5        g[        SU S[        U5       S35        U R                   Vs/ s H,  nUR	                  S0 5      R	                  S5      U:w  d  M*  UPM.     nn[        S[        U5       S35        XB-   n[        S	[        U5       S
35        U R
                  b  U R
                  R                  nO+U R                  R                  S/5      nUR                  S   n[        R                  " U5      U l        / U l        / U l        U Vs/ s H  o3S   PM	     nnSn	[        S[        U5      U	5       H  n
XX-    n U R                  R                  UU	SSSS9n[        R                   " U5      R#                  S5      n[        R$                  " U5        U R
                  R'                  U5        [)        XZX-    5       HZ  u  pU R+                  U5      nU R                  R-                  U5        U R                  R-                  UR/                  5       5        M\     M     XPl        [        SU S[        U5       S35        [        SU R
                  R6                   35        gs  snf s  snf ! [0         a   n[3        SU
 3[5        U5      U
S.5      eSnAff = f)a
  
Safely replace all documents for a source with new chunks.
This rebuilds the index to ensure consistency (prevents index corruption).

Args:
    source_name: Name of the source file to replace
    new_chunks: List of new chunk dictionaries to replace old ones with
Nz Replacing documents for source: z (z new chunks)metadatasourcez
  Keeping z documents from other sourcesz  Rebuilding index with z total documents...testr4  r+  r,  r   FTr-  r5  z"Error embedding batch starting at )r  batch_startu"   ✓ Successfully replaced source 'z': z chunksz  Total documents in index: )_remove_source_from_indexr  r  r   getr   dr   r;  r=  r  r<  r   r   ranger>  r?  r@  rA  rB  r9  rD  rE  rF  r  r   r  ntotal)r   source_name
new_chunksdockept_documentsupdated_documents	dimensionsample_embeddingrJ  r.  rH  batchrK  rM  jrN  r#  s                    r  replace_source_documents$VectorStore.replace_source_documents   s    **;70RJ?PP\]^  >>
)Cwwz2&**84C > 	 

 	
3~.//LMN +7 	(->)?(@@STU ::!

I  $zz00&:(..q1I &&y1
 %'!&(# /@@.?s	N.?@
q#h-4A0E!ZZ..)&+%)). / 
 !# 4 ; ;I F""=1

}- ((9an(MNFA&*&A&A#&FO--44_E//667L7L7NO O 56 +2;-s3z?BSSZ[\,TZZ->->,?@A
< A.  $8<'*1va@ s+   )J0J(JC J
J6J11J6c           	         U R                    Vs/ s H,  nUR                  S0 5      R                  S5      U:w  d  M*  UPM.     nn[        U5      [        U R                   5      :X  a  [        SU 35        g[        S[        U R                   5      [        U5      -
   SU 35        U R                  b  U R                  R
                  nO+U R                  R                  S/5      nUR                  S   n[        R                  " U5      U l        / U l        / U l        U Vs/ s H  o"S	   PM	     nnS
n[        S[        U5      U5       H  nXhX-    n	U R                  R                  U	USSSS9n
[        R                  " U
5      R!                  S5      n[        R"                  " U5        U R                  R%                  U5        ['        X8X-    5       HZ  u  pU R)                  U5      nU R                  R+                  U5        U R                  R+                  UR-                  5       5        M\     M     X0l         [        SU S[        U R                   5       35        gs  snf s  snf )zCRemove all documents for a source by rebuilding index without them.r`  ra  zNo documents found for source: Nz	Removing z documents for source: rb  r4  r+  r,  r   FTr-  r5  u   ✓ Removed source 'z'. Total documents: )r   re  r  r  r   rf  r   r;  r=  r  r<  r   r   rg  r>  r?  r@  rA  rB  r9  rD  rE  rF  )r   ri  rk  rl  rn  ro  rJ  r.  rH  rp  rK  rM  rq  rN  s                 r  rd  %VectorStore._remove_source_from_indexB  s*     >>
)Cwwz2&**84C > 	 

 ~#dnn"553K=AB	#dnn-N0CCDD[\g[hij ::!

I#zz00&:(..q1I&&y1
$&!&(#.<=ns	Nn=
q#h-4A0E**%"'!%%* + J HHZ077	BM}-JJNN=)#Nq~$FG"&"="=c"B))00A++22?3H3H3JK H 5$ ($[M1Ec$..FYEZ[\Y
, >s   )I!I!!I&c                    SnU R                    HD  nUR                  S0 5      R                  S5      U:X  d  M*  SUR                  S0 5      S'   US-  nMF     US:  a"  U R                  5         [	        SU SU S	35        U$ )
z
DEPRECATED: Use replace_source_documents instead.
This method only marks documents as deleted but doesn't remove vectors from index,
which can cause index corruption. Kept for backward compatibility but not recommended.
r   r`  ra  Tdeletedr4  u   ⚠ Marked z documents from z: as deleted (deprecated method - may cause index mismatch))r   re  
setdefaultr   r  )r   ri  countrk  s       r  mark_source_as_deleted"VectorStore.mark_source_as_deletedr  s     >>Cwwz2&**84C<@z2.y9
 "
 19 Kw&6{mC}~r  c                 H   U R                   c  [        SSS05      e[        U R                  5      S:X  a  / $ U(       d  [	        SSS05      e[        U[        5      (       d!  [	        SS	[        U5      R                  05      e[        UR                  5       5      S:X  a  [	        S
S[        U5      05      eUR                  5       n[        R                  " SU5      n/ n/ n/ n[        5       nU H'  n	U	[        ;   d  M  UR!                  [        U	   5        M)     ["         H?  n
X;   d  M
  X;  d  M  UR%                  U
5        U
[&        ;  d  M.  UR%                  U
5        MA     U H8  n
X;  d  M
  UR%                  U
5        U
[&        ;  d  M'  UR%                  U
5        M:     U H=  n	[        U	5      S::  a  M  U	[(        ;   a  M   X;   d  X;   a  M,  UR%                  U	5        M?     / n[        5       nXW-    Hr  n
X;   a  M
  UR%                  U
[        R*                  " S[        R,                  " U
5      -   S-   [        R.                  5      X;   X;   S.5        UR1                  U
5        Mt     U R3                  5         U(       a  U R5                  U5      O0 n U R6                  R9                  U/5      n[:        R<                  " U5      R?                  S5      n[@        RB                  " U5        [E        U[F        -  U5      nU R                   RI                  X5      u  nn/ n/ n[O        US   US   5       GH  u  nnU[        U R                  5      :  a  M"  U R                  U   nURQ                  S0 5      RQ                  S5      (       a  MY  UR%                  U5        U RR                  U   nU RT                  U   nSnU(       a  [V        RX                  " UU5      S-  n/ nSnSnSnSn[[        S U 5       5      n [[        S U 5       5      n!U(       a  U Hi  n"U"S   n#U#RI                  U5      (       d  M   UR%                  U"S   5        U"S   (       a  SnU"RQ                  S5      (       a  Sn[]        SUS-   5      nSnMk     [        U5      S :  a(  []        SUS![]        [        U5      S -
  S5      -  -   5      nU R_                  UU5      nU(       d  U(       a  USS" nU(       a  U(       d  U (       d  []        US#5      nU(       d  U Hz  n	[        U	5      S:  d  M  [        R*                  " S[        R,                  " U	5      -   S-   [        R.                  5      n$U$RI                  U5      (       d  Mk  []        SUS$-   5      n  O   U[`        :  a  SnU(       a:  U!(       a
  U(       d  GMw  U!(       d  U (       a
  U(       d  GM  U (       d
  U(       d  GM  U Rc                  UX5      n%[d        [g        U5      -  [h        U-  -   [j        U%-  -   n&US%   US   U&[g        U5      UU%UUUS&.	n'UR%                  U'5        GM     [m        S' U 5       5      n(U V)s/ s H  n)U)RQ                  S5      (       d  M  U)PM     n*n)U*(       d  U V)s/ s H  n)U)S   (       d  M  U)PM     n*n)U*(       d  Un*U*(       a7  U(U:  a1  UU(-
  n+U Ro                  U*U+[        U5      US(9n,URq                  U,5        URs                  S) SS*9  / n-0 n.[        5       n/U H  n'U'RQ                  S0 5      RQ                  S+S,5      n0U.RQ                  U0S5      n1U1[t        :  a  MC  U-R%                  U'5        U1S -   U.U0'   U'RQ                  S-5      n2U2b  U/R1                  U25        [        U-5      U:  d  M    O   U*(       Ga  U- V3s1 s H%  n3U3RQ                  S0 5      RQ                  S+S,5      iM'     n4n3[E        SU[        U45      -
  5      n5U5S:  a  U Ro                  U*U5U/U4US.9n6U6 H  n7U7RQ                  S0 5      RQ                  S+S,5      n0U0U4;   a  M-  U-R%                  U75        U4R1                  U05        U.RQ                  U0S5      S -   U.U0'   U7RQ                  S-5      n2U2b  U/R1                  U25        [        U-5      U:  d  M    O   [        U-5      U:  a4  U H.  n'U'U-;   a  M  U-R%                  U'5        [        U-5      U:  d  M-    U-$    U-$ ! [J         a  n[M        SU 35        / s SnA$ SnAff = fs  sn)f s  sn)f s  sn3f )/zFSearch for similar documents using hybrid vector + keyword re-ranking.NzIndex not loadedrR  rS  r   zQuery cannot be emptyquery_lengthzQuery must be a string
query_typezQuery cannot be only whitespacequery_contentz\b\w+\b   \btermpattern	is_domain
is_primaryr5  zError during search: r`  rw  r   g      Y@Fc              3   *   #    U  H	  oS    v   M     g7f)r  N .0ps     r  	<genexpr>%VectorStore.search.<locals>.<genexpr>  s     &P?O!~?O   c              3   *   #    U  H	  oS    v   M     g7f)r  Nr  r  s     r  r  r    s     'RAQA,AQr  r  r  r  Tr        ?      ?r4  皙?   皙?333333?r+  	r+  r`  scorevector_scorekeyword_score
bm25_scorehas_keyword_matchkeyword_snippet	doc_indexc              3   V   #    U  H  nUR                  S 5      (       d  M  Sv   M!     g7f)r  r4  Nre  )r  	candidates     r  r  r  ,  s%       
))}}01 Azs   )	))exclude_indices	idf_tablec                     U S   $ Nr  r  )items    r  <lambda>$VectorStore.search.<locals>.<lambda>C  s    gr  keyreversera  Unknownr  )r  exclude_sourcesr  );r   r   r  r   r   r6  r  r8  __name__stripreprrF  refindallsetr   updater   rE  GENERIC_DOMAIN_TERMSQUERY_STOPWORDScompileescape
IGNORECASErB  _ensure_searchable_texts_compute_idf_tabler   r;  r>  r?  r@  r  rA  maxr   searchr  r  zipre  r   r   r   partial_ratioanymin_extract_keyword_snippetr   _compute_bm25_scorer   floatr   r   sum_keyword_fallback_searchrC  sortr   )8r   querykquery_lowerquery_wordsdomain_termsprimary_domain_termsgeneral_termsexpanded_termswordr  keyword_patternsadded_termsr  query_embedding
oversamplescoresindicesr#  
candidatesretrieved_indicesr  idxrk  searchable_text_lowersearchable_text_rawr  matched_termsr  r  matched_domainmatched_primaryrequire_domain_matchrequire_primary_matchpattern_infoterm_patternword_patternr  combined_scorer  keyword_hit_countr  fallback_patternsneededfallback_candidatesbalanced_candidatessource_countsused_indicesra  ry  doc_idxcexisting_sourcesneeded_sourcesextra_candidatesextra_candidates8                                                           r  r  VectorStore.search  s   ::"#5v7NOOt~~!#I!"9NA;NOO%%%!":\4PU;K_K_<`aau{{}"!"CoW[\aWbEcddkkmjj[9!Dx%%htn5   $D"t'?##D)33(//5	 $ #D'##D)33(//5	 #  D4yA~&#t'<  &   e 0D"##::ebiio&=&Er}}U!1":	%  OOD! 1 	%%'AQD++,<=WY			"jj//8O hh7>>yIO/Q!991=J"jj//LOFG
 
fQi4JE3c$..))..%Cwwz2&**955$$S)$($?$?$D!"&";";C"@  M$ $ 2 2;@U VY^ ^ M %"O"N#O#&&P?O&P#P $''RAQ'R$R!$4L#/	#:L#**+>??%,,\&-AB'4-1N+//==26(+C1D(E,0) %5 }%)$']S3s=GY\]G]_`Ca=a-a$bM"&"?"?@SUe"f&+<&9$3&?O$^DX$'s$;M !'D4y1}')zz%"))D/2IE2QSUS`S`'a'../BCC,/]S5H,IM! ( 77 #(,1En+4E11#7GSJ %uU|3%56"Z/0  y>
O' %e!.(%6#2 
I i(w 5|    
) 
 
 )9P(81AEE,<OQ(8P ,< O,<q+,< O  0!2Q!6**F"&"?"?! #$5 6#	 #@ # 126E u#I]]:r266xKF!%%fa0E--&&y1$)AIM&!mmK0G"  )&'1, $ XklXkSTj" 5 9 9(I NXkl A,<(=$=>N!#'#@#@%"$0$4' $A $  (8O,00R@DDXyYF!11 '..?$((0,9,=,=fa,H1,LM&)-11+>G*$((1./14 (8 "#a''	 33#**95*+q0"" ( #"[  	)!-.I	V Q O@  ms=   Bc. d0ddd',d.
d8dddr  r  domainreturnc           
         U R                   R                  USS9nU R                  R                  S[	        U5       SUSS  Vs/ s H  oUS   PM	     sn 35        U R                   R                  X5      nU R                  R                  S[	        U5       S	35        U(       aE  U R                   R                  Xc5      nU R                  R                  S
[	        U5       SU 35        U R                  XS-  5      n/ nU GH  n	U	R                  SS5      n
U	R                  S0 5      nU R                   R                  U
SS9n/ nU H  nUS   R                  5       nXR                  5       ;   d  M+  U R                  X5      nU R                  XU5      nUS   S-  US-  -   US-  -   nUR                  US   UUS   U
R                  5       R                  U5      S.5        M     U(       a  [        S U 5       5      [	        U5      -  OSnU	R                  SS5      nUS-  US-  -   nU	R                  5       nUR!                  UUUU[	        U5      S.USS U(       a  U R#                  X5      OSS.5        UR                  U5        GM     UR%                  S SS9  U(       a*  U Vs/ s H  nUR                  SS5      S:  d  M  UPM     nnUSU $ s  snf s  snf ) a  
Enhanced search using advanced keyword management and domain filtering.

Args:
    query: Search query
    k: Number of results to return
    domain: Optional domain filter (sports, crypto, stocks, forex)

Returns:
    List of search results with enhanced keyword analysis
   )max_keywordsz
Extracted z keywords from query: N   keywordzExpanded query to z termszFiltered to z domain-specific terms for rY  r+  r3  r`  
   r  r  r  r  )r  r  r  	frequencyc              3   *   #    U  H	  oS    v   M     g7f)r  Nr  )r  kss     r  r  .VectorStore.enhanced_search.<locals>.<genexpr>  s     #I.BwK.r  r   ffffff?)matched_keywordsdoc_keywordsr  total_matchesr  )r  keyword_analysisquery_expansiondomain_relevancec                     U S   $ r  r  xs    r  r  -VectorStore.enhanced_search.<locals>.<lambda>  s    AgJr  Tr  r  )r   extract_keywordsloggerinfor  expand_queryfilter_by_domainr  re  rF  !_calculate_keyword_position_score _calculate_keyword_context_scorerE  ry  r  copyr  _calculate_domain_relevancer  )r   r  r  r  extracted_keywordskwr  base_resultsenhanced_resultsresultr+  r`  r   keyword_scoreskw_lowerposition_scorecontext_scoretotal_kw_scoredoc_keyword_scoreoriginal_scoreenhanced_scoreenhanced_resultr  s                          r  enhanced_searchVectorStore.enhanced_searchw  s;    "11BB5WYBZ:c*<&=%>>T  oA  BD  CD  oE  VF  oEhjYbVc  oE  VF  UG  H  	I --::5I-c..A-B&IJ !11BB>ZNKK|C,?+@@[\b[cde {{5a%0 "FjjB/Gzz*b1H  //@@WY@ZL  N(i=..0}}.%)%K%KH%^N$($I$I(]k$lM&(kC&7.3:N&NQ^adQd&dN"))#%i=!/"$X,%,]]_%:%:8%D	+  )" dr#I.#I ICP^L_ _wx $ZZ3N+c14E4KKN %kkmO""'(6$0%6%(%8	% $2#2#6Y_D$D$DW$Ueh
$ 
 ##O4] #b 	"6E +;b+;aquuEWYZ?[^a?a+;b##O VFJ  cs   K,K
Kkeywordsc                    U R                   R                  USR                  U5      5      nU R                  R	                  SUSS  VVs/ s H  u  pVUPM	     snn 35        [        U5      nUSS  HM  u  pU R                   R                  USS9n
UR                  U
 VVs/ s H  u  pVUS:  d  M  UPM     snn5        MO     / nU H  nU[        R                  " S[        R                  " U5      -   S-   [        R                  5      U(       a  X0R                   R                  U5      ;   OS	XSS  VV	s/ s H  u  pYUPM	     sn	n;   S
.nUR                  U5        M     U R                  XS-  0 S9n/ nU GH>  nUR                  SS5      n/ nU H  nUR!                  5       UR!                  5       ;   d  M'  [#        [        R$                  " S[        R                  " U5      -   S-   U[        R                  5      5      nUS:  d  Mx  U R                   R                  U/U5      S   S   nUR                  UUUS.5        M     U(       a  ['        S U 5       5      [#        U5      -  OSnUR)                  5       nUR                  UU[#        U5      [+        U5      S.5        UR                  U5        GMA     UR-                  S SS9  USU $ s  snnf s  snnf s  sn	nf )z
Search focused specifically on keyword matching with advanced ranking.

Args:
    keywords: List of keywords to search for
    k: Number of results to return
    domain: Optional domain filter

Returns:
    Keyword-focused search results
 zRanked keywords: Nr  r  )top_kr  r  Fr  rY  )r  r+  r3  r   r4  )r  matches	relevancec              3   6   #    U  H  oS    US   -  v   M     g7f)r'  r&  Nr  )r  kms     r  r  5VectorStore.keyword_focused_search.<locals>.<genexpr>  s     Z/B;"Y- ?/s   )r  keyword_matchestotal_keyword_matchesexpanded_keywordsc                 &    U R                  SS5      $ )Nr  r   r  r  s    r  r  4VectorStore.keyword_focused_search.<locals>.<lambda>  s    AEE/1,Er  Tr  )r   rank_keywords_by_relevancejoinr
  r  r  find_similar_keywordsr  r  r  r  r  get_domain_keywordsrE  r  re  rF  r  r  r  r  r7  r  )r   r"  r  r  ranked_keywordsr  r  r-  r  _similarr  r  resultsr  r  r+  r+  r&  relevance_scorer  r  s                         r  keyword_focused_search"VectorStore.keyword_focused_search  s    ..II(TWT\T\]eTfg,/RTSTBU-VBUYRbBU-V,WXY  M)"1-JG**@@PQ@RG$$'%Q'YRUS[b'%QR .
 (G::ebii.@&@5&H"--X[aV';';'O'OPV'WWgl%"1:M)N:M":M)NN	G ##G, ) //0@a%SU/V FjjB/G !O,==?gmmo5!"**URYYw5G-G%-OQXZ\ZgZg"hiG{*.*>*>*Y*Y[bZcel*mno*pqr*s'..'.'.)80  - vECZ/ZZ]`ap]qq  KLM$kkmO""!.#2),_)=%)*;%<	$  ##O47 < 	"EtT##q .W &R *Os   
K!K1K6Kr  r+  c                     UR                  5       nUR                  U5      nUS:  a  gSU[        U5      -  -
  nU[        U5      S-  :  a  US-  n[        US5      $ )z6Calculate score based on keyword position in document.r   r  r  r   )rF  findr  r  )r   r  r+  content_lowerkeyword_posr  s         r  r  -VectorStore._calculate_keyword_position_score  sf    #((1? c'l :; W++c!N>3''r  context_termsc                 <  ^^ UR                  5       nUR                  T5      nUS:  a  gSn[        SXV-
  5      n[        [	        U5      U[	        T5      -   U-   5      nXGU m[        UU4S jU 5       5      n	U(       a  [        U	[	        U5      -  S5      n
U
$ Sn
U
$ )z3Calculate score based on surrounding context terms.r   r2  c              3   F   >#    U  H  oT;   d  M
  UT:w  d  M  S v   M     g7fr4  Nr  )r  r  contextr  s     r  r  ?VectorStore._calculate_keyword_context_score.<locals>.<genexpr>6  s"     aMDW_aQUY`Q`aaMs   	!!	!r  )rF  r<  r  r  r  r  )r   r  r+  r@  r=  r>  window_sizestartendcontext_matchesr  rD  s    `         @r  r  ,VectorStore._calculate_keyword_context_score'  s    #((1? A{01#m$kCL&@;&NOc* aMaa KXOc-.@@#F ^_r  c                    ^ U(       d  gU R                   R                  U5      nU(       d  gUR                  5       m[        U4S jU 5       5      nU[	        U5      -  nUS:  a  US-  n[        US5      $ )z:Calculate how relevant a document is to a specific domain.r  r  c              3   R   >#    U  H  oR                  5       T;   d  M  S v   M     g7frC  )rF  )r  r  r=  s     r  r  :VectorStore._calculate_domain_relevance.<locals>.<genexpr>G  s     Q/BXXZ=5Paa/s   '	'r  g333333?)r   r3  rF  r  r  r  )r   r+  r  domain_keywordsr&  r'  r=  s         @r  r  'VectorStore._calculate_domain_relevance=  sq    ..BB6JQ/QQ c/22	 a<I9c""r  c                 D  ^ UR                  S0 5      n/ nUR                  S5      (       a)  UR                  [        UR                  S5      5      5        UR                  S5      (       a;  UR                  UR                  S/ 5       Vs/ s H  n[        U5      PM     sn5        UR                  S5      (       a;  UR                  UR                  S/ 5       Vs/ s H  n[        U5      PM     sn5        UR                  S5      (       a)  UR                  [        UR                  S5      5      5        UR                  [        UR                  SS5      5      5        SR	                  U5      nUR                  5       mUR                  S	/ 5      n[        R                  5        HJ  u  pX;   a  M  [        U4S
 jU	 5       5      (       d  M(  UR                  U5        UR                  U5        ML     U(       a  [        [        U5      5      n
SR	                  U
5      US'   UR                  S5      (       dM  UR                  SS5      nSUS    S3nUR                  U5      (       d  U U 3R                  5       US'   SUS'   SR	                  U5      $ s  snf s  snf )z>Compose searchable text using enriched metadata + raw content.r`  rc   
key_pointsthemesclean_excerptr+  r3  r$  topicsc              3   ,   >#    U  H	  oT;   v   M     g 7fNr  )r  variant
lower_texts     r  r  5VectorStore._build_searchable_text.<locals>.<genexpr>h  s     AWj(s   z, topic_label_topic_taggedz	[Topics: z] T)re  rE  r  rC  r1  rF  rx  CANONICAL_TOPIC_KEYWORDSitemsr  sortedr  
startswithr  )r   rk  r`  searchable_text_partspointtheme	base_textrT  	canonicalvariantsdistinct_topicsrc   topic_prefixrX  s                @r  rD  "VectorStore._build_searchable_textR  s*   77:r* "<<	""!((X\\)-D)EF<<%%!(((,,|]_B`)aB`#e*B`)ab<<!!!(((,,xY[B\)]B\#e*B\)]^<<((!((X\\/-J)KL$$SB)?%@AHH23	__&
$$Xr2#;#A#A#CI"AAAAi(%,,Y7 $D $S[1O&*ii&@H]#<<00",,y"5!*8M+B*C2F)),77-9N7)*D*J*J*LHY',0)xx-..9 *b)]s   
JJc                    U R                    Vs/ s H  nU R                  U5      PM     snU l        U R                   Vs/ s H  o"R                  5       PM     snU l        U R                   Vs/ s H  n[        UR                  5       5      PM     snU l        U R                  (       a1  [        U R                  5      [        U R                  5      -  U l	        gSU l	        gs  snf s  snf s  snf )z/Build cached searchable text for all documents.r   N)
r   rD  r   rF  r   r  splitr   r  r   )r   rk  texts      r  r  %VectorStore._prepare_searchable_textsx  s     ~~%
% '',%%
!
 &*%>%>'
%>TJJL%>'
# +/*C*C
*C$C

*C
 "%d&6&6"7#d>N>N:O"OD"%D%
'

s   C$ C)/#C.c                     [        U R                  5      [        U R                  5      :w  d,  [        U R                  5      [        U R                  5      :w  a  U R	                  5         gg)z8Ensure searchable texts cache is in sync with documents.N)r  r   r   r   r  )r   s    r  r  $VectorStore._ensure_searchable_texts  sN     ))*c$...AA++,DNN0CC**, Dr  c                 @   0 n[        U R                  5      nUS:X  a  U$ U Hx  nUS   nXR;   a  M  US   nSnU R                   H   nUR                  U5      (       d  M  US-  nM"     [        R                  " X7-
  S-   US-   -  S-   5      n	[        U	S5      X%'   Mz     U$ )z6Compute IDF values for query terms using BM25 formula.r   r  r  r4  r  r  r   )r  r   r  mathlogr  )
r   r  r  
total_docsr  r  r  dfrk  idfs
             r  r  VectorStore._compute_idf_table  s    	445
?$D<D 9oGB33>>$''!GB 4 ((JOc1b3h?#EFC!#smIO % r  c                 @   U(       a  U(       d  gU[        U R                  5      :  a  gU R                  U   nU[        U R                  5      :  a  U R                  U   O[        UR                  5       5      nU R                  S:  a  U R                  O
U=(       d    SnSnU Hu  nUS   n	US   n
[        U
R                  U5      5      nUS:X  a  M/  UR                  U	S5      nU[        S-   -  nU[        S[        -
  [        XV-  -  -   -  -   nX|X-  -  -  nMw     U$ )z9Compute BM25 score for a document given keyword patterns.r   r   r4  r  r  r  )	r  r   r   rj  r   r  re  BM25_K1BM25_B)r   r  r  r  doc_textdoc_lenavg_lenbm25r  r  r  tfrt  	numeratordenominators                  r  r  VectorStore._compute_bm25_score  s   yD7788..y91:SAQAQ=R1R$""9-X[\d\j\j\lXm)-)<)<q)@$%%glQR$D<D9oGW__X./BQw--c*Cgm,Iw#,7CT9U*UVVK9233D % r  c                   ^ / nU=(       d
    [        5       nU(       a  [        U5      O	[        5       n[        U R                  5       GH*  u  nmX;   a  M  U R                  U   n	U	R	                  S0 5      R	                  S5      (       a  ME  [        U4S jU 5       5      (       a  U	R	                  S0 5      R	                  SS5      n
X;   a  M  U R                  TU5      nU(       d  M  SnU R                  XU=(       d    0 5      n[        U-  [        U-  -   nU	S   U	S   USUUS	UUS
.	nUR                  U5        UR                  U5        UR                  U
5        [        U5      U:  d  GM*    U$    U$ )zFScan all documents for keyword matches when vector search misses them.r`  rw  c              3   J   >#    U  H  oS    R                  T5      v   M     g7f)r  N)r  )r  r  r  s     r  r  7VectorStore._keyword_fallback_search.<locals>.<genexpr>  s$     \K[4	?))*=>>K[s    #ra  r  r  r+  r   Tr  )r  r9  r   r   re  r  r  r  r   r   rE  rB  r  )r   r  r  r  r  r  r  local_exclude_sourcesr  rk  ra  snippetr  r|  combinedr  r  s                   @r  r  $VectorStore._keyword_fallback_search  sy    )2SU8GO 4SU(1$2K2K(L$C$%..%Cwwz2&**955\K[\\\R044XyI2778KM]^ #//yTVW)M9&-. 
  #9~ #J%$'%2"&)-'.!$
	 $**95##C(%))&1&'61""O )MN #"r  windowc                 X   U(       a  U(       d  gU H  nUS   R                  U5      nU(       d  M   [        SUR                  5       US-  -
  5      n[        [	        U5      UR                  5       US-  -   5      nXU n[        R                  " SSU5      R                  5       nUs  $    g)z9Extract a concise snippet around the first keyword match.Nr  r   rY  z\s+r$  )	r  r  rG  r  r  rH  r  subr  )	r   raw_textr  r  r  matchrG  rH  r  s	            r  r  $VectorStore._extract_keyword_snippet  s    /$DO**84EuAu{{}v{:;#h-v{)BC"-&&g6<<> % r  )
r   r   r   r   r   r   r   r   r   r   rV  )r  )r  N)NNN)r  )!r  
__module____qualname____firstlineno__r  r%  rO  rU  r   rr  rd  rz  r  r  intr	   r   r   r   r   r9  r  r  r  r  rD  r  r  r  r  r  r  __static_attributes__r  r  r  r   r   g   sK   H"/b0Ad
SqPBd.]`"r#hU$S U$S U$hsm U$W[\`adfiai\jWk U$nF$tCy F$S F$hWZm F$gklpqtvyqylzg{ F$P( (s (u (" c Z^_bZc hm ,#3 # # #*$/L&"-&,-#^3  r  r   ).sentence_transformersr   r  numpyr>  r  r  rp  pathlibr   	rapidfuzzr   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   airagagent.keyword_managerr   airagagent.exceptionsr   r   r   r   r   r   r  r  rw  rx  r\  r   r  r  r  <module>r     s    5    	    3 3    ;   	

M+ ZO Or  