
    #i$F                     j   S r SSKrSSKrSSKrSSKJrJrJrJrJ	r	J
r
  SSKJrJr  SSKJr   SSKrSSKJr  SSKJr  Sr SSKJr  SrSSKJrJrJ r J!r!J"r"   " S S5      r#Sq$S\#4S jr%g! \ a"    S	r " S
 S5      r\r " S S5      r\" 5       r NRf = f! \ a    S	r " S S5      r\" 5       r Nhf = f)z
Advanced Keyword Management System for Knowledge Card Search
Provides comprehensive keyword extraction, tagging, filtering, and scoring capabilities.
    N)DictListSetTupleOptionalAny)defaultdictCounter)Path)	stopwords)WordNetLemmatizerTFc                       \ rS rSrS rSrg)DummyLemmatizer   c                     U$ N )selfwords     </var/www/html/leadgen/backtest/airagagent/keyword_manager.py	lemmatizeDummyLemmatizer.lemmatize   s    K    r   N)__name__
__module____qualname____firstlineno__r   __static_attributes__r   r   r   r   r      s    	r   r   c                   $    \ rS rSr\S 5       rSrg)DummyStopwords   c                 
    / SQ$ )N3theaanandorbutinonattoforofwithbyiyouhesheitwetheymehimherusthemthisthatthesethoseisamarewaswerebebeenbeinghavehashaddodoesdidwillwouldcouldshouldmaymightmustr   )langs    r   wordsDummyStopwords.words   s     r   r   N)r   r   r   r   staticmethodrX   r   r   r   r   r    r       s    		 
	r   r    )fuzzc                   4    \ rS rSr\S 5       r\S 5       rSrg)FuzzFallback-   c                     [        U R                  5       5      n[        UR                  5       5      n[        X#-  5      n[        X#-  5      nUS:  a  XE-  S-  $ S$ )Nr   d   )setlowerlen)r%   ba_setb_setintersectionunions         r   ratioFuzzFallback.ratio.   sS     	NE	NEu}-L&E3819L(3.C!Cr   c                    ^ U R                  5       nUR                  5       nX2;   a  g[        U5      [        U5      :  a  X#4OX24u  nm[        U4S jU 5       5      n[        U5      S:  a  U[        U5      -  S-  $ S$ )Nr`   c              3   6   >#    U  H  oT;   d  M
  S v   M     g7f)   Nr   ).0charlongers     r   	<genexpr>-FuzzFallback.partial_ratio.<locals>.<genexpr>@   s     B6>!!s   		r   )rb   rc   sum)r%   rd   a_lowerb_lowershortermatchesrp   s         @r   partial_ratioFuzzFallback.partial_ratio7   sy     ggiGggiG!47L3w<4Ow0V]UgOGVBBBG58\A5EGc'l*S0L1Lr   r   N)r   r   r   r   rZ   ri   rx   r   r   r   r   r]   r]   -   s,    		D 
	D 
		M 
		Mr   r]   )TRADING_KEYWORD_TAXONOMYTRADING_SYNONYMSKEYWORD_WEIGHTSDOMAIN_KEYWORD_MAPPINGTECHNICAL_TERMSc            
          \ rS rSrSrS rS\\\\\4   4   4S jr	S\\\
\   4   4S jrS\\\
\   4   4S jrS#S\S	\S\
\\\4      4S
 jjrS\S\
\   S\
\   4S jrS\S\S\S\S\4
S jrS\S\S\4S jrS$S\S\\   S\
\   4S jjrS%S\S\S\
\\\4      4S jjrS\
\   S\
\   S\4S jrS\
\   S\S\
\   4S jrS\S\
\   4S jrS\
\   S\S\
\\\4      4S jrS\4S  jrS\4S! jrS"rg)&KeywordManagerL   z
Advanced keyword management system for knowledge card search.
Handles keyword extraction, tagging, synonym expansion, and relevance scoring.
c                    [         (       as   [        R                  R                  S5        [        R                  R                  S5        [        5       U l        [        [        R                  " S5      5      U l        OS U l        [        / S	Q5      U l        [        5       U l        [        [        R                  " S5      5      U l        1 S
kU l        U R                  R                  U R                  5        U R                  5       U l        U R!                  5       U l        [$        U l        U R)                  5       U l        g ! [         a8     [        R
                  " SSS9  [        R
                  " SSS9   GN2!     GN8= ff = f)Nzcorpora/stopwordszcorpora/wordnetr   T)quietwordnetenglishc                     U $ r   r   xs    r   <lambda>)KeywordManager.__init__.<locals>.<lambda>e   s    r   r#   >   r%   r&   r,   r1   r*   r/   r+   r(   r-   r'   r)   r.   r$   datatimer0   chartpricemarketvolumetradinganalysisstrategy)NLTK_AVAILABLEnltkr   findLookupErrordownloadr   
lemmatizerra   r   rX   
stop_wordsdomain_stop_wordsupdate_build_keyword_indexkeyword_index_build_synonym_mapsynonym_mapr}   domain_filters_build_keyword_vectorskeyword_vectors)r   s    r   __init__KeywordManager.__init__R   s*   >			23		01 01DO!)//)"<=DO *DO! # DO ,-iooi89"
 	t556 "6682244  $::<I  MM+T:MM)48s#   >E 
F*E==F?FFreturnc           	         0 n[         R                  " 5        H`  u  p#U HU  nUU[        R                  " UR	                  5       S5      [        UR                  5       5      SS.XR	                  5       '   MW     Mb     [         HH  nUR	                  5       U;  d  M  USS[        UR                  5       5      SS.XR	                  5       '   MJ     U$ )z0Build comprehensive keyword index with metadata.g333333?taxonomy)keyworddomainweightlengthtypegeneral      ?	technical)rz   itemsr|   getrb   rc   splitr~   )r   r   r   keywordsr   terms         r   r   #KeywordManager._build_keyword_index   s     !9 > > @F#&$-11'--/3G!'--/2&2mmo. $ !A $Dzz|=0#'!!$**,/'/jjl+ $ r   c                 p   [        [        5      n[        R                  " 5        H  u  p#UR	                  5       nX   R                  U Vs/ s H  oUR	                  5       PM     sn5        U H9  nUR	                  5       nXq;  a  / X'   XAU   ;  d  M&  X   R                  U5        M;     M     U R                  (       ai  [        U R                  R                  5       5       HB  nU R                  R                  U5      n	X:w  d  M%  XU   ;  d  M/  X   R                  U	5        MD     [        U5      $ s  snf )zBuild synonym expansion map.)r	   listr{   r   rb   extendappendr   r   keysr   dict)
r   r   	canonicalsynonymscanonical_lowerssynonymsynonym_lowerr   lemmas
             r   r   !KeywordManager._build_synonym_map   s   !$' $4#9#9#;I'oo/O(//H0MHqH0MN $ ' 313K."m*DD.55oF $ $< ?? 2 2 7 7 9:11':#5I(I(//6 ;
 K  # 1Ns   D3
c                 v   0 nU R                   R                  5        H  nS/S-  nU H*  n[        U5      S:  d  M  U[        U5      ==   S-  ss'   M,     [        U5      =(       d    SnU Vs/ s H  ofU-  PM	     nn[	        UR                  5       5      nUR                  US-  /5        X1U'   M     U$ s  snf )z5Build simple keyword vectors for similarity matching.r      rm   
   )r   r   ordrs   rc   r   r   )r   vectorsr   vectorro   totalr   
word_counts           r   r   %KeywordManager._build_keyword_vectors   s     ))..0GS3YFt9s?3t9%*%  
 K$1E'-.v!gvF. W]]_-JMM:?+,%G 1"  /s   .B6textmax_keywordsc                 $   UR                  5       n[        R                  " SU5      nU Vs/ s H'  oUU R                  ;  d  M  [	        U5      S:  d  M%  UPM)     nn[        U5      nU R                  USS/5      n[        U5      n/ n	UR                  5        HX  u  pXR                  ;   d  M  U R                  U
   nU R                  XX5      nU	R                  US   UUUS   US   SS.5        MZ     UR                  5        Ho  u  pXR                  ;   d  M  U R                  U   nU R                  XX5      nU	R                  US   UUUS   US   [	        UR                  5       5      S.5        Mq     U	R                  S	 S
S9  U	SU $ s  snf )z
Extract relevant keywords from text with scoring.

Args:
    text: Input text to analyze
    max_keywords: Maximum number of keywords to return

Returns:
    List of keyword dictionaries with scores and metadata
\b\w+\b      r   r   r   rm   )r   score	frequencyr   r   r   c                     U S   $ )Nr   r   r   s    r   r   1KeywordManager.extract_keywords.<locals>.<lambda>  s    ajr   TkeyreverseN)rb   refindallr   rc   r
   _extract_ngramsr   r   _calculate_keyword_scorer   r   sort)r   r   r   
text_lowerrX   w	word_freqngrams
ngram_freq
candidatesr   freqmetadatar   ngrams                  r   extract_keywordsKeywordManager.extract_keywords   s    ZZ\
 

:z2!MEqdoo%=#a&1*EM EN	 %%j1a&9V_
 
 $//+JD)))--d355d(W!!'	2"!%&x0$V,# 	 , &++-KE***--e455e8X!!'	2"!%&x0$V,!%++-0# 	 . 	0$?-<((U Ns   FFFn_valuesc                    / n[         R                  " SU5      nU Hi  n[        [        U5      U-
  S-   5       HH  nSR	                  XFXe-    5      n[        U5      S:  d  M)  UR                  UR                  5       5        MJ     Mk     U$ )zExtract n-grams from text.r   rm    r   )r   r   rangerc   joinr   rb   )r   r   r   r   rX   nr2   r   s           r   r   KeywordManager._extract_ngrams  su    

:t,A3u:>A-..u:>MM%++-0 /  r   r   r   r   c                 F   US   n[        US-  S5      nSnUR                  UR                  5       5      nUS:  a  [        SSU[	        U5      -  -
  5      n[        US   S-  S5      n	U R                  X5      n
US-  US-  -   US-  -   U	S-  -   U
S-  -   n[        US5      $ )	z(Calculate relevance score for a keyword.r   皙?      ?r   r   r   g?g?)minr   rb   maxrc   _calculate_context_boost)r   r   r   r   r   
base_score
freq_boostposition_boostkeyword_poslength_boostcontext_boosttotal_scores               r   r   'KeywordManager._calculate_keyword_score  s    h'
 S#.
 ii0! C;T+B$CDN 8H-3S9 55gD!C' 3&'$s*+ #S() $c)	* ;$$r   c                    SnUR                  UR                  5       5      nUS:  a  g[        SXC-
  5      n[        [	        U5      U[	        U5      -   U-   5      nX%U n/ SQnSn	U H  n
X;   d  M
  X:w  d  M  U	S-  n	M     [        U	S5      $ )z5Calculate boost based on surrounding trading context.r`   r   )r   r   riskprofitlossr   r   r   	indicatorsignalpositionentryexitr   r   )r   rb   r   r   rc   )r   r   r   context_windowr   startendcontexttrading_termscontext_scorer   s              r   r   'KeywordManager._calculate_context_boost0  s    ii0? A{34#d)[3w<7.HIS/X !D4?$ " =#&&r   Nqueryr   c                 <   / n[         R                  " SUR                  5       5      nU H  nUR                  U5        XPR                  ;   a  UR                  U R                  U   5        U(       d  MJ  X R                  ;   d  M[  U R                  U    H0  n[        R                  " XV5      S:  d  M  UR                  U5        M2     M     [        [        U5      5      nU Vs/ s H'  owU R                  ;  d  M  [        U5      S:  d  M%  UPM)     nnUSS $ s  snf )z
Expand query with synonyms and related terms.

Args:
    query: Original query
    domain: Optional domain filter

Returns:
    List of expanded query terms
r   P   r   N2   )r   r   rb   r   r   r   r   r[   ri   r   ra   r   rc   )r   r  r   expanded_termsquery_termsr   domain_keywordts           r   expand_queryKeywordManager.expand_queryH  s      jjU[[];D!!$' '''%%d&6&6t&<= v&$7$77&*&9&9&&ANzz$7"<&--n= 'B   c.12%3_^7O!TWXYTZ]^T^!^_cr"" `s   $D;DDtop_kc                 b   UR                  5       U R                  ;  a  / $ U R                  UR                  5          n/ nU R                  R                  5        H=  u  pVXQR                  5       :w  d  M  U R                  X65      nUR	                  XW45        M?     UR                  S SS9  USU $ )z
Find semantically similar keywords using vector similarity.

Args:
    keyword: Keyword to find similar terms for
    top_k: Number of similar keywords to return

Returns:
    List of (keyword, similarity_score) tuples
c                     U S   $ Nrm   r   r   s    r   r   6KeywordManager.find_similar_keywords.<locals>.<lambda>  s    !r   Tr   N)rb   r   r   _cosine_similarityr   r   )r   r   r  target_vectorsimilarities	candidater   
similaritys           r   find_similar_keywords$KeywordManager.find_similar_keywordsl  s     ==?$"6"66I,,W]]_=!%!5!5!;!;!=IMMO+!44]K
##Y$;< "> 	nd;FU##r   vec1vec2c                     [        S [        X5       5       5      n[        R                  " [        S U 5       5      5      n[        R                  " [        S U 5       5      5      nUS:X  d  US:X  a  gX4U-  -  $ )z0Calculate cosine similarity between two vectors.c              3   .   #    U  H  u  pX-  v   M     g 7fr   r   )rn   r%   rd   s      r   rq   4KeywordManager._cosine_similarity.<locals>.<genexpr>  s     <ODA!%Os   c              3   *   #    U  H	  oU-  v   M     g 7fr   r   )rn   r%   s     r   rq   r*         2T!eT   c              3   *   #    U  H	  oU-  v   M     g 7fr   r   )rn   rd   s     r   rq   r*    r,  r-  r   )rs   zipmathsqrt)r   r&  r'  dot_productnorm1norm2s         r   r  !KeywordManager._cosine_similarity  se    <CO<<		#2T223		#2T223A:!em,,r   r   c                 H   X R                   ;  a  U$ [        U R                   U   5      n/ nU Hp  nUR                  5       U;   a  UR                  U5        M*  U H@  n[        R
                  " UR                  5       U5      S:  d  M.  UR                  U5          Mn     Mr     U$ )z
Filter keywords by domain relevance.

Args:
    keywords: List of keywords to filter
    domain: Target domain

Returns:
    Filtered list of domain-relevant keywords
K   )r   ra   rb   r   r[   ri   )r   r   r   domain_keywordsfilteredr   	domain_kws          r   filter_by_domainKeywordManager.filter_by_domain  s     ,,,Od11&9:G}}/1( -	::gmmoy9B>OOG, -   r   c                 B    XR                   ;  a  / $ U R                   U   $ )z'Get all keywords for a specific domain.)r   )r   r   s     r   get_domain_keywords"KeywordManager.get_domain_keywords  s$    ,,,I""6**r   c                   ^ UR                  5       m/ nU H  nUR                  5       nUT;   a  SnOH[        U4S jUR                  5        5       5      (       a  SnO[        R                  " UT5      S-  S-  nXPR
                  ;   a  US-  nUR                  U[        US5      45        M     UR                  S SS	9  U$ )
z
Rank keywords by relevance to a query.

Args:
    keywords: Keywords to rank
    query: Query string

Returns:
    List of (keyword, relevance_score) tuples
r   c              3   ,   >#    U  H	  oT;   v   M     g 7fr   r   )rn   r   query_lowers     r   rq   <KeywordManager.rank_keywords_by_relevance.<locals>.<genexpr>  s     K5JT[(5Js   gffffff?r`   r   g333333?c                     U S   $ r  r   r   s    r   r   ;KeywordManager.rank_keywords_by_relevance.<locals>.<lambda>  s    !A$r   Tr   )	rb   anyr   r[   rx   r   r   r   r   )r   r   r  rankedr   keyword_lowerr   rB  s          @r   rank_keywords_by_relevance)KeywordManager.rank_keywords_by_relevance  s     kkmG#MMOM +K]5H5H5JKKK **=+FLsR  2 22MM7CsO45#  & 	5r   filepathc                     U R                   U R                  U R                  S.n[        US5       n[        R
                  " X#SS9  SSS5        g! , (       d  f       g= f)zSave keyword index to file.)r   r   r   r   r   )indentN)r   r   r   openjsondump)r   rK  r   fs       r   save_keyword_index!KeywordManager.save_keyword_index  sN     "//++"11
 (C AIIda( !  s   A
Ac                    [        US5       n[        R                  " U5      nSSS5        WR                  S0 5      U l        UR                  S0 5      U l        UR                  S0 5      U l        g! , (       d  f       NT= f)zLoad keyword index from file.rNr   r   r   )rN  rO  loadr   r   r   r   )r   rK  rQ  r   s       r   load_keyword_index!KeywordManager.load_keyword_index  se    (C A99Q<D ! "XXor:88M26"hh'7< ! s   A22
B )r   r   r   r   r   r   r   )   r   )r   )r   r   r   r   __doc__r   r   strr   r   r   r   floatr   intr   r   r   r   r   r  r   r$  r  r;  r>  rI  rR  rW  r   r   r   r   r   r   L   s   
+=Zd3S#X+>&? 8!Dd3i$8 !4S$u+-=(> 09)S 9) 9)T$sTWx.EY 9)vC 49 c % % %t %[^ %ch %6' '3 '5 '0"## "#x} "#S	 "#H$S $ $d5QTV[Q[K\F] $6	-tE{ 	-$u+ 	-% 	-c C DI >+# +$s) +"49 "S "TRWX[]bXbRcMd "H	)3 	)=3 =r   r   r   c                  0    [         c
  [        5       q [         $ )z)Get singleton instance of KeywordManager.)_keyword_managerr   r   r   r   get_keyword_managerr`    s     )+r   )&rZ  r   rO  r0  typingr   r   r   r   r   r   collectionsr	   r
   pathlibr   r   nltk.corpusr   	nltk.stemr   r   ImportErrorr   r    	rapidfuzzr[   RAPIDFUZZ_AVAILABLEr]   airagagent.configrz   r{   r|   r}   r~   r   r_  r`  r   r   r   <module>rj     s   
 
   8 8 , !%+N*6 b= b=J  ^ A  !N  (   I#!.  M M, >D3s#   A+ B +%BBB21B2