
    #i$F                     d   d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
 ddlmZmZ ddlmZ 	 ddlZddlmZ ddlmZ dZ	 ddlmZ dZddlmZmZm Z m!Z!m"Z"  G d d      Z#da$de#fdZ%y# e$ r" d	Z G d
 d      ZeZ G d d      Z e       ZY Pw xY w# e$ r d	Z G d d      Z e       ZY ew xY w)z
Advanced Keyword Management System for Knowledge Card Search
Provides comprehensive keyword extraction, tagging, filtering, and scoring capabilities.
    N)DictListSetTupleOptionalAny)defaultdictCounter)Path)	stopwords)WordNetLemmatizerTFc                       e Zd Zd Zy)DummyLemmatizerc                     |S N )selfwords     4/var/www/html/backtest/airagagent/keyword_manager.py	lemmatizezDummyLemmatizer.lemmatize   s    K    N)__name__
__module____qualname__r   r   r   r   r   r      s    	r   r   c                       e Zd Zed        Zy)DummyStopwordsc                 
    g dS )N3theaanandorbutinonattoforofwithbyiyouhesheitwetheymehimherusthemthisthatthesethoseisamarewaswerebebeenbeinghavehashaddodoesdidwillwouldcouldshouldmaymightmustr   )langs    r   wordszDummyStopwords.words   s     r   N)r   r   r   staticmethodrS   r   r   r   r   r      s    		 
	r   r   )fuzzc                   ,    e Zd Zed        Zed        Zy)FuzzFallbackc                     t        | j                               }t        |j                               }t        ||z        }t        ||z        }|dkD  r||z  dz  S dS )Nr   d   )setlowerlen)r    ba_setb_setintersectionunions         r   ratiozFuzzFallback.ratio.   sY     	NE	NEuu}-L&E3819L5(3.C!Cr   c                     | j                         }|j                         }||v ryt        |      t        |      k  r||fn||f\  }t        fd|D              }t        |      dkD  r|t        |      z  dz  S dS )NrY   c              3   ,   K   | ]  }|v sd   yw)   Nr   ).0charlongers     r   	<genexpr>z-FuzzFallback.partial_ratio.<locals>.<genexpr>@   s     B46>!Bs   	r   )r[   r\   sum)r    r]   a_lowerb_lowershortermatchesrh   s         @r   partial_ratiozFuzzFallback.partial_ratio7   s     ggiGggiG'!47L3w<4Ow0V]_fUgOGVBBBG58\A5EGc'l*S0L1Lr   N)r   r   r   rT   rb   ro   r   r   r   rW   rW   -   s,    		D 
	D 
		M 
		Mr   rW   )TRADING_KEYWORD_TAXONOMYTRADING_SYNONYMSKEYWORD_WEIGHTSDOMAIN_KEYWORD_MAPPINGTECHNICAL_TERMSc            
          e Zd ZdZd Zdeeeeef   f   fdZdeee	e   f   fdZ
deee	e   f   fdZd!dedede	eeef      fd	Zded
e	e   de	e   fdZdededededef
dZdededefdZd"dedee   de	e   fdZd#dedede	eeef      fdZde	e   de	e   defdZde	e   dede	e   fdZdede	e   fdZde	e   dede	eeef      fdZdefdZdefd Zy)$KeywordManagerz
    Advanced keyword management system for knowledge card search.
    Handles keyword extraction, tagging, synonym expansion, and relevance scoring.
    c                 
   t         rr	 t        j                  j                  d       t        j                  j                  d       t               | _        t        t        j                  d            | _        nd | _        t        g d	      | _        t               | _        t        t        j                  d            | _        h d
| _        | j                  j                  | j                         | j                         | _        | j!                         | _        t$        | _        | j)                         | _        y # t        $ r; 	 t        j
                  dd       t        j
                  dd       n#  Y nxY wY ;w xY w)Nzcorpora/stopwordszcorpora/wordnetr   T)quietwordnetenglishc                     | S r   r   xs    r   <lambda>z)KeywordManager.__init__.<locals>.<lambda>e   s     r   r   >   r    r!   r'   r,   r%   r*   r&   r#   r(   r"   r$   r)   r   datatimer+   chartpricemarketvolumetradinganalysisstrategy)NLTK_AVAILABLEnltkr   findLookupErrordownloadr   
lemmatizerrZ   r   rS   
stop_wordsdomain_stop_wordsupdate_build_keyword_indexkeyword_index_build_synonym_mapsynonym_maprs   domain_filters_build_keyword_vectorskeyword_vectors)r   s    r   __init__zKeywordManager.__init__R   s(   			23		01 01DO!)//)"<=DO *DO! # DO ,-iooi89"
 	t556 "6682244  $::<I  MM+T:MM)48s)   >D> >	F.E76F7E;9FFreturnc           	         i }t        j                         D ]^  \  }}|D ]T  }||t        j                  |j	                         d      t        |j                               dd||j	                         <   V ` t        D ]E  }|j	                         |vs|ddt        |j                               dd||j	                         <   G |S )z0Build comprehensive keyword index with metadata.g333333?taxonomy)keyworddomainweightlengthtypegeneral      ?	technical)rp   itemsrr   getr[   r\   splitrt   )r   r   r   keywordsr   terms         r   r   z#KeywordManager._build_keyword_index   s     !9 > > @ 	FH# &$-11'--/3G!'--/2&2gmmo.	 $ 	Dzz|=0#'!!$**,/'/djjl+	 r   c                 R   t        t              }t        j                         D ]  \  }}|j	                         }||   j                  |D cg c]  }|j	                          c}       |D ]7  }|j	                         }||vrg ||<   |||   vs$||   j                  |       9  | j                  ret        | j                  j                               D ]?  }| j                  j                  |      }	|	|k7  s$|	||   vs,||   j                  |	       A t        |      S c c}w )zBuild synonym expansion map.)r	   listrq   r   r[   extendappendr   r   keysr   dict)
r   r   	canonicalsynonymscanonical_lowerssynonymsynonym_lowerr   lemmas
             r   r   z!KeywordManager._build_synonym_map   s'   !$' $4#9#9#; 
	GIx'oo/O(//H0Mq0MN $ G ' 313K."+m*DD.55oFG
	G ?? 2 2 7 7 9: 711':G#[5I(I(//67
 K  # 1Ns   D$
c                 ^   i }| j                   j                         D ]  }dgdz  }|D ]'  }t        |      dk  s|t        |      xx   dz  cc<   ) t        |      xs d}|D cg c]  }||z  	 }}t	        |j                               }|j                  |dz  g       |||<    |S c c}w )z5Build simple keyword vectors for similarity matching.r      re   
   )r   r   ordrj   r\   r   r   )r   vectorsr   vectorrg   totalr}   
word_counts           r   r   z%KeywordManager._build_keyword_vectors   s     ))..0 	&GS3YF +t9s?3t9%*%+
 K$1E'-.!ag.F. W]]_-JMM:?+,%GG	&"  /s   %B*textmax_keywordsc                    |j                         }t        j                  d|      }|D cg c]"  }|| j                  vst	        |      dkD  s!|$ }}t        |      }| j                  |ddg      }t        |      }g }	|j                         D ]X  \  }
}|
| j                  v s| j                  |
   }| j                  |
|||      }|	j                  |d   |||d   |d   dd       Z |j                         D ]o  \  }}|| j                  v s| j                  |   }| j                  ||||      }|	j                  |d   |||d   |d   t	        |j                               d       q |	j                  d	 d
       |	d| S c c}w )a  
        Extract relevant keywords from text with scoring.

        Args:
            text: Input text to analyze
            max_keywords: Maximum number of keywords to return

        Returns:
            List of keyword dictionaries with scores and metadata
        \b\w+\b      r   r   r   re   )r   score	frequencyr   r   r   c                     | d   S )Nr   r   r|   s    r   r~   z1KeywordManager.extract_keywords.<locals>.<lambda>  s
    aj r   TkeyreverseN)r[   refindallr   r\   r
   _extract_ngramsr   r   _calculate_keyword_scorer   r   sort)r   r   r   
text_lowerrS   w	word_freqngrams
ngram_freq
candidatesr   freqmetadatar   ngrams                  r   extract_keywordszKeywordManager.extract_keywords   s    ZZ\
 

:z2!MqQdoo%=#a&1*MM EN	 %%j1a&9V_
 
 $//+ 	JD$t)))--d355dD(JW!!'	2"!%&x0$V,# 		 &++- 	KE4***--e455eT8ZX!!'	2"!%&x0$V,!%++-0# 		 	0$?-<((U Ns   FFFn_valuesc                    g }t        j                  d|      }|D ]f  }t        t        |      |z
  dz         D ]G  }dj	                  ||||z          }t        |      dkD  s)|j                  |j                                I h |S )zExtract n-grams from text.r   re    r   )r   r   ranger\   joinr   r[   )r   r   r   r   rS   nr-   r   s           r   r   zKeywordManager._extract_ngrams  s    

:t, 	1A3u:>A-. 1q1.u:>MM%++-01	1 r   r   r   r   c                 F   |d   }t        |dz  d      }d}|j                  |j                               }|dk\  rt        dd|t	        |      z  z
        }t        |d   dz  d      }	| j                  ||      }
|dz  |dz  z   |dz  z   |	dz  z   |
dz  z   }t        |d      S )	z(Calculate relevance score for a keyword.r   皙?      ?r   r   r   g?g?)minr   r[   maxr\   _calculate_context_boost)r   r   r   r   r   
base_score
freq_boostposition_boostkeyword_poslength_boostcontext_boosttotal_scores               r   r   z'KeywordManager._calculate_keyword_score  s    h'
 S#.
 ii0! C;T+B$CDN 8H-3S9 55gtD!C' 3&'$s*+ #S() $c)	* ;$$r   c                    d}|j                  |j                               }|dk  ryt        d||z
        }t        t	        |      |t	        |      z   |z         }||| }g d}d}	|D ]  }
|
|v s|
|k7  s|	dz  }	 t        |	d      S )z5Calculate boost based on surrounding trading context.rY   r   )r   r   riskprofitlossr   r   r   	indicatorsignalpositionentryexitr   r   )r   r[   r   r   r\   )r   r   r   context_windowr   startendcontexttrading_termscontext_scorer   s              r   r   z'KeywordManager._calculate_context_boost0  s    ii0? A{^34#d)[3w<7.HIuS/X ! 	%Dw47?$	% =#&&r   Nqueryr   c                    g }t        j                  d|j                               }|D ]  }|j                  |       || j                  v r|j                  | j                  |          |sC|| j                  v sR| j                  |   D ]-  }t        j                  ||      dkD  s|j                  |       /  t        t        |            }|D cg c]"  }|| j                  vst        |      dkD  s!|$ }}|dd S c c}w )z
        Expand query with synonyms and related terms.

        Args:
            query: Original query
            domain: Optional domain filter

        Returns:
            List of expanded query terms
        r   P   r   N2   )r   r   r[   r   r   r   r   rU   rb   r   rZ   r   r\   )r   r   r   expanded_termsquery_termsr   domain_keywordts           r   expand_queryzKeywordManager.expand_queryH  s     jjU[[]; 	>D!!$' t'''%%d&6&6t&<= &D$7$77&*&9&9&&A >Nzz$7"<&--n=>	> c.12%3_q7OTWXYTZ]^T^!__cr"" `s   D(D7Dtop_kc                 d   |j                         | j                  vrg S | j                  |j                            }g }| j                  j                         D ]>  \  }}||j                         k7  s| j                  ||      }|j	                  ||f       @ |j                  d d       |d| S )a  
        Find semantically similar keywords using vector similarity.

        Args:
            keyword: Keyword to find similar terms for
            top_k: Number of similar keywords to return

        Returns:
            List of (keyword, similarity_score) tuples
        c                     | d   S Nre   r   r|   s    r   r~   z6KeywordManager.find_similar_keywords.<locals>.<lambda>  s
    ! r   Tr   N)r[   r   r   _cosine_similarityr   r   )r   r   r  target_vectorsimilarities	candidater   
similaritys           r   find_similar_keywordsz$KeywordManager.find_similar_keywordsl  s     ==?$"6"66I,,W]]_=!%!5!5!;!;!= 	=IvGMMO+!44]FK
##Y
$;<	= 	nd;FU##r   vec1vec2c                     t        d t        ||      D              }t        j                  t        d |D                    }t        j                  t        d |D                    }|dk(  s|dk(  ry|||z  z  S )z0Calculate cosine similarity between two vectors.c              3   ,   K   | ]  \  }}||z    y wr   r   )rf   r    r]   s      r   ri   z4KeywordManager._cosine_similarity.<locals>.<genexpr>  s     <DAq!a%<s   c              3   &   K   | ]	  }||z    y wr   r   )rf   r    s     r   ri   z4KeywordManager._cosine_similarity.<locals>.<genexpr>       2a!e2   c              3   &   K   | ]	  }||z    y wr   r   )rf   r]   s     r   ri   z4KeywordManager._cosine_similarity.<locals>.<genexpr>  r  r  r   )rj   zipmathsqrt)r   r  r  dot_productnorm1norm2s         r   r  z!KeywordManager._cosine_similarity  si    <CdO<<		#2T223		#2T223A:!eem,,r   r   c                 2   || j                   vr|S t        | j                   |         }g }|D ]g  }|j                         |v r|j                  |       '|D ]<  }t	        j
                  |j                         |      dkD  s+|j                  |        g i |S )z
        Filter keywords by domain relevance.

        Args:
            keywords: List of keywords to filter
            domain: Target domain

        Returns:
            Filtered list of domain-relevant keywords
        K   )r   rZ   r[   r   rU   rb   )r   r   r   domain_keywordsfilteredr   	domain_kws          r   filter_by_domainzKeywordManager.filter_by_domain  s     ,,,Od11&9: 
	G}}/1( - 	::gmmoy9B>OOG,
	 r   c                 @    || j                   vrg S | j                   |   S )z'Get all keywords for a specific domain.)r   )r   r   s     r   get_domain_keywordsz"KeywordManager.get_domain_keywords  s&    ,,,I""6**r   c                 r   |j                         g }|D ]  }|j                         }|v rd}nAt        fd|j                         D              rd}nt        j                  |      dz  dz  }|| j
                  v r|dz  }|j                  |t        |d      f        |j                  d d	       |S )
z
        Rank keywords by relevance to a query.

        Args:
            keywords: Keywords to rank
            query: Query string

        Returns:
            List of (keyword, relevance_score) tuples
        r   c              3   &   K   | ]  }|v  
 y wr   r   )rf   r   query_lowers     r   ri   z<KeywordManager.rank_keywords_by_relevance.<locals>.<genexpr>  s     KTT[(Ks   gffffff?rY   r   g333333?c                     | d   S r  r   r|   s    r   r~   z;KeywordManager.rank_keywords_by_relevance.<locals>.<lambda>  s
    !A$ r   Tr   )	r[   anyr   rU   ro   r   r   r   r   )r   r   r   rankedr   keyword_lowerr   r&  s          @r   rank_keywords_by_relevancez)KeywordManager.rank_keywords_by_relevance  s     kkm 	6G#MMOM +K]5H5H5JKK **=+FLsR  2 22MM7CsO45#	6& 	5r   filepathc                     | j                   | j                  | j                  d}t        |d      5 }t	        j
                  ||d       ddd       y# 1 sw Y   yxY w)zSave keyword index to file.)r   r   r   r   r   )indentN)r   r   r   openjsondump)r   r,  r   fs       r   save_keyword_indexz!KeywordManager.save_keyword_index  sY     "//++"11
 (C  	)AIIdAa(	) 	) 	)s   AAc                     t        |d      5 }t        j                  |      }ddd       j                  di       | _        |j                  di       | _        |j                  di       | _        y# 1 sw Y   OxY w)zLoad keyword index from file.rNr   r   r   )r/  r0  loadr   r   r   r   )r   r,  r2  r   s       r   load_keyword_indexz!KeywordManager.load_keyword_index  sl    (C  	 A99Q<D	  "XXor:88M26"hh'7<	  	 s   A11A:)   r   )r   )r   r   r   __doc__r   r   strr   r   r   r   floatr   intr   r   r   r   r   r  r   r  r  r!  r#  r+  r3  r7  r   r   r   rv   rv   L   s   
+=Zd3S#X+>&? 8!Dd3i$8 !4S$u+-=(> 09)S 9) 9)T$sTWx.EY 9)vC 49 c % % %t %[^ %ch %6' '3 '5 '0"## "#x} "#S	 "#H$S $ $d5QTV[Q[K\F] $6	-tE{ 	-$u+ 	-% 	-c C DI >+# +$s) +"49 "S "TRWX[]bXbRcMd "H	)3 	)=3 =r   rv   r   c                  .    t         
t               a t         S )z)Get singleton instance of KeywordManager.)_keyword_managerrv   r   r   r   get_keyword_managerr?    s     )+r   )&r9  r   r0  r  typingr   r   r   r   r   r   collectionsr	   r
   pathlibr   r   nltk.corpusr   	nltk.stemr   r   ImportErrorr   r   	rapidfuzzrU   RAPIDFUZZ_AVAILABLErW   airagagent.configrp   rq   rr   rs   rt   rv   r>  r?  r   r   r   <module>rI     s   
 
   8 8 , !%+N*6 b= b=J  ^ A  !N  (   I#!.  M M, >D3s#   A* B *$BBB/.B/