
     {il                       d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZmZ ddlmZmZmZmZmZ dd	lm Z  e G d
 d             Z!dddddddddddddddddddddddddddddddZ"g dZ#g dZ$g dZ%g dZ&g dZ'g dZ(g d Z)e#e$z   e%z   e&z   e'z   e(z   e)z   Z*d!d"d#d$d%d&d'd(Z+d)e,d*e-fd+Z.d)e,d*e/fd,Z0g d-Z1e(je                  e1       g d.g d/g d0g d1g d2g d3g d4g d5g d6g d7d8
Z3g d9g d:g d;g d<g d=g d>g d?g d@dAZ4dBe,d*e,fdCZ5dDdEdFdGd!d&d%dHdIZ6g dJg dKg dLdMdNgg dOdPZ7dBe,d*e,fdQZ8g dRg dSg dTg dUdVZ9dBe,d*e:fdWZ;g dXg dYg dZg d[g d\g d]g d^g d_d`Z<dae,d*e=fdbZ>g dcZ?dBe,d*e=fddZ@dBe,d*e:fdeZAdae,d*e=fdfZBdgg dhfdig djfdkg dlfdmg dnfdog dpfdqg drfdsg dtfdug dvfgZCdwg dxg dydzdDg d{g d|dzdGg d}g d~dzdg g ddzdZDdDdBe,de:d*e/fdZEdDde:de:d*e/fdZFg ddEdddg dddddg dddddg dddddg ddGdddg ddddddZGdBe,d*e=fdZHg dg dg dg ddg dddgg dg ddg dddgg dg ddg dg dddgg ddg dg dg dg ddg dg dg dg ddg dg dg dg ddg d¢g dâg dĢg dŢdƜdǜZIdae,d*e=fdȄZJdBe,d*e,fdɄZKdg g d!d!dd!ddd˜	ZLde:d*e:fd̈́ZMdDZNd#ZO	 ddlPZPdZQ	 ddlSmTZU dZV	 ddlWmXZX dZY	 ddlZZ[dZ\	 ddl]m^Z^ dZ_	 ddl`Z`dZa	 ddlbZbdZc G dӄ dԫ      Zd G dՄ d֫      Ze G dׄ dث      Zf G dل dګ      Zg G dۄ dܫ      Zh eeidݬޫ      Zj	 ddlkmlZl ejj                  el        end        e!       Zp eeep      Zq efep      Zr egep      Zs ehep      Zti Zuddddddddd e-ej                  j                  dd            d
ax e	j                         Zz e	j                         Z{ e
j                         Z}da~epj                  dz  Zd Zd Zej                  j                  dd      Zd Z end       erj                           e        de,de,de-de,fdZde,dede,fdZejj                  d      d        Zejj                  d      d        Zde/d*e:fdZde/d*e:fdZde/d*e:fdZde:d*e:fdZejj                  d dg      d        Zejj                  ddg      d        Zejj                  ddg      ed               Zejj                  d	dg      d
        Zejj                  ddg      ed               Zejj                  ddg      ed               Zejj                  ddg      d        Zejj                  ddg      d        Zejj                  ddg      d        Zejj                  ddg      d        Zejj                  dddg      ed               Zejj                  ddg      ed               Zejj                  ddg      ed               Zejj                  ddg      d        Zejj                  d dg      d!        Zejj                  d"dg      ed#               Zejj                  d$dg      d%        Zejj                  d&dg      d'        Zejj                  d(dg      ed)               Zg i i g  ejD                  ejF                        jI                         d*a e	j                         Zdad+ Zd, Zdae,d-e:fd.Zd-e:d*e-fd/Zejj                  d0dg      d1        Zejj                  d2dg      d3        Zejj                  d4dg      d5        Zejj                  d6dg      d7        Z	 dd8lmZ  e        eid:k(  r e-ej                  j                  d;d<            Z end=e         end>ea d?e_ d@eQ         endA eerjh                                 e        ejjk                  dBedϐC       yy# eR$ r dZQY w xY w# eR$ r dZVY w xY w# eR$ r dZYY w xY w# eR$ r dZ\Y w xY w# eR$ r dZ_Y w xY w# eR$ r dZaY w xY w# eR$ r dZcY w xY w# eR$ rZo endeo        Y dZo[odZo[oww xY w# e$ rZo end9eo        Y dZo[odZo[oww xY w(E  z
Advanced Legal Document RAG Service
Incorporates sophisticated PDF processing, FAISS vector search, BM25 hybrid scoring,
and DeepSeek/Tesseract OCR fallback.

Port: 5007 (configurable via RAG_PORT)
    N)datetimetimezone)Pathwraps)	dataclass)ListDictAnyOptionalTuple)Flaskrequestjsonifysend_from_directoryrender_template)secure_filenamec                      e Zd ZU dZ ee      j                  Zeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed<   d	Zeed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zy)	RAGConfigzRAG Service Configurationbase_dirNdata_dir	cache_dirpdf_dir  
chunk_size  chunk_min_size@  chunk_max_size   sentence_overlap   min_sentences      ?hybrid_vector_weighthybrid_keyword_weight      ?hybrid_bm25_weightmax_chunks_per_source
   	default_khttp://127.0.0.1:5003/ocrdeepseek_ocr_urlhttp://127.0.0.1:5002/ocrtesseract_ocr_urlc                    | j                   ;t        t        j                  j	                  d| j
                  dz              | _         | j                  | j                   dz  | _        | j                  | j                   dz  | _        | j                  j                  dd       | j                  j                  dd       y )NDATA_DIRdata	rag_cachecsvT)parentsexist_ok)	r   r   osenvirongetr   r   r   mkdirselfs    E/var/www/html/eventheodds/ai-lawyer-rag/advanced_legal_rag_service.py__post_init__zRAGConfig.__post_init__;   s    ==  
DMMF<R!STDM>>!!]][8DN<<==50DL 	TD94$7    )__name__
__module____qualname____doc__r   __file__parentr   __annotations__r   r   r   r   intr   r   r!   r#   r%   floatr&   r(   r)   r+   r-   strr/   r>    r?   r=   r   r       s    #(^**Hd*HdItGT JNCNCcM3 #'%&#'5' $$!"3"Is 8c78s8
8r?   r      r        r    )r   r   r   r!   r#   r     i  r"   i  ix  r   )	technicalresearchlegalsportsdefault)'zlegal doctrineholdingdicta	precedentzstare decisisanalogydistinguishing	syllogismzrule applicationzfact patternelementprongtestzdaubert testzbalancing testzmulti-factor testzbright-line rulestandard of reviewde novoabuse of discretionzclear errorburden of productionzburden of persuasionpresumption	inferencezprima facie caseaffirmative defensegenuine issue of material factmoving partynon-moving partyzcelotex trilogyzreasonable doubtzpreponderance of evidencezclear and convincingstrict scrutinyintermediate scrutinyrational basiszmens reaz
actus reus)+zcause of actionclaimmotion to dismiss12(b)(6)zrule 12motion for summary judgment	discoveryinterrogatory
depositionrequest for productionrequest for admissionz	in limine	voir diredirect examinationcross-examinationclosing argumentopening statementjury instructionappealwrit of certiorariremand
settlementnegotiation	mediationarbitration
injunctionztemporary restraining orderzpreliminary injunctionzpermanent injunctionspecific performancezcompensatory damageszpunitive damageszliquidated damagesznominal damagesztreble damagesclass actionjoinderinterventioninterpleaderzdeclaratory judgmentmandamusprohibitionzquo warranto)&	complaintanswercounterclaimcross-claimthird-party complaintappellate brieftrial briefzmemorandum of lawzmemorandum in supportzmemorandum in oppositionzreply briefzamicus briefzmajority opinionzconcurring opinionzdissenting opinionzplurality opinionz
per curiamzbench memorandumdemand lettercease and desistzengagement letterzretainer agreementzunilateral contractzbilateral contractzindemnity clausezhold harmlesszforce majeurestatute
regulation	ordinancezlegislative historyzcommittee reportzfloor debatezregulatory commentznotice and commentzproposed rulez
final rulezconsent decree)*	plaintiff	defendant
petitioner
respondent	appellantappelleemovantz
non-movantzcounsel of recordzamicus curiaezfact witnesszexpert witnessztrier of factjuryzbench trialztrial courtzappellate courten banczsupreme courtzcircuit courtdistrict courtzmagistrate judgezadministrative law judge
arbitratormediatorzspecial masterreceivertrusteezguardian ad litemcuratorsecepaftcdojnlrbeeoccfpbosha
prosecutorzpublic defenderzsolicitor generalzattorney general))justicezprocedural justicezdistributive justiceequityfairnessdue processsubstantive due processprocedural due processequal protectionlibertyautonomyprivacy
efficiencyzkaldor-hickszpareto efficiency
deterrenceretributionrehabilitationincapacitationrestitutionz
good faithz	bad faithzfiduciary dutyduty of carezduty of loyaltyconfidentialityattorney-client privilegework product doctrinezconflict of interestzzealous advocacyzcandor to the tribunaljurisdictionpersonal jurisdictionsubject matter jurisdictionsovereignty
legitimacyzrule of lawzseparation of powers
federalism
preemptionzdormant commerce clause)"zstrategic considerationzrisk assessmentzclient counselingzethical dilemmazcost-benefit analysiszprobabilistic outcomezworst-case scenarioleveragezbargaining positionzsettlement valuezlitigation riskexposurezprecedent strengthzcircuit splitzemerging trendznovel issuezmatter of first impressionzpolicy argumentztextualist argumentzoriginalist argumentzpurposive interpretationlegislative intentzslippery slopezparade of horribleszfloodgates argumentzchilling effect
overrulingzdistinguishing precedentzlimiting holdingzextending doctrinezcase strategyzlitigation posturezforum selectionchoice of law)Bcontract	agreementbreachdamages	liability
negligencetortr   venuer   constitution	amendmentzcivil rightscriminalfelonymisdemeanorarraignment
indictmentverdictsentencejudgewitness	testimonyevidencemotionbriefpleadingsubpoenasummonswritzhabeas corpusattorneycounsellawyer	privilegecustodyalimonydivorceprobatewilltrustestatecorporationllcpartnershipshareholder	fiduciarypatent	trademark	copyright
employmentdiscrimination
harassmentzreal estateleasemortgagedeedeasement
bankruptcycreditorlien
collateralimmigrationasylumdeportationnaturalization      ?333333?g      ?g?ffffff?皙?r$   )rM   r    r"            r   termreturnc                 p   | j                         }|t        D cg c]  }|j                          c}v ry|t        D cg c]  }|j                          c}v ry|t        D cg c]  }|j                          c}v ry|t        D cg c]  }|j                          c}v ry|t
        D cg c]  }|j                          c}v ry|t        D cg c]  }|j                          c}v ry|t        D cg c]  }|j                          c}v ryyc c}w c c}w c c}w c c}w c c}w c c}w c c}w )	z;Get the tier level for a legal term (1-6, or 0 for general)rM   r    r"   r  r  r  r   )lowerLEGAL_TIER_1LEGAL_TIER_2LEGAL_TIER_3LEGAL_TIER_4LEGAL_TIER_5LEGAL_TIER_6LEGAL_GENERAL)r  
term_lowerts      r=   get_term_tierr     s    J6Aaggi66	<8a	8	8	<8a	8	8	<8a	8	8	<8a	8	8	<8a	8	8	=9a	9	9 7888889s(   DD$D	D$.D)D.8D3c                 P    t        |       }|dk\  rt        j                  |d      S y)z7Get the boost weight for a legal term based on its tierr   333333?)r  LEGAL_TIER_WEIGHTSr9   )r  tiers     r=   get_term_weightr#     s)    Dqy!%%dC00r?   )zissue spottingzelements analysiszcounterargument anticipationzrebuttal strategyzevidentiary foundationzpreservation of errorzdeferential reviewharmless errorplain errorzstructural errorwaiver
forfeitureestoppellacheszclean handszunclean handszparol evidence rulezstatute of limitationsstatute of fraudszbest evidence rulehearsayzhearsay exceptionzbusiness records exception)summary judgmentrm   rd   re   rf   Rule 56zCelotex trilogyno genuine disputematerial factzsummary adjudicationCelotexAnderson v. Liberty Lobby
Matsushitar`   )r   r   r   r   zduty of confidentialityzethics rule 1.6zclient confidencezprivileged communicationzcommon interest doctrinezjoint defensezcrime-fraud exceptionzwaiver of privilegezattorney client)rn   ro   rp   rq   rr   r   zmandatory disclosureprotective orderprivilege logzdocument productionRule 26
spoliationESIz!electronically stored information)r   r   r   zforum non conveniensminimum contactsspecific jurisdictiongeneral jurisdictionzlong-arm statutezdiversity jurisdictionzfederal questionzsupplemental jurisdictionremoval)	r   r   r   noticezopportunity to be heardfundamental rightszMathews v. Eldridgeri   rg   )
offer
acceptanceconsiderationzmutual assentzmeeting of the mindszcontract formationbargainzpromissory estoppelzdetrimental reliancer*  )r   r   zbreach of duty	causationproximate causebut-for causationr   foreseeabilityreasonable personzstandard of carezres ipsa loquitur)zfirst amendmentfree speechestablishment clausefree exercisezfourth amendmentsearch and seizureprobable causewarrantzfifth amendmentzself-incriminationzdouble jeopardytakingszsixth amendmentzright to counselzconfrontation clausezfourteenth amendmentr   r   incorporation)r+  	relevancezprobative valuezprejudicial effectzRule 403zcharacter evidencezprior bad actszRule 404zexpert testimonyDaubert
foundationauthentication)ry   r]   r^   r_   zclearly erroneousr$  r%  preservationr   oral argumentr{   affirmreversevacate)
summary_judgmentattorney_clientrn   r   due_processcontract_formationr   constitutionalr   	appellate)r   r   zpre-litigationzsettlement demandztolling agreementznotice of claimzadministrative exhaustion)r   r   rk   zamended complaintrl   zRule 8rc   r   r   r   zdefault judgmentzresponsive pleading)ro   rp   zproduction requestzexpert disclosurer5  zinitial disclosurezdiscovery disputezmotion to compelr3  r4  zsubpoena duces tecum)	zmotion in liminer,  zpretrial conferencer   r-  zpretrial orderzwitness listzexhibit listzDaubert motion)rs   rw   rt   rx   rv   ru   redirectzmotion for directed verdictzjudgment as a matter of lawzjury deliberationr   )zpost-trial motionzmotion for new trial
remittituradditurz judgment notwithstanding verdictJNOVzRule 59zRule 60)znotice of appealr   rT  rz   zpetition for reviewzinterlocutory appealr   r   )zjudgment enforcement	executiongarnishmentzjudgment liencontempt
collectionzsatisfaction of judgmentzjudgment debtor)
pre_filing	pleadingsrn   	pre_trialtrial
post_trialry   enforcementtextc                     | j                         i }t        j                         D ]$  \  }}t        fd|D              }|dkD  s |||<   & |rt	        ||j
                        S y)z/Detect which litigation phase a text relates toc              3   ,   K   | ]  }|v sd   ywrM   NrJ   ).0r  
text_lowers     r=   	<genexpr>z*detect_litigation_phase.<locals>.<genexpr>W  s     >$4:+=A>   	r   )keygeneral)r  LITIGATION_PHASESitemssummaxr9   )rl  phase_scoresphasetermsscorerq  s        @r=   detect_litigation_phaser~  R  sk    JL)//1 (u>%>>19"'L( <\%5%566r?         ??皙?333333?333333?)us_supreme_courtfederal_circuitfederal_districtstate_supremestate_appellatestate_trialadministrative	secondary)z"supreme court of the united statesz\d+\s+u\.?s\.?\s+\d+scotus)zcircuit court of appealsz\d+\s+f\.(?:2d|3d|4th)\s+\d+zcourt of appeals)r   z\d+\s+f\.\s*suppzu\.s\. districtzsupreme court of \w+zcourt of last resort)zcourt of appealzappellate divisionzintermediate appellate)r  r  r  r  r  c                     | j                         }t        j                         D ](  \  }}|D ]  }t        j                  ||      s|c c S  * y)z$Detect court level from text contentunknown)r  COURT_PATTERNSrw  research)rl  rq  levelpatternspatterns        r=   detect_court_levelr  u  sP    J)//1 x 	Gyy*-	 r?   )	affirmedfollowedadoptedapprovedzcited with approval
reaffirmedextendedzrelied uponzconsistent with)	reversed	overruleddisapproved
criticizedlimitedrejectedzdeclined to follow	abrogated
superseded)cited	discussed	mentioned
referencedzsee alsozcf.)distinguished
questionedzcalled into doubtzbut seecontramodified	clarifiednarrowedpositivenegativeneutralcautionc                     | j                         }ddddd}t        j                         D ]  \  }}|D ]  }||v s||xx   dz  cc<      |S )z)Analyze how citations are treated in textr   r  rM   )r  CITATION_SIGNALSrw  )rl  rq  	treatmentsignal_typesignalssignals         r=   analyze_citation_treatmentr    sg    JA!JI 0 6 6 8 ,W 	,F#+&!+&	,, r?   )
zChevron deferencezadministrative deferencezagency interpretationstep onestep twozambiguous statutezreasonable interpretationzChevron U.S.A. v. NRDCzAuer deferencezSkidmore deference)	zErie doctrinezsubstantive vs proceduralr   zErie Railroad v. Tompkinszfederal common lawzoutcome determinativeztwin aims of EriezHanna v. Plumerzvertical choice of law)ripenessmootnessstandingzcase or controversyzinjury in factrB  redressabilityzLujan v. Defenderszcapable of repetitionzpolitical questionzadvisory opinion)	zqualified immunityzclearly established lawzgovernment immunityzreasonable officerzconstitutional violationzHarlow v. Fitzgerald
sequencingzPearson v. Callahanzsovereign immunity)r   zRule 23zclass certificationcommonality
typicalityadequacy
numerositypredominancesuperiorityzopt-outzsettlement classzcy preszclass notice)
zres judicataclaim preclusionzissue preclusionzcollateral estoppelfinal judgmentzsame cause of actionsame parties	mutualityzoffensive collateral estoppelzdefensive collateral estoppel)rJ  rK  warrant requirementzexclusionary rulezfruit of the poisonous treezgood faith exceptionexigent circumstances
plain viewzconsent searchz
Terry stopzreasonable suspicion)rG  zcontent-basedzcontent-neutralrg   rh   time place mannerzprior restraintzpublic forumcommercial speechzsymbolic speechzcompelled speech)chevroneriejusticiabilityqualified_immunityclass_actionres_judicatafourth_amendmentfirst_amendmentqueryc                     | j                         g }t        j                         D ].  \  }}t        fd|dd D              s|j	                  |       0 |S )z!Get doctrines relevant to a queryc              3   B   K   | ]  }|j                         v   y wNr  rp  r  query_lowers     r=   rr  z)get_relevant_doctrines.<locals>.<genexpr>  s     Attzz|{*A   Nr"   )r  LEGAL_DOCTRINESrw  anyappend)r  relevantdoctriner|  r  s       @r=   get_relevant_doctrinesr    sV    ++-KH*002 &%AuRayAAOOH%& Or?   ))z\d+\s+U\.?S\.?\s+\d+r  )z\d+\s+S\.?\s*Ct\.?\s+\d+r  )z"\d+\s+L\.?\s*Ed\.?\s*(?:2d)?\s+\d+r  )z\d+\s+F\.(?:2d|3d|4th)?\s+\d+r  )z&\d+\s+F\.\s*Supp\.?(?:\s*2d|3d)?\s+\d+r  )z!\d+\s+S\.?\s*E\.?(?:\s*2d)?\s+\d+r  )z$\d+\s+S\.?\s*W\.?(?:\s*2d|3d)?\s+\d+r  )z$\d+\s+N\.?\s*E\.?(?:\s*2d|3d)?\s+\d+r  )z!\d+\s+N\.?\s*W\.?(?:\s*2d)?\s+\d+r  )z\d+\s+P\.?(?:\s*2d|3d)?\s+\d+r  )z\d+\s+A\.?(?:\s*2d|3d)?\s+\d+r  )z\d+\s+So\.?(?:\s*2d|3d)?\s+\d+r  )u*   \d+\s+U\.?S\.?C\.?(?:\.?A\.?)?\s*§?\s*\d+r   )u   \d+\s+C\.?F\.?R\.?\s*§?\s*\d+r   )z#Fed\.?\s*R\.?\s*Civ\.?\s*P\.?\s*\d+rule)zFed\.?\s*R\.?\s*Evid\.?\s*\d+r  c                     g }t         D ]b  \  }}t        j                  || t        j                        }|D ]3  }|j	                  |j                         ||j                         d       5 d |S )z0Extract and categorize legal citations from text)citationtypeposition)LEGAL_CITATION_PATTERNSr  finditer
IGNORECASEr  groupstart)rl  	citationsr  citation_typematchesmatchs         r=   extract_citationsr    sp    I"9 ++gtR]]; 	E!KKM%!KKM 	 r?   c                    t        |       }dddddd}|D ]g  }|d   dk(  r|dxx   dz  cc<   |d   dk(  r|dxx   dz  cc<   /|d   d	k(  r|d
xx   dz  cc<   E|d   dk(  r|dxx   dz  cc<   [|dxx   dz  cc<   i |S )z"Count citations by authority levelr   )supremecircuitdistrictstateotherr  r  r  rM   r  r  r  r  r  r  r  )r  )rl  r  countscits       r=   count_authoritative_citationsr    s    !$'Iqa!aPF 
!v;,,9"[--9"[..:!#[--7Oq O7Oq O
! Mr?   c                 x   | j                         g }t        j                         D ]Q  \  }}t        fd|D              r|j	                  |       ,|j                  dd      v sA|j	                  |       S t        j                         D ]Q  \  }}t        fd|D              r|j	                  |       ,|j                  dd      v sA|j	                  |       S t               }g }|D ]E  }|j                         |vs|j                  |j                                |j                  |       G |S )z(Expand query with related legal conceptsc              3   B   K   | ]  }|j                         v   y wr  r  r  s     r=   rr  z-expand_query_with_concepts.<locals>.<genexpr>
       =ttzz|{*=r  _ c              3   B   K   | ]  }|j                         v   y wr  r  r  s     r=   rr  z-expand_query_with_concepts.<locals>.<genexpr>  r   r  )
r  LEGAL_CONCEPT_CLUSTERSrw  r  extendreplacer  setaddr  )	r  expanded_termscluster_namer|  r  seenunique_termsr  r  s	           @r=   expand_query_with_conceptsr    s   ++-KN5;;= )e=u==!!%(!!#s+{:!!%() +002 )%=u==!!%(c3';6!!%(	) 5DL &::<t#HHTZZ\"%&
 r?   z)What's the standard for summary judgment?)genuine issuer/  r0  re   z(How does attorney-client privilege work?)confidentialcommunicationr   r&  z*What's required for personal jurisdiction?)r8  
purposefulr   zExplain Chevron deference)agencyinterpretation	deference	ambiguousz$What are the elements of negligence?)dutyr   rB  r   zHow does res judicata work?)r  r  r  zWhat is qualified immunity?)zclearly established
reasonableimmunityzExplain the Erie doctrine)substantive
proceduralz	state lawfederal       @)zU.S. Supreme CourtSupreme CourtSCOTUSRestatementzModel RuleszUniform Commercial CodeUCC)landmarkseminal	watershedcontrollingbinding)weightsourcestags)zCircuit CourtzCourt of AppealszFederal AppellateTreatisezWright & MillerProsserCorbin	Williston)zwell-reasonedauthoritativeinfluentialleading)zDistrict CourtzState Supreme Courtz
Law ReviewzAmerican Law ReportsALR)instructiveinformativeuseful皙?)r  r  r  r  r  outdatedzdistinguished away)gold_standardhighly_persuasive
persuasive	decliningmetadatac                 D   | j                         }|xs i j                  dd      j                         }t        d   d   D ]4  }|j                         |v s|j                         |v s(t        d   d   c S  t        d   d   D ]  }||v st        d   d   c S  t        d   d   D ]4  }|j                         |v s|j                         |v s(t        d   d   c S  t        d	   d   D ]4  }|j                         |v s|j                         |v s(t        d	   d   c S  y
)z7Calculate precedential weight based on source authoritysource r6  r'  r&  r9  r(  r7  r8  r	  )r  r9   PRECEDENTIAL_HIERARCHY)rl  r:  rq  r<  srctags         r=   get_precedential_weightrA  W  s@   Jn"!!(B/557F &o6yA E99;*$		v(=)/:8DDE
 &k26: A*)+6x@@A
 &&9:9E I99;*$		v(=)*=>xHHI
 &l3I> B99;*$		v(=),7AAB r?   chunk_metadataquery_contextc                 z   t        j                         j                  }| j                  d      }|sy||z
  }| j                  dd      j	                         }| j                  dg       }d|v sd|v st        d |D              r|d	k  ry
|dk  ry|dk  ryyt        d |D              ry|dk  ry|dk  ry|dk  ryy)z?Calculate weight based on temporal relevance of legal authorityyearr	  document_typer=  r(  r   r   c              3   $   K   | ]  }d |v  
 yw)r   NrJ   )rp  r  s     r=   rr  z,calculate_temporal_weight.<locals>.<genexpr>  s     ?]ST	Q?]   r  r  r*      r  r  c              3   $   K   | ]  }|d v  
 yw))r!  r"  r#  NrJ   )rp  r@  s     r=   rr  z,calculate_temporal_weight.<locals>.<genexpr>  s     
G3366
GrH     r
  2   r  r$   )r   nowrE  r9   r  r  )rB  rC  current_year
chunk_year	year_diffdoc_typer(  s          r=   calculate_temporal_weightrR  t  s    <<>&&L##F+Jz)I!!/26<<>Hfb)D H 8C?]X\?]<]q=^^ 
G$
GG2~	R	Rr?   )ifthenwhenwherezprovided thatunless	thereforethushenceaccordinglyconsequently	deductivezRule-based deductive reasoning)r  r&  r  description)	zon one handzon the other handbalanceweighzcompeting interestscountervailingzpolicy considerationszpublic interestzprivate rights      ?dialecticalzPolicy-based balancing analysis)		similarlylike	analogousdistinguishablezakin to
comparablezin contrastunlikezparallel toffffff?comparativez#Analogical reasoning from precedent)zthree-part testzfour-factorelementsprongsr  r  firstsecondthirdzmulti-factortotality of circumstancesg?	frameworkzStructured doctrinal analysis)plain meaningzplain languagezordinary meaningzstatutory textunambiguouszliteral interpretationzexpress termsr   textualz Textual/statutory interpretation)historicallyzat common lawtraditionallongstanding
originallyzfounding eraframerszoriginal understandingffffff?
historicalzHistorical/originalist analysis)rule_synthesispolicy_balancing
analogicaldoctrinal_frameworktextual_analysishistorical_analysisc                    | j                         }g }t        j                         D ]  \  }}d}g }|d   D ]  }||v s|dz  }|j                  |        |dkD  s5t	        d|t        |d         dz  z        }|j                  |t        |d      |d   |d   |d	   |d
d d        |j                  d d       |S )z)Identify legal reasoning patterns in textr   r  rM   r	  r   r    r&  r  r^  Nr  )schema
confidencer&  r  r^  matched_patternsc                     | d   S )Nr  rJ   xs    r=   <lambda>z+detect_argument_structure.<locals>.<lambda>  s
    , r?   Trt  rV  )r  ARGUMENT_SCHEMASrw  r  minlenroundsort)	rl  rq  detectedschema_namer  r  r  r  r  s	            r=   detect_argument_structurer    s    JH/557 Vj) 	1G*$1 ''0	1
 Q;S'S
1C-Ds-J"KLJOO%#J2 *v%m4$4Ra$8 * MM/M>Or?   )r   r=  zprocedural fairnessr   znotice and hearing)police powerzstate interestri   )rg   rh   zrational basis review)criminal procedurezadministrative lawz
family lawr  )relatedcontrastingtestsapplications)r   zsuspect classificationr=  zlegitimate government interestr  )rg   rh   ri   )zvoting rights	educationzemployment discrimination)rG  rI  rH  z
free presszcompelling interestzcontent-neutral regulation)rg   rh   r  )zpolitical speechr  zreligious liberty)rJ  rK  r  )r  consentr  z!reasonable expectation of privacyrq  )r  zadministrative searcheszborder searches)rs  zparol evidencezcourse of dealing)zcontra proferentemzejusdem generisznoscitur a sociis)rl  contextztrade usagezcourse of performance)r   r   
rescissionreformation)r  canonsr'  remedies)r   r   rB  r   rC  )zcontributory negligencezcomparative faultzassumption of risk)rF  rE  rD  )zmedical malpracticezproducts liabilityzpremises liability)r  r/  re   rf   )r.  zview evidence favorablyzdraw inferences)r0  r1  r2  )r-  zstatement of factsresponsereply)r  	standards	key_cases
procedures)r8  zpurposeful availmentz	fair play)r:  r9  r  )zInternational Shoezstream of commercezeffects test)zinternet jurisdictionzcontract disputesztort claims)r  typesr  r  )rZ  equal_protectionr  r  contract_interpretationr   rX  personal_jurisdictionc                    | j                         }g }t        j                         D ]  \  }}|j                  dd      }||v rc|j	                  |j                  dg              |j	                  |j                  dg              |j	                  |j                  dg              |j                  dg       D ])  }||v s|j	                  |j                  dg                 t        t        |            S )z.Get related legal concepts for query expansionr  r  r  r  r  )r  LEGAL_CROSS_REFERENCESrw  r  r  r9   listr  )r  r  r  r  refsdoctrine_cleanrel_terms          r=   get_related_conceptsr    s    ++-KG0668 $!))#s3[(NN488Ir23NN488GR01NN488K45 B/ 	H;&txx	267	 Gr?   c                     | j                         }t        j                         D ]I  }|j                  dd      }||v r|c S t        |   }|j	                  dg       dd D ]  }||v s|c c S  K y)z,Extract the primary legal doctrine from textr  r  r  Nr"   )r  r  keysr  r9   )rl  rq  r  r  r  r  s         r=   extract_primary_doctriner  5  s    J +//1 	 !))#s3Z'O &h/HHY+BQ/ 	 Dz!	 	  r?   r  )	primary_doctrinesupporting_doctrinesargument_structuresprecedential_weighttemporal_weightauthority_leveljurisdictional_relevancecitation_countrE  chunkc                 @   | j                  dd      }| j                  di       }t        |      }|r|dd | d<   |d   d   | d	
<   t        ||      | d
<   t        |      | d<   t	        |      | d<   t        |      }t        |      | d<   t        d |D              | d<   | S )z&Add advanced legal metadata to a chunkcontentr=  r:  Nr"   r  r   r  primary_argument_typer  r  r  r  c              3   ,   K   | ]  }|d    dk(    yw)r  r  NrJ   rp  cs     r=   rr  z)enhance_chunk_metadata.<locals>.<genexpr>l  s      .,-&	''.s   has_supreme_court_citation)r9   r  rA  rR  r  r  r  r  )r  r  r:  arg_structuresr  s        r=   enhance_chunk_metadatar  U  s    ii	2&GyyR(H /w7N'5bq'9#$)7):6)B%& $;7H#ME
   9BE
 !9 AE
 "'*I!)nE
*- .1:. +E
&' Lr?   TFextract_text)	PdfReader)SentenceTransformerc            
       x    e Zd ZdZd Zd Zddededede	e   fdZ
dededede	e   fd	Zdededede	e   fd
Zy)
LLMServicezGLLM service using local DeepSeek for enrichment, Grok for chat fallbackc                    t         j                  j                  dd      | _        t         j                  j                  dd      | _        d| _        t         j                  j                  d      xs  t         j                  j                  dd      j                         j                  d	      j                  d
      | _        d| _        d| _	        | j                          | j
                  s| j                  st        d       y y y )NDEEPSEEK_LLM_URLz)http://127.0.0.1:5004/v1/chat/completionsDEEPSEEK_MODELz'deepseek-ai/DeepSeek-R1-Distill-Qwen-7BTGROK_API_KEYXAI_API_KEYr=  "'z$https://api.x.ai/v1/chat/completionszgrok-4-fast-reasoningzK[RAG] WARNING: Neither DeepSeek nor Grok available. LLM features will fail.)r7   r8   r9   deepseek_urldeepseek_modeldeepseek_availablestripgrok_api_keygrok_url
grok_model_check_deepseekprintr;   s    r=   __init__zLLMService.__init__  s    JJNN+=?jk jjnn-=?hi"&  ZZ^^N;`rzz~~m]_?`ggioopstzz{~>1 	&&t/@/@_` 0A&r?   c                 b   t         sd| _        y	 | j                  j                  dd      }t	        j
                  |d      }|j                  dk(  r t        d| j                          d	| _        yt        d
       d| _        y# t        $ r }t        d| d       d| _        Y d}~yd}~ww xY w)z$Check if local DeepSeek is availableFNz/v1/chat/completionsz
/v1/modelsr  )timeout   z [RAG] Local DeepSeek available: Tz;[RAG] Local DeepSeek not responding, will use Grok fallbackz#[RAG] Local DeepSeek check failed: z, will use Grok fallback)
HAS_REQUESTSr  r  r  requestsr9   status_coder  r  	Exception)r<   base_urlr  es       r=   r  zLLMService._check_deepseek  s    &+D#	,((001GVH||Ha8H##s*89L9L8MNO*.'SU*/' 	,7s:RST&+D##	,s   A!B 2B 	B.B))B.prompt
max_tokenstemperaturer  c                 n    | j                   r| j                  |||      }|r|S | j                  |||      S )z9Generate text using DeepSeek (primary) or Grok (fallback))r  _call_deepseek
_call_grok)r<   r  r  r  results        r=   generatezLLMService.generate  s>     ""(([IF vz;??r?   c           
         t         sy	 t        j                  | j                  ddi| j                  d|dg||ddd	      }|j
                  d
k(  ro|j                         }d|v rzt        |d         dkD  ri|d   d   d   d   }d|v r6d|v r2ddl} |j                  dd||j                        j                         }|S t        d|j
                          d| _        y# t        $ r}t        d|        d| _        Y d}~yd}~ww xY w)zCall local DeepSeek LLMNContent-Typeapplication/jsonuserroler  Fmodelmessagesr  r  streamx   headersjsonr  r  choicesr   messager  z<think>z</think>z<think>.*?</think>r=  flagsz[RAG] DeepSeek error: z[RAG] DeepSeek exception: )r  r  postr  r  r  r  r  r  subDOTALLr  r  r  r  )	r<   r  r  r  r  r2   r  r  r  s	            r=   r  zLLMService._call_deepseek  s1   	,}}!!');<!00*0V!D E",#.# H ##s*}}$T)_)=)A"9oa0;IFG G+
g0E!"("&&)>GSUS\S\"]"c"c"e"N.x/C/C.DEF*/'
 	  	,.qc23&+D##		,s   B6C   C   	D)DDc           
      :   t         r| j                  st        d       y	 t        d| j                   d       t	        j
                  | j                  d| j                   dd| j                  d|d	g||d
dd      }|j                  dk(  r3|j                         }d|v rUt        |d         dkD  rD|d   d   d   d   S t        d|j                   d|j                  r|j                  dd nd        y# t        $ r}t        d|        Y d}~yd}~ww xY w)zCall Grok API (fallback)z*[RAG] Grok API not configured (no API key)Nz[RAG] Calling Grok fallback (z)...zBearer r  )Authorizationr  r  r  Fr  -   r  r  r  r   r  r  z[RAG] Grok API error: z - rN   z(empty)z[RAG] Grok API exception: )r  r  r  r  r  r   r  r  r  r  rl  r  )r<   r  r  r  r  r2   r  s          r=   r  zLLMService._call_grok  s:   4#4#4>?	41$//1B$GH}}'.t/@/@.A%B$6
 "__*0V!D E",#.# H  ##s*}}$T)_)=)A	?1-i8CC.x/C/C.DC_g_l_lVZWZH[r{G|}~   	4.qc233	4s   B!C9 6C9 9	DDDN)rK   r$   )r@   rA   rB   rC   r  r  rI   rG   rH   r   r  r  r  rJ   r?   r=   r  r    s    Qa",(	@s 	@ 	@ 	@X`adXe 	@$S $c $ $RZ[^R_ $L! !# !E !hWZm !r?   r  c            
          e Zd ZdZdefdZdedefdZdedefdZ	ded	edefd
Z
dedee   fdZdedeeeeef      fdZddededeeeef      fdZdedee   dedee   fdZdededee   fdZdededefdZddededee   fdZy)PDFProcessorz4Advanced PDF processor with structure-aware chunkingconfigc                 $    || _         g d| _        y )N)u   copyright\s+©?\s*\d{4}zall rights reservedzpage\s+\d+\s+of\s+\d+z^\s*$z^\d+$z[a-f0-9]{32,}z	^[\s\W]*$)r  boilerplate_patternsr<   r  s     r=   r  zPDFProcessor.__init__-  s    %
!r?   	file_pathr  c                    |j                   j                         dk(  rht        r	 | j                  |      S t        r	 t        t        |            S t        r.	 t        |      }dj                  d |j                  D              S y|j                   j                         d	k(  r)	 t        |d
d      5 }|j                         cddd       S y# t        $ r}t        d|        Y d}~d}~ww xY w# t        $ r}t        d|        Y d}~d}~ww xY w# t        $ r}t        d|        Y d}~yd}~ww xY w# 1 sw Y   yxY w# t        $ r8 t        |d
d      5 }|j                         cddd       cY S # 1 sw Y   Y yxY ww xY w)z(Extract text using best available method.pdfzPyMuPDF failed: Nzpdfminer failed: 
c              3   D   K   | ]  }|j                         xs d   yw)r=  Nr  )rp  pages     r=   rr  z,PDFProcessor.extract_text.<locals>.<genexpr>N  s!     $X4T%6%6%8%>B%>$Xs    zPyPDF2 failed: r=  .txtrutf-8encodingzlatin-1)suffixr  HAS_PYMUPDF_extract_with_pymupdfr  r  HAS_PDFMINERpdfminer_extractrI   
HAS_PYPDF2r  joinpagesopenreadUnicodeDecodeError)r<   r  r  readerfs        r=   r  zPDFProcessor.extract_text9  s   !!#v-255i@@
 3+C	N;;
 1&y1F99$X6<<$XXX ##%/$)S7; $q668$ $ 9 ! 2,QC0112 ! 3-aS1223 ! 1OA3/001$ 	 & $)S9= $668$ $ $ 	$s   C C1 ,D $E 2D9	E 	C.C))C.1	D:DD	D6D11D69E>E E FE8,
F8F	=FFc                 4   t        j                  |      }g }	 |D ]E  }|j                  dt         j                  t         j                  z        }|j                  |       G 	 |j                          dj                  |      S # |j                          w xY w)z&High-fidelity extraction using PyMuPDFrl  r  r  )fitzr  get_textTEXT_DEHYPHENATETEXT_PRESERVE_WHITESPACEr  closer  )r<   r  docr  r  rl  s         r=   r  z"PDFProcessor._extract_with_pymupdf^  s    ii	"	 #}}V43H3H4KhKh3h}iT"# IIKyy IIKs   A
B Brl  filenamec                     |j                         |j                         }g d}t        fd|D              dk\  ryg d}t        fd|D              dk\  ryg d}t        fd	|D              dk\  ry
y)z)Detect document type for optimal chunking)bettingoddsspread	moneylineparlay
sportsbookhandicappingc              3   ,   K   | ]  }|v sd   ywro  rJ   rp  indrq  s     r=   rr  z4PDFProcessor.detect_document_type.<locals>.<genexpr>q  s     CS
1BqCrs  r    rR   )specificationrO   apiprotocol	algorithmc              3   ,   K   | ]  }|v sd   ywro  rJ   r5  s     r=   rr  z4PDFProcessor.detect_document_type.<locals>.<genexpr>v  s     ASsj/@qArs  rO   )abstractmethodologyresults
conclusion
referencesc              3   ,   K   | ]  }|v sd   ywro  rJ   r5  s     r=   rr  z4PDFProcessor.detect_document_type.<locals>.<genexpr>{  s     ES3*3DqErs  rP   rS   )r  rx  )r<   rl  r+  filename_lowersports_indicatorstech_indicatorsresearch_indicatorsrq  s          @r=   detect_document_typez!PDFProcessor.detect_document_typej  s{    ZZ\
!) oC-CCqH YAOAAQF aE/EEJr?   c                     |sg S d}t        j                  ||      }|D cg c]/  }t        |j                               dkD  s |j                         1 c}S c c}w )zSplit text into sentencesz?(?<=[.!?])\s+(?=[A-Z])|(?<=[.!?])\s*$|(?<=[.!?"\'])\s+(?=[A-Z])r*   )r  splitr  r  )r<   rl  sentence_endings	sentencesss        r=   split_into_sentencesz!PDFProcessor.split_into_sentences  sO    I ^HH-t4	#,DaAGGI0C	DDDs   !AAc           	      
   g }|j                  d      }t        |      D ]  \  }}|j                         }|rt        |      dk  r'|j	                         r1t        |      dk\  r#t        |      dk  r|j                  ||df       ht        j                  d|      }|st        d |j                         dd	 D              }|j                  d      }	t        |	      dkD  s|j                  |	|t        |d      f        |S )
zDetect headings in documentr  r"   r  P   rM   z^\s*(\d+)(?:\.(\d+))?\s+(.+)$c              3   &   K   | ]	  }|sd   ywro  rJ   )rp  gs     r=   rr  z/PDFProcessor.detect_headings.<locals>.<genexpr>  s     H!aAHs   Nr    )rH  	enumerater  r  isupperr  r  r  rx  groupsr  r  )
r<   rl  headingslinesilineline_strippednumbered_matchdepthheading_texts
             r=   detect_headingszPDFProcessor.detect_headings  s    

4  ' 	FGAt JJLM C$6$: $$&3}+=+Bs=GY\^G^1 56  XX&FVNH~'<'<'>r'BHH-33A6|$q(OO\1c%m$DE!	F$ r?   rQ  c                 &   t         j                  |t         d         }|d   }|d   }|d   }|d   }|d   }|rt        |j                               |k  rg S | j	                  |      }	|	r| j                  ||	|      }
|
r|
S | j                  ||      S )zStructure-aware chunkingrS   r   r   r   r!   r#   )DOCUMENT_TYPE_CONFIGSr9   r  r  r\  _chunk_by_sections_sentence_based_chunking)r<   rl  rQ  r  r   	chunk_min	chunk_maxoverlapr#   rT  chunkss              r=   
chunk_textzPDFProcessor.chunk_text  s    &**85J95UVL)
+,	+,	+,/s4::<(94I ''-,,T8VDF ,,T6::r?   rT  c                    g }|j                  d      }t        |      D ]  \  }\  }}}	|dz   t        |      k  r||dz      d   n
t        |      }
dj                  ||dz   |
       j	                         }|sYt        |      |d   k\  sk| j                  ||      }|D ]  }||d<   |	|d<    |j                  |        |S )zChunk text by detected sectionsr  rM   r   sectionsection_level)rH  rQ  r  r  r  r`  r  )r<   rl  rT  r  rd  rU  rV  r[  heading_posr  end_possection_contentsection_chunksr  s                 r=   r_  zPDFProcessor._chunk_by_sections  s    

4  6?x5H 		.1A1k5,-ECM,Ahq1uoa(s5zG"iikAog(FGMMOO3#76BR;S#S!%!>!>PV!W+ 3E'3E)$-2E/*3 n-		. r?   c                    | j                  |      }t        |      |d   k  rg S g }g }d}|D ]k  }t        |      dz   }||z   |d   kD  r|rdj                  |      j                         }	t        |	      |d   k\  r'|j	                  |	t        |      t        |	      d       t        |d   t        |            }
|
dkD  r||
 d	 ng }t        d
 |D              }|j	                  |       ||z  }||d   k\  st        |      |d   k\  sdj                  |      j                         }	t        |	      |d   k\  r'|j	                  |	t        |      t        |	      d       t        |d   t        |            }
|
dkD  r||
 d	 ng }t        d |D              }n |rWdj                  |      j                         }	t        |	      |d   k\  r'|j	                  |	t        |      t        |	      d       |S )z$Sentence-based chunking with overlapr#   r   rM   r   r  r   )r  sentence_count
char_countr!   Nc              3   8   K   | ]  }t        |      d z     ywro  r  rp  rK  s     r=   rr  z8PDFProcessor._sentence_based_chunking.<locals>.<genexpr>       $GASVaZ$G   r   c              3   8   K   | ]  }t        |      d z     ywro  rq  rr  s     r=   rr  z8PDFProcessor._sentence_based_chunking.<locals>.<genexpr>  rs  rt  )rL  r  r  r  r  r  rx  )r<   rl  r  rJ  rd  current_chunkcurrent_lengthr   sentence_lengthre  overlap_counts              r=   r`  z%PDFProcessor._sentence_based_chunking  s   --d3	y>F?33I! 	HH!(ma/O/&9I2JJ} XXm4::<
z?f-=&>>MM#-*-m*<&)*o#  !$F+=$>M@R SBORSBS}no >Y[!$$G$G!G  *o-N!55#m:LPVWfPg:g XXm4::<
z?f-=&>>MM#-*-m*<&)*o#  !$F+=$>M@R SBORSBS}no >Y[!$$G$G!G?	HD -0668J:&)9"::)&)-&8"%j/  r?   r  c                    |j                  dd      }t        |      |j                  dd      k  ry|j                         }| j                  D ])  }t	        j
                  ||t        j                        s) y t	        j                  d|      }t        |      dk  ry|r t        t        |            t        |      z  nd}|d	k  ryy
)zValidate chunk qualityr  r=  r   r   F\b\w+\br  r   r   T)	r9   r  r  r
  r  r  r  findallr  )r<   r  r  r  content_lowerr  wordsunique_ratios           r=   is_valid_chunkzPDFProcessor.is_valid_chunk  s    ))Ir*w<&**%5s;;  00 	Gyy-?	
 

:}5u:> 8=s3u:U3!#r?   Nsource_namec                    ||j                   }| j                  |      }|rt        |j                               dk  rg S | j	                  ||      }t
        j                  |t
        d         }| j                  ||      }g }t        |      D ]F  \  }}	| j                  |	|      s||	d<   ||	d<   t        |      |	d<   ||	d<   |j                  |	       H |S )z.Process a file and return chunks with metadatad   rS   r<  chunk_idtotal_chunksrF  )namer  r  r  rF  r^  r9   re  rQ  r  r  )
r<   r  r  rl  rQ  r  
raw_chunksrd  rV  r  s
             r=   process_filezPDFProcessor.process_file  s    #..K   +s4::<(3.I ,,T;?&**85J95UV __T84
 !*- 	%HAu""5&1"-h$%j!(+Jn%)1o&e$	% r?   )rS   r  )r@   rA   rB   rC   r   r  r   rI   r  r  rF  r	   rL  r   rG   r\  r
   r   re  dictr_  r`  boolr  r  rJ   r?   r=   r  r  *  s:   >

y 

#d #s #J
 t 
  
    ,E Ec EC DsC}1E,F 2;s ;c ;$tCQTH~BV ;.s d5k 4 TXY]T^ &5S 5$ 54: 5nD $ 4 0d  T
 r?   r  c                       e Zd ZdZdefdZd Zdedej                  fdZ
dee   dej                  fd	Zdefd
Zd Zd ZdedefdZd Zdee   fdZdedee   fdZddededee   fdZdee   deeef   fdZdedee   dedefdZy)VectorStorez0FAISS-based vector store with BM25 hybrid searchr  c                    || _         g | _        g | _        d | _        d | _        g | _        g | _        d| _        |j                  dz  | _	        |j                  dz  | _
        |j                  dz  | _        | j                          y )Nr   zfaiss_index.indexzdocuments_metadata.jsonzembeddings.npy)r  	documents
embeddingsindexr  searchable_textsdoc_lengthsavg_doc_lengthr   
index_filemetadata_fileembeddings_file_init_modelr  s     r=   r  zVectorStore.__init__@  s    

 " !**-@@#--0II%//2BB 	r?   c                     t         r#	 t        d      | _        d| _        t	        d       | j                  d| _        t	        d       yy# t
        $ r}t	        d|        d| _        Y d}~Cd}~ww xY w)zInitialize embedding modelzall-MiniLM-L6-v2i  z&[RAG] Loaded SentenceTransformer modelz"[RAG] SentenceTransformer failed: NzT[RAG] Using hash-based embeddings (install sentence-transformers for better results))HAS_SENTENCE_TRANSFORMERSr  r  embedding_dimr  r  )r<   r  s     r=   r  zVectorStore._init_modelR  su    $"01CD
%(">@
 ::!$Dhi 	  ":1#>?!

"s   "A
 
	A2A--A2rl  r  c                    | j                   r| j                   j                  |d      S t        j                  |j                               j	                         }t        j                  |t
        j                        j                  t
        j                        }|t
        j                  j                  |      dz   z  S )zGet embedding for textT)normalize_embeddings)dtypeg&.>)r  encodehashlibsha384digestnp
frombufferuint8astypefloat32linalgnorm)r<   rl  harrs       r=   _get_embeddingzVectorStore._get_embeddinga  s    ::::$$T$EE t{{}-446A--299"**EC"))..-455r?   textsc                     | j                   r| j                   j                  |dd      S t        j                  |D cg c]  }| j	                  |       c}      S c c}w )z!Get embeddings for multiple textsT)r  show_progress_bar)r  r  r  arrayr  )r<   r  r  s      r=   _get_embeddings_batchz!VectorStore._get_embeddings_batchk  sN    ::::$$UY]$^^88UCT003CDDCs   Ac                    | j                   j                         sy	 t        | j                   dd      5 }t        j                  |      | _        ddd       t        rH| j                  j                         r.t        j                  t        | j                              | _        nG| j                  j                         r-t        j                  t        | j                              | _        | j                          t!        dt#        | j
                         d       y# 1 sw Y   xY w# t$        $ r}t!        d	|        Y d}~yd}~ww xY w)
z!Load existing index and documentsFr  r  r  Nz[RAG] Loaded 
 documentsTz[RAG] Failed to load index: )r  existsr  r  loadr  	HAS_FAISSr  faiss
read_indexrI   r  r  r  r  _prepare_searchable_textsr  r  r  r<   r#  r  s      r=   r  zVectorStore.loadr  s    !!((*	d((#@ .A!%1. T__335"--c$//.BC
%%,,."$''#d.B.B*C"D**,M#dnn"5!6jAB. .  	045	s/   D, D CD,  D)%D, ,	E5EEc                 z   	 t        | j                  dd      5 }t        j                  | j                  |dd       ddd       t
        r@| j                  4t        j                  | j                  t        | j                               n^t        | j                        dkD  rFt        j                  t        | j                        t        j                   | j                               t#        d	t        | j                         d
       y# 1 sw Y   xY w# t$        $ r}t#        d|        Y d}~yd}~ww xY w)zSave index and documentswr  r  Fr    ensure_asciiindentNr   z[RAG] Saved r  z[RAG] Failed to save index: )r  r  r  dumpr  r  r  r  write_indexrI   r  r  r  r  saver  r  r  r  r  s      r=   r  zVectorStore.save  s    	6d((#@ KA		$..!%JK TZZ3!!$**c$//.BCT__%)D001288DOO3LMLT^^!4 5Z@AK K  	60455	6s.   D $DCD DD 	D:"D55D:c                    g | _         g | _        | j                  D ]n  }| j                  |      }| j                   j	                  |j                                | j                  j	                  t        |j                                      p | j                  r1t        | j                        t        | j                        z  | _	        yd| _	        y)zBuild searchable text cacher   N)
r  r  r  _build_searchable_textr  r  r  rH  rx  r  )r<   r*  rl  s      r=   r  z%VectorStore._prepare_searchable_texts  s     ">> 	7C..s3D!!((6##C

$56	7
 PTO_O_c$"2"23c$:J:J6KKefr?   r*  c           
         g }|j                  dd      }|rXt        j                  dd|t        j                        }|j	                  |j                  dd      j                  dd             |j                  d      r|j	                  |d          |j                  d	      r)|j	                  t        |j                  d	                   |j                  d
      rH	 |j	                  dj                  |j                  d
g       D cg c]  }t        |       c}             |j                  d      rH	 |j	                  dj                  |j                  dg       D cg c]  }t        |       c}             |j	                  |j                  dd             dj                  |      S c c}w # t        $ r Y w xY wc c}w # t        $ r Y Vw xY w)z#Build searchable text from documentr<  r=  z\.(pdf|txt)$r  r  r  -rg  summary
key_pointsthemesr  )	r9   r  r  r  r  r  rI   r  r  )r<   r*  partsr<  clean_sourcepr  s          r=   r  z"VectorStore._build_searchable_text  sx    2&66/2vR]]SLLL--c37??SIJ 779LLY( 779LLSWWY/0177< SXXsww|R7P&Q!s1v&QRS 778SXXswwx7L&M!s1v&MNO
 	SWWY+,xx 'R  'N sH    *F7 
F2F7 9*G #G5G 2F7 7	GGG 	GGc                 ~   	 | j                   j                  | j                   j                  dz         }t        |dd      5 }t	        j
                  | j                  |dd       ddd       t        j                  || j                          y# 1 sw Y   *xY w# t        $ r}t        d	|        Y d}~yd}~ww xY w)
zOPersist documents metadata safely without rewriting the FAISS index/embeddings..tmpr  r  r  Fr    r  Nz,[RAG] Failed to save metadata (checkpoint): )r  with_suffixr  r  r  r  r  r7   r  r  r  )r<   tmp_pathr#  r  s       r=   save_metadata_onlyzVectorStore.save_metadata_only  s    	F))55d6H6H6O6ORX6XYHhg6 K!		$..!%JKJJx!3!34K K  	F@DEE	Fs0   A B $B&(B BB 	B<$B77B<rd  c                 2   |sy|D cg c]  }|d   	 }}| j                  |      }t        r| j                  $t        j                  | j
                        | _        t        j                  |      j                  d      }t        j                  |       | j                  j                  |       njt        | j                  t        j                        r?t        | j                        dkD  r't        j                  | j                  |g      | _        n|| _        | j                   j#                  |       |D ]n  }| j%                  |      }| j&                  j)                  |j+                                | j,                  j)                  t        |j/                                      p | j,                  r0t1        | j,                        t        | j,                        z  | _        t5        dt        |       dt        | j                                 yc c}w )zAdd documents to the storeNr  r  r   z[RAG] Added z documents. Total: )r  r  r  r  IndexFlatIPr  r  r  r  normalize_L2r  
isinstancer  ndarrayr  vstackr  r  r  r  r  r  r  rH  rx  r  r  )r<   rd  r  contentsnew_embeddingsembeddings_npr  rl  s           r=   add_documentszVectorStore.add_documents  s   *01QAiL1133H=zz!"..t/A/AB
HH^4;;IFM}-JJNN=)$//2::63t;ORS;S"$))T__n,M"N"0f%  	7E..u5D!!((6##C

$56	7
 "%d&6&6"7#d>N>N:O"ODS[M)<S=P<QRS7 2s   Hr  
new_chunksc                    g }g }t        | j                        D ]i  \  }}|j                  d      |k7  s|j                  |       t        r3t        | j                        |kD  sL|j                  | j                  |          k || _        t        rt        j                  | j                        | _
        |r|D cg c]  }|d   	 }}| j                  |      j                  d      }	t        j                  |	       | j                  j                  |	       n1|rt        j                   |      nt        j                   g       | _        | j#                          | j%                  |       yc c}w )z"Replace all documents for a sourcer<  r  r  N)rQ  r  r9   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )
r<   r  r  	keep_docskeep_embeddingsrV  r*  dr  r  s
             r=   replace_sourcezVectorStore.replace_source  s.    	/ 	?FAswwx K/  % S%9A%=#**4??1+=>		? # **4+=+=>DJ2;<QAiL<<!77AHHS
"":.

z*;Jbhh7PRPXPXY[P\DO&&( 	:& =s   =E/r  kc                   : | j                   sg S |j                         }t        j                  d|      }t	        |      }t        |      }t        |      }g }t               }	|D ]  }
t        |
      dkD  s|
|	vst        |
      }|j                  |
t        j                  dt        j                  |
      z   dz   t        j                        |dk\  |t        |
      d       |	j                  |
        t         D ]  }||v s||	vst        |      }|j                  |t        j                  dt        j                  |      z   dz   t        j                        d|t        |      d       |	j                  |        |dd D ]  }|j                         |	vst        |      }|j                  |t        j                  dt        j                  |      z   dz   t        j                        d|t        |      d	z  dd
       |	j                  |j                                 | j#                  |      }| j%                  |      }t&        r| j(                  t+        j,                  |g      j/                  d      }t1        j2                  |       t5        |dz  t        | j                               }| j(                  j7                  ||      \  }}t9        t;        |d   |d               }nt=        | j>                  t*        j@                        rt        | j>                        dkD  rvt+        jB                  | j>                  |      }t5        |dz  t        | j                               }t+        jD                  |      | d ddd   }|D cg c]	  }||   |f }}ng }g }|D ]  \  }}|t        | j                         k\  r | j                   |   }|t        | jF                        k  r| jF                  |   nd:|jI                  d:      }d}g }g }|D ]f  }|d   j7                  :      s|jI                  d      r|j                  |d          n|j                  |d          ||jI                  dd      z  }h t5        d|      }| jK                  |||      }| jL                  jN                  tQ        |      z  | jL                  jR                  |z  z   | jL                  jT                  |z  z   } t	        |      }!d}"|dk7  r|!|k(  rd}"tW        |      }#tX        jI                  |#d      }$d}%|D ]2  }&tZ        jI                  |&g       }'t]        :fd|'D              s.|%dz  }%4 t_        |      }(d})|(d   dkD  r|)dt5        |(d   d      z  z  })|(d   dkD  r|)dt5        |(d   d      z  z  })ta        |      }*d}+|*d    |*d!   kD  rd"}+n|*d!   |*d 
   dz  kD  rd#}+tc        ||      },te        |      }-tg        |      }.d}/|.r|.d   }0|0d$   dkD  r|0d   }/ti        |      }1d}2|1dd% D ](  }3|3j                         :j                         v s$|2dz  }2* t5        d&|2      }2| |"z  |$z  |%z  |)z  |+z  |,z  |-z  |/z  |2z  }4|j                  ||jk                         D 5ci c]  \  }}5|dk7  s||5 c}5}|4| tQ        |      ||||tm        |"d      tm        |$d      tm        |%d      tm        |)d      tm        |+d      tm        |,d      tm        |-d      tm        |/d      tm        |2d      d'	|!|#|.r|.d   ndto        |      d(        |jq                  d) d*       g }6i }7|D ]{  }8|8d+   jI                  d,d-      }9|7jI                  |9d      | jL                  jr                  k  sB|6j                  |8       |7jI                  |9d      d.z   |7|9<   t        |6      k\  sz |6S  |6S c c}w c c}5}w )/z?Enhanced hybrid vector + keyword search with legal intelligencer{  r    z\br   )r  r  is_legalr"  r&  TN   r  )r  r  r  r"  r&  expandedr  r  r  r=  r          r  r  r  r&  r   r	  ru  r{  c              3   ^   K   | ]$  }|j                         j                         v  & y wr  r  )rp  r  searchable_texts     r=   rr  z%VectorStore.search.<locals>.<genexpr>  s%     Z4tzz|'<'<'>>Zs   *-gQ?r  g{Gz?r"   r  皙?r  r  g?g?r  r  rb  )	r{  r   r  r  r  precedentialtemporalargument	cross_ref)r  r:  r}  
base_scorevector_scorekeyword_score
bm25_scorematched_termsexpanded_matchesboostsr{  court_levelargument_structurer  c                     | d   S )Nr}  rJ   r  s    r=   r  z$VectorStore.search.<locals>.<lambda>  s
    aj r?   r  r:  r<  r  rM   ):r  r  r  r|  r~  r  r  r  r  r  r  compileescaper  r#  r  LEGAL_TERMS_compute_idfr  r  r  r  r  r  r  r  r  r  r  zipr  r  r  dotargsortr  r9   _compute_bm25r  r%   rH   r&   r(   r  JURISDICTION_HIERARCHYr  r  r  r  rA  rR  r  r  rw  r  r  r  r)   );r<   r  r  r  query_wordsquery_phaserelevant_doctrinesr	  keyword_patternsadded_termsword	term_tierr  	idf_tablequery_embedding
oversamplescoresindicesvector_resultssimilaritiestop_indicesrV  
candidatesr  idxr*  doc_contentr  r  r  r  r  combined	doc_phasephase_boostr  jurisdiction_boostdoctrine_boostr  doctrine_termscitation_countscitation_boostr  treatment_boostprecedential_boosttemporal_boostr  argument_boostbest_argrelated_conceptscross_ref_boostconceptfinal_scorevr>  source_countsr  r<  r  s;                                                             @r=   r  zVectorStore.search  s/   ~~Ikkmjj[9 .e4 4E: 4E: e   
	&D4y1}[!8)$/	 '' !zz%"))D/*AE*I2==Y )Q%-d3)  %
	&   
	&D{"t;'>)$/	 '' !zz%"))D/*AE*I2==Y $%-d3)  %
	& #3B' 	.Dzz|;.)$/	 '' !zz%"))D/*AE*I2==Y $%-d3c9 $)  

-	. %%&67	 --e4/ hh'89@@KO/QUC$78J"jj//LOFG!#fQi"<=N $//2::63t;ORS;S!vvdooG QDNN(;<
 jj6
{|DTrTJ@K!L1<?A"6!L!L!# 
!/ y	L#c$..))..%C<?#dF[F[B\<\d33C8bdO'')_=K  MM!% :Y<&&7uuZ((//&	:%,,QvY7!QUU8S%99M:  ]3M ++C1A9MJ 0053FF11MAB..;<  0<IKi'I,D" -[9K!7!;!;K!M !N. +!0!4!4Xr!BZ>ZZ"d*N+ <KHO Ny)A-$_Y-G)K"KKy)A-$_Y-G)K"KK 3;?I!O$y'<<"&:&:)>)BB"& "9c!J 7s;N 7{CN N)!,L)C/%-h%7N  4E:!O+BQ/ ,==?o&;&;&==#t+O, "$8O $k14FFW()+:;=OP()+9:<KLK &.1iikLdaQ)^QTL$& %l 3!.(!.$4";2$)*<a$@ %na 8 %na 8!&!:$)*<a$@ %na 8 %na 8!&!:
 #*;InQ&7t$<[$I1 Ay	x 	0$? 	Az]&&x;F  +dkk.O.OOq!(5(9(9&!(Dq(Hf%w<1$	 ] "MP Ms   _"_'-_'r  c                    i }t        | j                        }|dk(  r|S |D ]]  d   }||v rt        fd| j                  D              }t	        j
                  ||z
  dz   |dz   z  dz         }t        |d      ||<   _ |S )zCompute IDF for termsr   r  c              3   L   K   | ]  }d    j                  |      sd  yw)r  rM   N)r  )rp  rl  r  s     r=   rr  z+VectorStore._compute_idf.<locals>.<genexpr>  s#     V4AiL<O<OPT<UQVs   $$r$   r	  r  )r  r  rx  r  mathlogry  )r<   r  r  
total_docsr  dfidfr  s          @r=   r  zVectorStore._compute_idf  s    	(
? 	,AV9Dy V4#8#8VVB((JOc1b3h?#EFC!#smIdO	, r?   r  r  c                    |r|t        | j                        k\  ry| j                  |   }|t        | j                        k  r| j                  |   nt        |j                               }| j                  xs |xs d}d}|D ]q  }t        |d   j                  |            }	|	dk(  r&|j                  |d   d      }
|	t        dz   z  }|	t        dt        z
  t        ||z  z  z   z  z   }||
||z  z  z  }s |S )z!Compute BM25 score for a documentr  rM   r  r   r  r	  )	r  r  r  rH  r  r|  r9   BM25_K1BM25_B)r<   r  r  r  doc_textdoc_lenavg_lenbm25r  tfr,  	numeratordenominators                r=   r  zVectorStore._compute_bm25  s   3#d&;&;"<<((-+.T5E5E1F+F$""3'CPXP^P^P`La%%55A 	4AQy\))(34BQw--&	3/Cgm,Iw#,7WCT9U*UVVKC9{233D	4 r?   N)r*   )r@   rA   rB   rC   r   r  r  rI   r  r  r  r	   r  r  r  r  r  r
   r  r  r  r  rG   r  rH   r  r  rJ   r?   r=   r  r  =  s   :y $j63 62:: 6E49 E Ed *6
g$ 3 BF TDJ  TD'# '4: '<]C ]C ]d ]~T$Z De4D $ T
 t PU r?   r  c            	       P    e Zd ZdZdefdZdedee   fdZ	dedededee   fd	Z
y
)
OCRServicezVOCR fallback for scanned PDFs using DeepSeek OCR (port 5003) and Tesseract (port 5002)r  c                     || _         t        j                  j                  dd      | _        t        j                  j                  dd      | _        y )NDEEPSEEK_OCR_URLr,   TESSERACT_OCR_URLr.   )r  r7   r8   r9   r-   r/   r  s     r=   r  zOCRService.__init__  s;     "

/AC^ _!#0CE`!ar?   r  r  c                     t         sy| j                  | j                  |d      }|r|S | j                  | j                  |d      }|S )zCTry OCR extraction with DeepSeek (primary) and Tesseract (fallback)NDeepSeek	Tesseract)r  _try_ocrr-   r/   )r<   r  rl  s      r=   r  zOCRService.extract_text"  sI     }}T22IzJK }}T33YLr?   urlservice_namec           	      <   	 t        d| d| d       t        |d      5 }d|j                  |dfi}t        j                  ||d      }d	d	d	       j
                  d
k(  r|j                         }|j                  d      r4|j                  d      r#t        d| dt        |d          d       |d   S |j                  d      r#t        d| dt        |d          d       |d   S y	# 1 sw Y   xY w# t        $ r}t        d| d|        Y d	}~y	d	}~ww xY w)zTry OCR servicez[RAG] Trying z OCR at ...rbfilezapplication/pdfiX  )filesr  Nr  successrl  z[RAG] z OCR extracted z charsz OCR failed: )
r  r  r  r  r   r  r  r9   r  r  )	r<   r@  r  rA  r#  rF  r  r2   r  s	            r=   r?  zOCRService._try_ocr0  s2   	;M,xuC@Ai& H!)..!5F!GH#==E3GH ##s*}}88I&488F+;F<.DL@Q?RRXYZ<'XXf%F<.DL@Q?RRXYZ<' H H  	;F<.aS9::	;s5   C7 *C+
A+C7 63C7 +C40C7 7	D DDN)r@   rA   rB   rC   r   r  r   r   rI   r  r?  rJ   r?   r=   r8  r8    sP    `by bd x} C D  QT r?   r8  c                       e Zd ZdZdefdZdedefdZdededefdZ	dd	ed
e
dee   fdZdededee   fdZd	edee   fdZdededefdZddee   dee   fdZy)EnrichmentServicez&Service for LLM-based chunk enrichmentr  c                     || _         t               | _        t        t        j
                  j                  dd            | _        d| _        y )NENRICHMENT_BUDGETi?B r   )	r  r  llmrG   r7   r8   r9   enrichment_budgetenrichment_usedr  s     r=   r  zEnrichmentService.__init__L  s7    <!$RZZ^^4G%P!Q r?   r  r  c                    |j                  dd      }|rt        |      dk  r|S | j                  ||      }| j                  | j                  k  r:| j                  ||      }|r&|j                  |       | xj                  dz  c_        |j                         }|j                  dd      |d<   |j                  dg       |d<   |j                  dg       |d<   d|d	<   |S )
z1Enrich a single chunk with LLM-generated metadatar  r=  rL  rM   r  r  r  Tenriched)r9   r  _generate_basic_cardrN  rM  _enhance_with_llmupdatecopy)r<   r  r  cardenhancedenriched_chunks         r=   enrich_chunkzEnrichmentService.enrich_chunkR  s    ))Ir*#g,+L ((%8 $"8"88--gt<HH%$$)$ $(HHY$;y!'+xxb'A|$#'88Hb#9x %)z" r?   r  c                    t        j                  d|      }|D cg c]/  }t        |j                               dkD  s |j                         1 }}|rdj	                  |dd       dd n|dd }|dd D cg c]  }|dd 	 }}| j                  |      }||||j                  d	d
      |j                  dd      dS c c}w c c}w )z)Generate basic knowledge card without LLMz(?<=[.!?])\s+r*   r  Nr    r   r     r<  r  r  r   )r  r  r  r<  r  )r  rH  r  r  r  _extract_themesr9   )r<   r  r  rJ  rK  r  r  r  s           r=   rQ  z&EnrichmentService._generate_basic_cardp  s     HH-w7	(1I1S^b5HQWWYI	I 4=#((9Ra=)$3/'$3- (1!}5!ag5
5 %%g. $ii)4		*a0
 	
 J 6s   !B=B=6Crl  
max_themesc                    t        j                  d|j                               }|sg S h d}i }|D ].  }||vst        |      dk\  s|j	                  |d      dz   ||<   0 t        |j                         d d      }|d	| D cg c]  \  }}|d
k\  s| }	}}|j                         }
g }t        dft        dft        d
ft        dft        dft        dffD ]L  \  }}|D ]2  }||
v s||	vs||vs|j                  |       t        |      |k\  s2 n t        |      |k\  sL n |D ]+  }||	vs|	j                  |       t        |	      |d
z  k\  s+ n |	d	| S c c}}w )z'Extract themes using frequency analysisz\b[a-zA-Z]{4,}\b>%   alsobeeneachevenfromhaveintojustre  moremostonlysomethanthatthemtheythisverywerewhatrU  r   withyouraboutbeingcouldr  theirthererV  whichwouldshouldbecausethroughr  r   rM   c                     | d   S NrM   rJ   r  s    r=   r  z3EnrichmentService._extract_themes.<locals>.<lambda>  s
    1 r?   Tr  Nr    r  r  r"   )r  r|  r  r  r9   sortedrw  r  r  r  r  r  r  r  )r<   rl  r\  r~  
stop_wordsword_countsr  sorted_wordscountr  rq  legal_themes_found
tier_termstier_numr  s                  r=   r[  z!EnrichmentService._extract_themes  s   

.

=I

  	AD:%#d)q.$/OOD!$<q$@D!	A
 k//1~tT*6{
*CR;4uPQz$RR ZZ\
1a0<2C1a0<2C%
 
	 J # :%$f*<M_A_&--d3-.*<	
 %&*4
	 ' 	D6!d#v;*q.0		 kz""1 Ss   EE	base_cardc                     d|dd  d|d    d}	 | j                   j                  |dd	      }|r| j                  |      S 	 y# t        $ r}t	        d
|        Y d}~yd}~ww xY w)zEnhance card with LLMzsYou are refining a knowledge card for a sports betting document. Extract the most important information.

EXCERPT:
NrK   z

CURRENT SUMMARY:
r  a   

Provide an improved knowledge card in this exact format:
Summary: <1-2 sentences capturing the main point about sports betting>
Key Points:
- <concise bullet 1>
- <concise bullet 2>
- <concise bullet 3>
Themes: theme1, theme2, theme3

Begin the card now:r  r   r  r  z[RAG] LLM enhancement error: )rL  r  _parse_llm_responser  r  )r<   r  r  r  r  r  s         r=   rR  z#EnrichmentService._enhance_with_llm  s     	#  9  
$	7xx((CS(QH//99 
   	71!566	7s   0A 	A%A  A%c                     |syi }t        j                  d|t         j                        }|r%|j                  d      j	                         dd |d<   g }t        j                  d|t         j                        }|r|j                  d      j	                         j                  d      }|D ]\  }|j	                         }|j                  d      r|dd j	                         }|s:t        |      d	kD  sI|j                  |dd
        ^ |r|dd	 |d<   t        j                  d|      }|rT|j                  d      j                  d      D 	cg c]  }	|	j	                          }
}	|
D 	cg c]  }	|	s|		 c}	dd	 |d<   |r|S dS c c}	w c c}	w )z%Parse LLM output into structured cardNz)Summary:\s*(.+?)(?=Key Points:|Themes:|$)rM   r   r  z Key Points:\s*(.*?)(?=Themes:|$)r  r  r  rZ  r  zThemes:\s*(.+),r  )	r  r  r  r  r  rH  
startswithr  r  )r<   rl  r  summary_matchr  kp_matchrU  rW  themes_matchr  r  s              r=   r  z%EnrichmentService._parse_llm_response  s|    		"NPTVXV_V_` - 3 3A 6 < < >t DF9 
99@$		RNN1%++-33D9E 2zz|??3'8>>+DCIM%%d4Cj12 #-bq>F<  yy!2D9)5););A)>)D)DS)IJAaggiJFJ+17aQ7;F8v)T) K7s   F+F3ForiginalrU  c                    d|j                  dd       d|j                  dd       dd|j                  d	d
       g}|j                  d      r/|j                  d       |d   D ]  }|j                  d|         |j                  d      r&|j                  ddj                  |d                 |j                  d|        dj                  |      S )z$Compose enriched content for storagez	[Source: r<  Unknownz	 | Chunk r  ?]z	Summary: r  r=  r  zKey Points:z- r  zThemes: z, z
Original Content:
r  )r9   r  r  )r<   r  rU  rU  points        r=   _compose_enriched_contentz+EnrichmentService._compose_enriched_content  s     956iUX@Y?ZZ[\B/01

 88L!LL'l+ +r%\*+ 88HLL8DIId8n$=#>?@,XJ78yyr?   Nrd  c           
         g }t        |      }t        |      D ]c  \  }}| j                  |      }|j                  |       |r ||dz   |       |dz   dz  dk(  sBt	        d|dz    d| d| j
                   d       e |S )z(Enrich all chunks with progress trackingrM   r*   r   z[RAG] Enriched /z chunks (LLM used: ))r  rQ  rX  r  r  rN  )r<   rd  progress_callbackrP  totalrV  r  rW  s           r=   enrich_all_chunksz#EnrichmentService.enrich_all_chunks
  s    F!&) 	cHAu!..u5NOON+ !!a%/A|q Awaw6I$J^J^I__`ab	c r?   )r  r  )r@   rA   rB   rC   r   r  r
   rX  rI   rQ  rG   r	   r[  r   rR  r  r  r  rJ   r?   r=   rI  rI  I  s    0!y !$ 4 <
C 
 
 
./#C /#S /#c /#b  (4. : *  *  *D #  T  c  &T
 tTXz r?   rI  	templates)template_folder)case_bpz,[RAG] Case management API loaded at /cases/*z%[RAG] Case management not available: idler=  ENRICHMENT_CHECKPOINT_EVERYr*   )
statusprogressr  llm_used
updated_at
started_atr<  total_targetdone_targetcheckpoint_everyzenrichment_status.jsonc                  N   	 t         j                  t         j                  dz         } t        | dd      5 }t	        j
                  t        |dd       ddd       t        j                  | t                y# 1 sw Y   $xY w# t        $ r}t        d	|        Y d}~yd}~ww xY w)
z4Persist enrichment status for resume after restarts.r  r  r  r  Fr    r  Nz+[RAG] Failed to persist enrichment status: )ENRICHMENT_STATUS_FILEr  r  r  r  r  enrichment_statusr7   r  r  r  )r  r#  r  s      r=   _save_enrichment_status_to_diskr  G  s    A)556L6S6SV\6\](C'2 	JaII'qI	J


834	J 	J  A;A3?@@As.   4B A7"B 7B <B 	B$BB$c            	      P   	 t         j                         rUt        t         dd      5 } t        j                  |       }ddd       t        t              rt        j                  |       t        j                  dd       t        j                  dd	       t        j                  d
d       t        j                  dd	       t        j                  dd       t        j                  dd       t        j                  dd       t        j                  dd	       t        j                  dd	       t        j                  dt        t        j                  j                  dd                   t        j                  d      dk(  rodt        d<   t        j                  d
      xs dt        d
<   t!        j"                  t$        j&                        j)                         dz   t        d<   t+                yy# 1 sw Y   xY w# t        $ r}t        d|        Y d}~d}~ww xY w)zRRestore enrichment status on startup (and mark stale running jobs as interrupted).r  r  r  Nz([RAG] Failed to load enrichment status: r  r  r  r   r  r=  r  r  r  r<  r  r  r  r  r*   runninginterrupted4Previous enrichment was interrupted. You can resume.Z)r  r  r  r  r  r  r  r  rS  r  r  
setdefaultrG   r7   r8   r9   r   rM  r   utc	isoformatr  )r#  r2   r  s      r=   !_load_enrichment_status_from_diskr  R  s   >!((*,cGD $yy|$$%!((.
   62  Q/  B/  Q/  t4  t4  40  3  2  !3SHegi9j5kl X&)3&3(#!!), FE 	)$ +3,,x||*D*N*N*PSV*V,'') 4)$ $  >8<==>s-   &H G6-H 6H ;H 	H%H  H%FLASK_API_KEYzeventheodds-flask-api-key-2025c                 .     t                fd       }|S )Nc                      t         j                  j                  dd      }|t        k7  rt	        ddi      dfS  | i |S )Nz	X-API-Keyr=  errorzInvalid API keyi  )r   r  r9   r  r   )argskwargsapi_keyr#  s      r=   	decoratedz"require_api_key.<locals>.decoratedx  sG    //%%k26m#G%678#==$!&!!r?   r   )r#  r  s   ` r=   require_api_keyr  w  s!    
1X" "
 r?   z9[RAG] Initializing Advanced Legal Document RAG Service...job_idr  r  r  c                 ~    t         5  |||t        j                         d|t        | <   ddd       y# 1 sw Y   yxY w)zUpdate upload progress)r  r  r  updatedN)progress_locktimeupload_progress)r  r  r  r  extras        r=   update_progressr    sB    	 
 yy{	#

 #

 
 
s   #3<r  r+  c                     	 t        | ddd       t        j                  |      }|rt        |j	                               dk  r#t        | ddd       t
        j                  |      }|rt        |j	                               dk  rt        | ddd	       y
t        | dddt        |       d       t        j                  ||      }t        j                  ||      }|st        | ddd       y
t        |      D ]"  \  }}||d<   ||d<   t        |      |d<   ||d<   $ t        | dddt        |       d       t        j                  ||       t        | ddd       t        j                          t        | dddt        |       dt        |      |t        t        j                               y
# t        $ rF}t        d|        dd
l}	|	j!                          t        | dddt#        |              Y d
}~y
d
}~ww xY w)z#Process uploaded file in background
processingr*   z Extracting text from document...r  rI  z%Text extraction failed, trying OCR...failedr   z$Failed to extract text from documentN(   z
Extracted z characters. Chunking...z'No valid chunks extracted from documentr<  r  r  rF  <   zCreated z! chunks. Generating embeddings...Z   Saving index...	completedzSuccessfully processed! Added z chunks.)chunks_addedrF  total_documentsz[RAG] Error processing file: Error: )r  pdf_processorr  r  r  ocr_servicerF  re  rQ  vector_storer  r  r  r  r  	traceback	print_excrI   )
r  r  r+  rl  rQ  rd  rV  r  r  r  s
             r=   process_file_backgroundr    s   6Ab2TU )))4 s4::<(3.FL"6]^++I6Ds4::<(3.FHa1WXbJs4ykIa2bc !55dHE ))$9FHa1Z[ "&) 	.HAu&E(O !E*$'KE.!%-E/"		. 	bHS[MIj2kl 	##Hf5b2CDK,S[MBV" 6 67	
  A-aS12!ws1vh-?@@	As&   BF. AF. +CF. .	G=7<G88G=r  c                      t        d      S )zServe the public landing pagezlanding.html)r   rJ   r?   r=   landingr    s     >**r?   z/adminc                       y)z@Serve upload interface with chunk viewer and enrichment controlsuS  <!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Legal Document RAG - Document Upload</title>
    <style>
        * { box-sizing: border-box; margin: 0; padding: 0; }
        body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); min-height: 100vh; padding: 20px; color: #e4e4e7; }
        .container { max-width: 1000px; margin: 0 auto; }
        h1 { color: #fbbf24; margin-bottom: 10px; font-size: 2rem; }
        .subtitle { color: #9ca3af; margin-bottom: 30px; }
        .card { background: rgba(255,255,255,0.05); border-radius: 12px; padding: 24px; margin-bottom: 20px; border: 1px solid rgba(255,255,255,0.1); }
        .card h2 { color: #fbbf24; font-size: 1.25rem; margin-bottom: 16px; }
        .stats { display: grid; grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); gap: 16px; }
        .stat { background: rgba(251,191,36,0.1); padding: 16px; border-radius: 8px; text-align: center; }
        .stat-value { font-size: 1.75rem; font-weight: bold; color: #fbbf24; }
        .stat-label { color: #9ca3af; font-size: 0.75rem; }
        form { display: flex; flex-direction: column; gap: 16px; }
        input[type="text"], input[type="file"], select { padding: 12px; border-radius: 8px; border: 1px solid rgba(255,255,255,0.2); background: rgba(0,0,0,0.2); color: white; font-size: 1rem; }
        input[type="file"] { cursor: pointer; }
        button { background: #fbbf24; color: #1a1a2e; padding: 12px 20px; border: none; border-radius: 8px; font-weight: 600; font-size: 0.9rem; cursor: pointer; transition: all 0.2s; }
        button:hover { background: #f59e0b; transform: translateY(-1px); }
        button:disabled { opacity: 0.5; cursor: not-allowed; transform: none; }
        button.secondary { background: #6366f1; color: white; }
        button.secondary:hover { background: #4f46e5; }
        button.danger { background: #ef4444; color: white; }
        .btn-group { display: flex; gap: 10px; flex-wrap: wrap; }
        .progress-container { margin-top: 20px; }
        .progress-bar { height: 8px; background: rgba(255,255,255,0.1); border-radius: 4px; overflow: hidden; }
        .progress-fill { height: 100%; background: linear-gradient(90deg, #fbbf24, #f59e0b); transition: width 0.3s; }
        .progress-text { margin-top: 8px; color: #9ca3af; font-size: 0.875rem; }
        .result { padding: 16px; border-radius: 8px; margin-top: 16px; }
        .result.success { background: rgba(34,197,94,0.2); border: 1px solid rgba(34,197,94,0.3); }
        .result.error { background: rgba(239,68,68,0.2); border: 1px solid rgba(239,68,68,0.3); }
        .result.info { background: rgba(59,130,246,0.2); border: 1px solid rgba(59,130,246,0.3); }
        .tabs { display: flex; gap: 10px; margin-bottom: 20px; border-bottom: 1px solid rgba(255,255,255,0.1); padding-bottom: 10px; }
        .tab { padding: 8px 16px; cursor: pointer; border-radius: 6px; color: #9ca3af; }
        .tab.active { background: rgba(251,191,36,0.2); color: #fbbf24; }
        .tab:hover { background: rgba(255,255,255,0.05); }
        .chunk-list { max-height: 500px; overflow-y: auto; }
        .chunk-item { background: rgba(0,0,0,0.2); padding: 16px; border-radius: 8px; margin-bottom: 12px; border-left: 4px solid #6366f1; }
        .chunk-item.enriched { border-left-color: #22c55e; }
        .chunk-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px; }
        .chunk-source { color: #fbbf24; font-weight: 600; font-size: 0.875rem; }
        .chunk-badge { padding: 2px 8px; border-radius: 4px; font-size: 0.75rem; }
        .chunk-badge.enriched { background: rgba(34,197,94,0.3); color: #22c55e; }
        .chunk-badge.raw { background: rgba(156,163,175,0.3); color: #9ca3af; }
        .chunk-summary { color: #e4e4e7; margin-bottom: 8px; font-size: 0.9rem; }
        .chunk-themes { display: flex; gap: 6px; flex-wrap: wrap; margin-bottom: 8px; }
        .theme-tag { background: rgba(99,102,241,0.3); color: #a5b4fc; padding: 2px 8px; border-radius: 4px; font-size: 0.75rem; }
        .chunk-content { color: #9ca3af; font-size: 0.8rem; background: rgba(0,0,0,0.2); padding: 10px; border-radius: 4px; white-space: pre-wrap; max-height: 150px; overflow-y: auto; }
        .pagination { display: flex; justify-content: center; gap: 10px; margin-top: 16px; }
        .hidden { display: none !important; }
    </style>
</head>
<body>
    <div class="container">
        <h1>🏆 Legal Document RAG</h1>
        <p class="subtitle">Advanced Document Processing with FAISS + BM25 + DeepSeek LLM Enrichment</p>
        
        <div class="tabs">
            <div class="tab active" onclick="showTab('status')">📊 Status</div>
            <div class="tab" onclick="showTab('upload')">📤 Upload</div>
            <div class="tab" onclick="showTab('chunks')">📄 View Chunks</div>
            <div class="tab" onclick="showTab('enrich')">✨ Enrichment</div>
        </div>
        
        <!-- Status Tab -->
        <div id="tab-status" class="card">
            <h2>📊 System Status</h2>
            <div class="stats" id="stats">
                <div class="stat"><div class="stat-value" id="docCount">-</div><div class="stat-label">Total Chunks</div></div>
                <div class="stat"><div class="stat-value" id="enrichedCount">-</div><div class="stat-label">Enriched</div></div>
                <div class="stat"><div class="stat-value" id="rawCount">-</div><div class="stat-label">Raw</div></div>
                <div class="stat"><div class="stat-value" id="fileCount">-</div><div class="stat-label">Files</div></div>
                <div class="stat"><div class="stat-value" id="searchType">-</div><div class="stat-label">Search</div></div>
            </div>
        </div>
        
        <!-- Upload Tab -->
        <div id="tab-upload" class="card hidden">
            <h2>📤 Upload Document</h2>
            <form id="uploadForm">
                <input type="text" id="apiKey" placeholder="API Key" value="eventheodds-flask-api-key-2025">
                <input type="file" id="fileInput" accept=".pdf,.txt" required>
                <div class="btn-group">
                    <button type="submit" id="submitBtn">Upload & Process</button>
                    <button type="button" class="secondary" onclick="uploadWithEnrich()">Upload + Enrich</button>
                </div>
            </form>
            <div class="progress-container" id="progressContainer" style="display:none;">
                <div class="progress-bar"><div class="progress-fill" id="progressFill" style="width:0%"></div></div>
                <div class="progress-text" id="progressText">Starting...</div>
            </div>
            <div id="result"></div>
        </div>
        
        <!-- Chunks Tab -->
        <div id="tab-chunks" class="card hidden">
            <h2>📄 View Chunks</h2>
            <div style="display:flex; gap:10px; margin-bottom:16px; flex-wrap:wrap;">
                <select id="chunkFilter" onchange="loadChunks()">
                    <option value="all">All Chunks</option>
                    <option value="enriched">Enriched Only</option>
                    <option value="raw">Raw Only</option>
                </select>
                <select id="sourceFilter" onchange="loadChunks()">
                    <option value="">All Sources</option>
                </select>
                <button class="secondary" onclick="loadChunks()">🔄 Refresh</button>
            </div>
            <div class="chunk-list" id="chunkList">Loading...</div>
            <div class="pagination" id="pagination"></div>
        </div>
        
        <!-- Enrichment Tab -->
        <div id="tab-enrich" class="card hidden">
            <h2>✨ LLM Enrichment</h2>
            <p style="color:#9ca3af; margin-bottom:16px;">Use DeepSeek LLM to generate summaries, key points, and themes for all chunks.</p>
            <div class="btn-group">
                <button onclick="startEnrichment()">🚀 Enrich All Raw Chunks</button>
                <button class="secondary" onclick="resumeEnrichment()">▶️ Resume</button>
                <button class="secondary" onclick="checkEnrichmentStatus()">🔄 Check Status</button>
            </div>
            <div class="progress-container" id="enrichProgress" style="display:none;">
                <div class="progress-bar"><div class="progress-fill" id="enrichProgressFill" style="width:0%"></div></div>
                <div class="progress-text" id="enrichProgressText">Starting...</div>
            </div>
            <div id="enrichResult"></div>
        </div>
    </div>
    
    <script>
        const BASE_URL = window.location.pathname.replace(/\/admin.*$/, '').replace(/\/$/, '');
        let currentPage = 1;
        
        function showTab(tabName) {
            document.querySelectorAll('.card').forEach(c => c.classList.add('hidden'));
            document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
            document.getElementById('tab-' + tabName).classList.remove('hidden');
            event.target.classList.add('active');
            
            if (tabName === 'chunks') loadChunks();
            if (tabName === 'status') loadStatus();
            if (tabName === 'enrich') checkEnrichmentStatus();
        }
        
        // Refresh the dashboard counters, rebuild the source filter dropdown and
        // show which search backend is active. Failures are logged to the console
        // and leave the previously displayed values in place.
        async function loadStatus() {
            try {
                const resp = await fetch(BASE_URL + '/documents', {cache:'no-store'});
                if (!resp.ok) throw new Error('GET /documents failed with HTTP ' + resp.status);
                const data = await resp.json();

                const totalChunks = data.total_chunks || 0;
                const totalEnriched = data.total_enriched || 0;
                document.getElementById('docCount').textContent = totalChunks;
                document.getElementById('enrichedCount').textContent = totalEnriched;
                document.getElementById('rawCount').textContent = totalChunks - totalEnriched;
                document.getElementById('fileCount').textContent = data.total_sources || 0;

                // Update source filter. Options are created as DOM nodes so that a
                // source name containing markup characters cannot inject HTML, and
                // the select is not re-parsed once per document.
                const sourceSelect = document.getElementById('sourceFilter');
                const previousSource = sourceSelect.value;
                sourceSelect.innerHTML = '<option value="">All Sources</option>';
                (data.documents || []).forEach(d => {
                    sourceSelect.add(new Option(`${d.source} (${d.chunk_count})`, d.source));
                });
                // Keep the user's current filter if that source still exists;
                // otherwise the "All Sources" entry stays selected.
                const stillListed = Array.from(sourceSelect.options).some(o => o.value === previousSource);
                if (previousSource && stillListed) sourceSelect.value = previousSource;

                // Get search type
                const statusResp = await fetch(BASE_URL + '/status', {cache:'no-store'});
                if (!statusResp.ok) throw new Error('GET /status failed with HTTP ' + statusResp.status);
                const statusData = await statusResp.json();
                document.getElementById('searchType').textContent = statusData.has_faiss ? 'FAISS' : 'Cosine';
            } catch(e) {
                console.error('Status load failed:', e);
            }
        }
        
        // Fetch one page of chunks for the current filter/source selection and
        // render the chunk list plus the Prev/Next pagination controls.
        // Reads and may reset the shared `currentPage` variable.
        async function loadChunks() {
            const filter = document.getElementById('chunkFilter').value;
            const source = document.getElementById('sourceFilter').value;
            const chunkList = document.getElementById('chunkList');
            const paginationEl = document.getElementById('pagination');
            chunkList.innerHTML = 'Loading...';

            // Chunk text, summaries, themes and source names come from uploaded
            // documents, so they must be escaped before being placed in markup.
            const HTML_ENTITIES = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
            const esc = value => String(value == null ? '' : value).replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);

            let url = BASE_URL + '/chunks?page=' + currentPage + '&per_page=10';
            if (filter === 'enriched') url += '&enriched=true';
            if (filter === 'raw') url += '&raw=true';
            if (source) url += '&source=' + encodeURIComponent(source);

            try {
                const resp = await fetch(url, {cache:'no-store'});
                if (!resp.ok) throw new Error('HTTP ' + resp.status);
                const data = await resp.json();

                if (!data.chunks || data.chunks.length === 0) {
                    // A narrower filter can leave currentPage past the last page;
                    // fall back to the first page instead of showing an empty list.
                    if (currentPage > 1) {
                        currentPage = 1;
                        return loadChunks();
                    }
                    chunkList.innerHTML = '<p style="color:#9ca3af;">No chunks found</p>';
                    if (paginationEl) paginationEl.innerHTML = '';
                    return;
                }

                chunkList.innerHTML = data.chunks.map(c => {
                    const themes = Array.isArray(c.themes) ? c.themes : [];
                    const themeTags = themes.map(t => `<span class="theme-tag">${esc(t)}</span>`).join('');
                    return `
                    <div class="chunk-item ${c.enriched ? 'enriched' : ''}">
                        <div class="chunk-header">
                            <span class="chunk-source">${esc(c.source)} #${esc(c.chunk_id)}</span>
                            <span class="chunk-badge ${c.enriched ? 'enriched' : 'raw'}">${c.enriched ? '✓ Enriched' : 'Raw'}</span>
                        </div>
                        ${c.summary ? `<div class="chunk-summary"><strong>Summary:</strong> ${esc(c.summary)}</div>` : ''}
                        ${themeTags ? `<div class="chunk-themes">${themeTags}</div>` : ''}
                        <div class="chunk-content">${esc(c.content_preview)}</div>
                    </div>
                `;
                }).join('');

                // Pagination. Values are coerced to numbers because they are
                // interpolated into inline onclick handlers.
                const pag = data.pagination || {};
                const page = Number(pag.page) || 1;
                const pages = Number(pag.pages) || 1;
                const total = Number(pag.total) || 0;
                paginationEl.innerHTML = `
                    <button ${page <= 1 ? 'disabled' : ''} onclick="currentPage=${page - 1};loadChunks()">← Prev</button>
                    <span style="color:#9ca3af;">Page ${page} of ${pages} (${total} chunks)</span>
                    <button ${page >= pages ? 'disabled' : ''} onclick="currentPage=${page + 1};loadChunks()">Next →</button>
                `;
            } catch(e) {
                console.error('Chunk load failed:', e);
                chunkList.innerHTML = '<p style="color:#ef4444;">Error loading chunks</p>';
                // Do not leave controls from an earlier successful page on screen.
                if (paginationEl) paginationEl.innerHTML = '';
            }
        }
        
        // Ask the server to start (or resume) enrichment via POST /enrich, then
        // hand over to pollEnrichment() to track progress. Errors are shown in
        // the #enrichResult area.
        async function startEnrichment() {
            // NOTE(review): the fallback below ships a shared API key to every
            // browser that loads this page. It is kept so that leaving the field
            // blank behaves as before, but it should be removed/rotated and the
            // key required from the user.
            const apiKey = document.getElementById('apiKey').value.trim() || 'eventheodds-flask-api-key-2025';
            const progressEl = document.getElementById('enrichProgress');
            const resultEl = document.getElementById('enrichResult');

            // Render messages as text nodes so server/exception text cannot inject markup.
            const showError = text => {
                const box = document.createElement('div');
                box.className = 'result error';
                box.textContent = '❌ ' + text;
                resultEl.innerHTML = '';
                resultEl.appendChild(box);
            };

            progressEl.style.display = 'block';
            resultEl.innerHTML = '';

            try {
                const resp = await fetch(BASE_URL + '/enrich', {
                    method: 'POST',
                    headers: {'X-API-Key': apiKey, 'Content-Type': 'application/json'}
                });
                const data = await resp.json();

                if (data.error || !resp.ok) {
                    // A rejected request without an "error" field is still a failure.
                    showError(data.error || data.message || ('Request failed (HTTP ' + resp.status + ')'));
                } else {
                    pollEnrichment();
                }
            } catch(e) {
                // The request never produced a usable reply, so do not leave the
                // progress bar sitting on its initial text.
                progressEl.style.display = 'none';
                showError('Error: ' + e.message);
            }
        }

        // Resume an interrupted enrichment run. There is no separate resume
        // endpoint: this issues the same request as startEnrichment(), and (per the
        // original note here) the server skips chunks that are already enriched.
        async function resumeEnrichment() {
            return startEnrichment();
        }
        
        // Poll GET /enrichment-status once per second, mirroring progress into the
        // enrichment progress bar until the job reports completed, failed,
        // interrupted or idle. Only one polling loop runs at a time; extra calls
        // while a loop is active return immediately.
        async function pollEnrichment() {
            // checkEnrichmentStatus() calls this every time the tab is opened while
            // a job is running; without this guard each visit would add another
            // timer chain hitting the server.
            if (pollEnrichment.active) return;
            pollEnrichment.active = true;

            const resultEl = document.getElementById('enrichResult');
            // Server-provided messages are rendered as text, never parsed as HTML.
            const showResult = (kind, text) => {
                const box = document.createElement('div');
                box.className = 'result ' + kind;
                box.textContent = text;
                resultEl.innerHTML = '';
                resultEl.appendChild(box);
            };
            const stop = () => { pollEnrichment.active = false; };

            const poll = async () => {
                let data;
                try {
                    const resp = await fetch(BASE_URL + '/enrichment-status', {cache:'no-store'});
                    if (!resp.ok) throw new Error('HTTP ' + resp.status);
                    data = await resp.json();
                } catch(e) {
                    // Surface the failure instead of dying with an unhandled rejection.
                    stop();
                    showResult('error', '❌ Could not read enrichment status: ' + e.message);
                    return;
                }

                // Leave the bar where it is if the server sent no usable number.
                const pct = Number(data.progress);
                if (Number.isFinite(pct)) {
                    document.getElementById('enrichProgressFill').style.width = pct + '%';
                }
                document.getElementById('enrichProgressText').textContent = data.message || 'Processing...';

                if (data.status === 'completed') {
                    stop();
                    showResult('success', '✅ ' + (data.message || ''));
                    loadStatus();
                    return;
                } else if (data.status === 'failed') {
                    stop();
                    showResult('error', '❌ ' + (data.message || ''));
                    return;
                } else if (data.status === 'interrupted') {
                    stop();
                    // Static markup only; no server data is interpolated here.
                    resultEl.innerHTML =
                      `<div class="result info">⏸️ Interrupted. Progress saved. Click <strong>Resume</strong> to continue.</div>`;
                    return;
                } else if (data.status === 'idle') {
                    stop();
                    document.getElementById('enrichProgress').style.display = 'none';
                    return;
                }

                setTimeout(poll, 1000);
            };
            poll();
        }
        
        // One-shot status check used when the Enrich tab is opened: shows the
        // progress bar for a running or interrupted job (and starts polling when
        // running), otherwise prints the current status and LLM call count.
        async function checkEnrichmentStatus() {
            const progressEl = document.getElementById('enrichProgress');
            const resultEl = document.getElementById('enrichResult');
            // Render server-provided values as text so they cannot inject markup.
            const showInfo = (kind, text) => {
                const box = document.createElement('div');
                box.className = 'result ' + kind;
                box.textContent = text;
                resultEl.innerHTML = '';
                resultEl.appendChild(box);
            };

            let data;
            try {
                const resp = await fetch(BASE_URL + '/enrichment-status', {cache:'no-store'});
                if (!resp.ok) throw new Error('HTTP ' + resp.status);
                data = await resp.json();
            } catch(e) {
                // Opening the tab must not end in an unhandled promise rejection.
                console.error('Enrichment status check failed:', e);
                progressEl.style.display = 'none';
                showInfo('error', '❌ Could not read enrichment status: ' + e.message);
                return;
            }

            const llmCalls = Number(data.llm_used) || 0;

            if (data.status === 'running' || data.status === 'interrupted') {
                progressEl.style.display = 'block';
                document.getElementById('enrichProgressFill').style.width = data.progress + '%';
                document.getElementById('enrichProgressText').textContent = data.message || '';
                if (data.status === 'running') {
                  pollEnrichment();
                } else {
                  // Static markup plus a number coerced above.
                  resultEl.innerHTML =
                    `<div class="result info">⏸️ Interrupted. Progress saved. Click <strong>Resume</strong> to continue. (LLM calls: ${llmCalls})</div>`;
                }
            } else {
                progressEl.style.display = 'none';
                showInfo('info', 'Status: ' + data.status + ' | LLM calls: ' + llmCalls);
            }
        }
        
        // Upload the selected file to POST /enrich-upload and follow the returned
        // job with pollProgress(). Any failure re-enables the submit button, hides
        // the progress bar and shows the reason in #result.
        async function uploadWithEnrich() {
            const fileInput = document.getElementById('fileInput');
            // NOTE(review): hard-coded fallback key is exposed to every visitor of
            // this page; kept for compatibility, but it should be removed/rotated.
            const apiKey = document.getElementById('apiKey').value.trim() || 'eventheodds-flask-api-key-2025';
            const resultEl = document.getElementById('result');
            const submitBtn = document.getElementById('submitBtn');
            const progressContainer = document.getElementById('progressContainer');

            // Messages are set as text so server/exception text cannot inject markup.
            const showError = text => {
                const box = document.createElement('div');
                box.className = 'result error';
                box.textContent = text;
                resultEl.innerHTML = '';
                resultEl.appendChild(box);
            };
            const abortUpload = text => {
                showError(text);
                submitBtn.disabled = false;
                progressContainer.style.display = 'none';
            };

            if (!fileInput.files[0]) {
                showError('Please select a file');
                return;
            }

            submitBtn.disabled = true;
            progressContainer.style.display = 'block';
            document.getElementById('progressText').textContent = 'Uploading with enrichment...';

            const formData = new FormData();
            formData.append('file', fileInput.files[0]);

            try {
                const resp = await fetch(BASE_URL + '/enrich-upload', {
                    method: 'POST',
                    headers: {'X-API-Key': apiKey},
                    body: formData
                });
                const data = await resp.json();
                if (data.job_id) {
                    pollProgress(data.job_id, apiKey);
                } else {
                    // A rejection (bad key, unsupported file type, ...) carries no
                    // job_id; without this branch the form stayed locked silently.
                    abortUpload('❌ ' + (data.error || data.message || ('Upload failed (HTTP ' + resp.status + ')')));
                }
            } catch(e) {
                abortUpload('❌ ' + e.message);
            }
        }
        
        // Standard upload: send the selected file to POST /upload (no enrichment)
        // and follow the returned job with pollProgress(). Any failure re-enables
        // the submit button, hides the progress bar and shows the reason in #result.
        document.getElementById('uploadForm').onsubmit = async (e) => {
            e.preventDefault();
            const fileInput = document.getElementById('fileInput');
            // NOTE(review): hard-coded fallback key is exposed to every visitor of
            // this page; kept for compatibility, but it should be removed/rotated.
            const apiKey = document.getElementById('apiKey').value.trim() || 'eventheodds-flask-api-key-2025';
            const resultEl = document.getElementById('result');
            const submitBtn = document.getElementById('submitBtn');
            const progressContainer = document.getElementById('progressContainer');

            // Messages are set as text so server/exception text cannot inject markup.
            const showError = text => {
                const box = document.createElement('div');
                box.className = 'result error';
                box.textContent = text;
                resultEl.innerHTML = '';
                resultEl.appendChild(box);
            };
            const abortUpload = text => {
                showError(text);
                submitBtn.disabled = false;
                progressContainer.style.display = 'none';
            };

            if (!fileInput.files[0]) {
                showError('Please select a file');
                return;
            }

            submitBtn.disabled = true;
            progressContainer.style.display = 'block';
            // Replace whatever text a previous upload left behind.
            document.getElementById('progressText').textContent = 'Uploading...';

            const formData = new FormData();
            formData.append('file', fileInput.files[0]);

            try {
                const resp = await fetch(BASE_URL + '/upload', {
                    method: 'POST',
                    headers: {'X-API-Key': apiKey},
                    body: formData
                });
                const data = await resp.json();
                if (data.job_id) {
                    pollProgress(data.job_id, apiKey);
                } else {
                    // A rejection (bad key, unsupported file type, ...) carries no
                    // job_id; without this branch the form stayed locked silently.
                    abortUpload('❌ ' + (data.error || data.message || ('Upload failed (HTTP ' + resp.status + ')')));
                }
            } catch(err) {
                abortUpload('❌ ' + err.message);
            }
        };
        
        // Poll GET /progress/<jobId> every 500 ms, mirroring progress into the
        // upload progress bar until the job completes or fails. Every terminal
        // outcome, including an HTTP error or a network failure, re-enables the
        // submit button.
        //
        // jobId:  identifier returned by the upload endpoints.
        // apiKey: not used by this function; kept so existing callers that pass
        //         it keep working.
        async function pollProgress(jobId, apiKey) {
            const resultEl = document.getElementById('result');
            // Job messages are rendered as text so they cannot inject markup.
            const finish = (kind, text) => {
                const box = document.createElement('div');
                box.className = 'result ' + kind;
                box.textContent = text;
                resultEl.innerHTML = '';
                resultEl.appendChild(box);
                document.getElementById('submitBtn').disabled = false;
            };

            const poll = async () => {
                let data;
                try {
                    const resp = await fetch(BASE_URL + '/progress/' + encodeURIComponent(jobId), {cache:'no-store'});
                    data = await resp.json();
                    // An error reply (for example an unknown job) never reaches
                    // "completed" or "failed", so treat it as terminal instead of
                    // polling forever.
                    if (!resp.ok) throw new Error(data.message || data.error || ('HTTP ' + resp.status));
                } catch(e) {
                    finish('error', '❌ ' + e.message);
                    return;
                }

                // Leave the bar where it is if the server sent no usable number.
                const pct = Number(data.progress);
                if (Number.isFinite(pct)) {
                    document.getElementById('progressFill').style.width = pct + '%';
                }
                document.getElementById('progressText').textContent = data.message || 'Processing...';

                if (data.status === 'completed') {
                    finish('success', '✅ ' + (data.message || ''));
                    loadStatus();
                    return;
                } else if (data.status === 'failed') {
                    finish('error', '❌ ' + (data.message || ''));
                    return;
                }
                setTimeout(poll, 500);
            };
            poll();
        }
        
        // Initial load: runs as soon as this script executes, so the counters and
        // the source filter are filled in before any tab is clicked.
        loadStatus();
    </script>
</body>
</html>rJ   rJ   r?   r=   adminr    s    |r?   boostc                 f    | dk\  rddddS | dk\  rddd	dS | d
k\  rddddS | dk\  rddddS ddddS )z=Calculate precedential strength confidence (0-100) with labelr  _   zGold Standardz&Supreme Court or Restatement authorityr}  labelr^  r  rN  zHighly PersuasivezCircuit Court or major treatiser  A   
Persuasivez'District Court or state court authorityr	  rL  StandardzGeneral legal authorityrK  	DecliningzMay be overruled or supersededrJ   r  s    r=   !calculate_precedential_confidencer  ]
  se    |oFnoo	#&9Jkll	#lClmm	#jAZ[[kBbccr?   c                 f    | dk\  rddddS | dk\  rddd	dS | d
k\  rddddS | dk\  rddddS ddddS )z-Calculate jurisdictional relevance confidencer  r  r  zBinding precedent nationwider  r  rN  zFederal CircuitzBinding in circuitr  F   zState SupremezBinding in stater  r  zState AppealszPersuasive in staterL  GeneralzGeneral jurisdictionrJ   r  s    r=   !calculate_jurisdiction_confidencer  k
  se    |oFdee	#&7H\]]	#oFXYY	#oF[\\i@VWWr?   c                 P    | dk\  rddddS | dk\  rddd	dS | d
k\  rddddS ddddS )z/Calculate argument structure quality confidencerj  r  zStrong Structurez'Clear analogical or doctrinal frameworkr  r  K   zGood StructurezWell-organized legal argumentr{  r  ModeratezStandard argument structurer  BasiczSimple presentationrJ   r  s    r=   calculate_argument_confidencer  y
  sQ    |&8Irss	#&6Gfgg	$jA^__g>STTr?   r  c                    | j                  di       }|j                  dd      df|j                  dd      df|j                  dd      df|j                  d	d      df|j                  d
d      df|j                  dd      df|j                  dd      dfd}d}|j                         D ]/  \  }\  }}t        dt        d|dz
  dz  dz              }|||z  z  }1 t	        |      }|dk\  rd}	d}
n"|dk\  rd}	d}
n|dk\  rd}	d}
n|dk\  rd}	d}
nd }	d!}
||	|
|j                         D ci c]  \  }}|t        |d   d"       c}}d#S c c}}w )$z6Calculate overall confidence score for a search resultr  r  r	  r'   r   g?r  g333333?r  r  g?r  r  r  )r  r   r  r  r  r  r  r   r  r4  g?U   z	Very Highz!Highly authoritative and relevantr  HighzStrong legal authority7   r  z+Relevant but verify with additional sourcesr  Lowz#Limited authority, use with cautionzVery Lowz%Weak authority, requires verificationr    )r}  r  r^  factors)r9   rw  r  ry  rG   r  )r  r  r  weighted_sumfactorr  r&  
normalizedr}  r  r^  r  r$  s                r=   calculate_overall_confidencer  
  s   ZZ"%F  NC8$?NC8$?ZZ
C0$7ZZ
C0$7ZZ
C0$7jjc2D9ZZ
C0$7G L#*==? ,c!eckS%83%>?@

V++,
 E {9	".	"C	";= "29--/B$!QAuQqT1~%B	  Cs   ,Ez/healthGET)methodsc                  j    t        dt        t        j                        t        t
        t        d      S )zHealth checkhealthy)r  r  	has_faisshas_sentence_transformershas_pymupdf)r   r  r  r  r  r  r  rJ   r?   r=   healthr  
  s0     |556%>"  r?   z/statusc            
         t        d t        j                  D              } t        j                  }d|j
                  rdnd|j
                  |j
                  r|j                  ndt        |j                        d}t        t        t        j                        t        |       t        t        t        rdndt        rd	nd
|d      S )zGet system statusc              3   @   K   | ]  }|j                  d d        ywr<  r=  Nr9   rp  r  s     r=   rr  zstatus.<locals>.<genexpr>
  s     F!!%%"%F    Grok API (grok-4-fast-reasoning)zDeepSeek (local)zGrok APIN)chat_llmenrichment_llmr  r  grok_availabler  z
hash-basedzFAISS + BM25 HybridzCosine + BM25 Hybrid)r  total_filesr  r  embedding_modelsearch_typerL  )r  r  r  enrichment_servicerL  r  r  r  r  r   r  r  r  )r'  rL  llm_infos      r=   r  r  
  s     F|/E/EFFG 
 
 C6030F0F,J!44030F0F#,,Ds//0H |5567|%>4M0S_09,?U  r?   z/uploadPOSTc                     dt         j                  vrt        ddi      dfS t         j                  d   } | j                  st        ddi      dfS t	        | j                        j
                  j                         }|dvrt        ddi      dfS t        | j                        }t        j                  |z  }| j                  t        |             t        t        j                               dd	 }t        |d
dd       t        j                   t"        |||fd      }|j%                          t        d|d|d      dfS )zUpload and process a documentrE  r  No file providedr   Empty filenamer  r  z$Only PDF and TXT files are supportedN   startingr   z%File uploaded, starting processing...T)targetr  daemonz"File uploaded. Processing started.rG  r  r  r+     )r   rF  r   r+  r   r  r  r   r  r   r  rI   uuiduuid4r  	threadingThreadr  r  )rE  extsafe_filenamer  r  threads         r=   upload_filer&  
  s8    W]]"!345s::== D==!123S88 t}}

$
$
*
*
,C
""!GHI3NN $DMM2M.IIIc)n r"FFJ+RS&i/F
 LLN7!	 
 	 r?   z/progress/<job_id>c                     t         5  | t        v rt        t        |          cddd       S 	 ddd       t        dddd      dfS # 1 sw Y   xY w)zGet upload progressNr  r   zJob not found)r  r  r    )r  r  r   )r  s    r=   get_progressr)    sX     
 4_$?6234 4$4 iQ?STVYYY4 4s   AAz/askc                  z
   ddl } | j                         }t        j                  d      xs i }|j                  dd      j	                         }|j                  dd      }|j                  dd      }|st        d	d
i      dfS |j                  dd      }t        j                  ||      }|s't        dg t        t        j                        dd      S g }d}	t               }
g }|D ]  }|j                  di       }|j                  dd      }|r|r|j                  d      r|	dz  }	d|j                  dd       }|j                  dg       }|r.|j                  |dd        |ddj                  |dd        z  }|j                  dg       }|r|
j                  |       |j                  |       |j                  |d   dd         dj                  |      }d |dd!  d"| d#}d}	 t        j                   }|j#                  |d$d%&      }|rd'}|j	                         }n
d(}d)|dd* z   }g }|D ]  }|j                  di       }|j                  d/d0      t+        |d1   d      |d   dd2 |j                  dd      |j                  d      r|j                  dd      dd3 nd|j                  dg       d4}|rZt+        |j                  d5|d1         d      t+        |j                  d6d      d      t+        |j                  d7d      d      t+        |j                  d8d      d      |j                  d9g       |j                  d:g       |j                  d;i       |j                  d<d=      |j                  d>d0      |j                  d?      |j                  d@      dA|dB<   |j                  d;i       }t-        |j                  dCdD            t/        |j                  dEdD            t1        |j                  dFdD            t3        |      dG|dH<   |j                  |        t5        | j                         |z
  dIz        }||t        t        j                        t        |      |	t7        |
      ddJ |dd ||dK	}|r-t9        |      t;        |      t        t=        |            dL|dM<   	 t?        ||       t        |      S # t$        $ r2}t'        d+|        d,t)        |      dd-  }d.|dd* z   }Y d}~d}~ww xY w# t$        $ r"}t'        dN|        Y d}~t        |      S d}~ww xY w)Oz2Query the RAG system with enriched context supportr   NTsilentquestionr=  use_enrichedinclude_analysisr  zQuestion is requiredr   r  r  r  z8I don't have enough information to answer that question.)r   r'  chunks_searchedenriched_countr:  rP  Fr  rM   z**Summary**: r  r    z
**Key Points**: z; r"   r  r  rN   z

---

zYou are an AI Legal Research Assistant. Your role is to help users understand legal concepts,
case law, statutes, and legal procedures based on the retrieved legal documents.

=== RETRIEVED LEGAL KNOWLEDGE ===
i  z

=== QUESTION ===
am  

INSTRUCTIONS:
1. Answer based on the retrieved legal documents and your legal knowledge.
2. Cite specific sources, cases, or statutes when applicable.
3. Explain legal concepts clearly for the user's understanding.
4. If the question involves specific case law or procedures, reference the relevant details.
5. If you cannot find sufficient information, acknowledge limitations and suggest areas to research.
6. Always maintain professional legal language while being accessible.

IMPORTANT: This is for educational/research purposes only. Remind users to consult a licensed attorney for specific legal advice.

ANSWER:rL   r4  r  r  zNone (fallback to context)zGI could not generate a specific answer. Here is the relevant context:

i	  z[RAG] LLM generation error: r  rL  z/Error generating answer. Here is the context:

r<  r  r}     r  )r<  r}  previewrP  r  r  r  r  r  r  r  r  r  r{  ru  r  r  r  )r  r  r  r  r  r  r  r{  r  r  r  analysisr  r	  r   r  )precedential_strengthr  argument_qualityoverallr  r   r*   )	r   r'  r1  chunks_returnedr2  r  r  r  response_time_ms)detected_phaser  expanded_terms_countquery_analysisz[Analytics] Track error: ) r  r   get_jsonr9   r  r   r  r  r  r  r  r  r  rS  r  r  rL  r  r  r  rI   r  r  r  r  r  rG   r  r~  r  r  track_query)time_module
start_timer2   r-  r.  r/  r  r>  context_partsr2  
all_themesall_key_pointsr  metais_enrichedpartr  r  r  r  r  r  generated_answerr   r  r'  source_datar  r:  response_datas                                 r=   askrK    s~    !!#J4(.BDxx
B'--/H88ND1Lxx 2D9!7893>>aA !!(a!0GP"<#9#9:	
  	 MNJN 5uuZ$hhz51KDHHY,?aN"488Ir#:";<D,3J%%j!n5,TYYz"1~-F,GHHXXh+F!!&)  &   9ds!34+5.   /G 	$   

 F. HV%)) $..v$TW.X9H%++-F3H`cjkplpcqqF G $$uuZ$hhx31W:q)|DS)U38<8Ktxx	2.t4QUhhx,
 #AEE,'
$CQG %aeeNA&> B!&quu_a'@!!D#AEE,$:A>!"!;$%EE*<b$A%%"-w	2 uu]I>&'ee,@&A$%EE*<$='K
# UU8R(F)J6::VdfiKj)k,MfjjYgilNm,n$A&**ZY\B]$^7:	)K% 	{#I$$N K,,.;tCD |556w<(z"3B'$Ra(,
M 5h?"8"B$'(B8(L$M+
&'/Hm, =!!W  V,QC01SVCR[M*DwuPT~UVP  /)!-..=!!/s1   0AS :T 	T'TT	T:T55T:z/searchc                  6   t        j                  d      xs i } | j                  dd      j                         }| j                  dd      }|st	        ddi      d	fS t
        j                  ||
      }t	        ||t        t
        j                        d      S )zSearch documentsTr+  r  r=  r  r*   r  zQuery is requiredr   r0  )r>  r  r  )	r   r>  r9   r  r   r  r  r  r  )r2   r  r  r>  s       r=   r  r    s     4(.BDHHWb!'')EbA!456;;!!%1!-G|556  r?   z
/documentsc            
         i } t         j                  D ]c  }|j                  dd      }|| vrdd|j                  dd      d| |<   | |   dxx   dz  cc<   |j                  d      sT| |   d	xx   dz  cc<   e t        t         j                        }t	        d
 t         j                  D              }t        | j                         D cg c]  \  }}d|i| c}}t        |       ||t        |dkD  r||z  dz  ndd      d      S c c}}w )z)List all documents with enrichment statusr<  r  r   rF  )chunk_countr2  rF  rN  rM   rP  r2  c              3   D   K   | ]  }|j                  d       sd  ywrP  rM   Nr  r	  s     r=   rr  z!list_documents.<locals>.<genexpr>  s     PqaeeJ>OP     r  )r  total_sourcesr  total_enrichedenrichment_percentage)r  r  r9   r  rx  r   rw  r  )r'  r*  r<  r  rS  r  r$  s          r=   list_documentsrU    s    G%% 
39-  "#!$)!DGFO
 	&!+&77:FO,-2-
3 |--.LPL$:$:PPN5<]]_ETQx(a(EW$(!&P\_`P`(E(Kfgij!k  Es   
D
z/chunksc                  P   t         j                  j                  d      } t         j                  j                  dd      j                         dk(  }t         j                  j                  dd      j                         dk(  }t	        t         j                  j                  dd            }t	        t         j                  j                  dd	            }t
        j                  }| r$|D cg c]  }|j                  d      | k(  s| }}|r"|D cg c]  }|j                  d      s| }}n#|r!|D cg c]  }|j                  d      r| }}t        |      }|dz
  |z  }||z   }	|||	 }
g }t        |
|
      D ]  \  }}|j                  ||j                  dd      |j                  d|      |j                  dd      |j                  d      r|j                  dd      dd nd|j                  dg       dd |j                  dg       t        |j                  dd            dkD  r|j                  dd      dd dz   n|j                  dd      t        |j                  dd            d	        t        ||||||z   dz
  |z  d| ||dd      S c c}w c c}w c c}w )z)List chunks with pagination and filteringr<  rP  r=  truerawr  rM   per_pagerI  )r  r  r  Fr  Nr  r  r"   r  r  ,  rC  )	r  r<  r  rP  r  r  r  content_previewcontent_length)r  rY  r  r  )r<  enriched_onlyraw_only)rd  
paginationfilters)r   r  r9   r  rG   r  r  r  rQ  r  r   )r<  r]  r^  r  rY  filteredr  r  r  end	paginatedrd  rV  r*  s                 r=   list_chunksrd    s    \\h'FLL$$Z4::<FM||r*002f<Hw||*+D7<<##J34H %%H'E!155?f+DAEE'=!155+<A==	'A!quuZ/@AAA MEAX!E
(
Cs#I FIU3 3ggh	2
A.
E27:wwy7Iswwy"-ds3t'',3BQ7ggh+GJ377S\^`KaGbehGhswwy"5ds;eCnqnunuv  BD  oE!#'')R"89

 
	  h&*x7	
 * 
  9 F >As$   *JJJ'J4J#J#z/chunks/<int:chunk_index>c                     | dk  s| t        t        j                        k\  rt        ddi      dfS t        j                  |    }t        | |j	                  dd      |j	                  d      |j	                  d      |j	                  d	d
      |j	                  d      |j	                  dg       |j	                  dg       |j	                  dd      t        |j	                  dd            |j	                  d      |j	                  d      d      S )z$Get a single chunk with full detailsr   r  zChunk not foundr(  r<  r  r  rF  rP  Fr  r  r  r  r=  rg  r  )r  r<  r  rF  rP  r  r  r  r  r\  rg  r  )r  r  r  r   r9   )chunk_indexr*  s     r=   	get_chunkrg  $  s     Q+\-C-C)DD!234c99

 
 
-C''(I.GGJ'1GGJ.779%gglB/''(B'779b)cggi45779%/  r?   z$/chunks/by-source/<path:source_name>c                     t        |       }t        j                  D cg c]  }|j                  d      |k(  s| }}|st	        ddi      dfS t        d |D              }t	        |t        |      ||D cg c]r  }|j                  d      |j                  dd      |j                  d	      r|j                  d	d
      dd nd|j                  dg       |j                  dd
      dd dt c}d      S c c}w c c}w )z$Get all chunks for a specific sourcer<  r  zSource not foundr(  c              3   D   K   | ]  }|j                  d       sd  ywrP  r  r  s     r=   rr  z'get_chunks_by_source.<locals>.<genexpr>E  s     @qaeeJ.?@rQ  r  rP  Fr  r=  Nr  r  r  )r  rP  r  r  r[  )r<  r  enriched_chunksrd  )r   r  r  r9   r   rx  r  )r  safe_sourcer  rd  r2  r  s         r=   get_chunks_by_sourcerl  <  s    "+.K%//RA155?k3QaRFR!345s::@F@@NF)   j)j%056UU95EquuY+DS14eeHb) uuY3DS9
 	   Ss   C6C66A7C;z/delete/<path:filename>DELETEc                 N   t        |       }t        t        j                        }t        j                  D cg c]  }|j	                  d      |k7  s| c}t        _        |t        t        j                        z
  }|dkD  rt        j                  rt        j                  D cg c]  }|d   	 }}t        j                  |      }t        rrt        j                  t        j                        t        _
        |j                  d      }t        j                  |       t        j                  j                  |       n]|t        _        nQt        r-t        j                  t        j                        t        _
        nt        j                   g       t        _        t        j#                          t        j%                          t&        j(                  |z  }|j+                         r|j-                          t/        dd| d| d|d	      S t/        d
di      dfS c c}w c c}w )zDelete a documentr<  r   r  r  TzDeleted z (z chunks))rG  r  chunks_removedr  zDocument not foundr(  )r   r  r  r  r9   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  unlinkr   )	r+  r$  original_countr  removed_countr  r  r  r  s	            r=   delete_documentrs  U  s    $H-M //0N)5)?)?dA155?VcCcadL"S)?)?%@@Mq!!.:.D.DE)EHE%;;HEJ%*%6%6|7Q7Q%R" * 1 1) <""=1""&&}5*4'%*%6%6|7Q7Q%R"*,((2,'..0 NN]2	!-=/J+
  	 G123S88I e Fs   HH%H"z/reloadc                  t    t         j                          t        dt        t         j                        d      S )zReload the indexT)rG  r  )r  r  r   r  r  rJ   r?   r=   reloadru    s3     |556  r?   z/enrichc                     t         5  t        j                  d      dk(  r7t        1t        j	                         rt        dt        d      dfcddd       S t        j                  d      dk(  rndt        d<   t        j                  d      xs d	t        d<   t        j                  t        j                        j                         d
z   t        d<   t                ddd       t        j                  d      xs i } | j                  d      fd}t        j                  |d      at        j!                          t        dddd      dfS # 1 sw Y   uxY w)z,Enrich documents with LLM-generated metadatar  r  NzEnrichment already running)r  r  i  r  r  r  r  r  Tr+  r<  c                  4   	 t        t        j                  j                  dt        j                  dd      xs d            } 	 t
        j                          rAt        t
        j                        D cg c]  \  }}|j                  d      k(  s| }}}n+t        t        t        t
        j                                    }t        |      }|dk(  rt        5  dt        d<   dt        d	<   d
t        d<   t        j                  t        d<   t        d<   dt        d<   dt        d<   t!        j"                  t$        j&                        j)                         dz   t        d<   | t        d<   t+                d d d        y t-        d |D              }||z
  }t!        j"                  t$        j&                        j)                         dz   }t        5  dt        d<   |rt        ||z  dz        ndt        d	<   d| d| d| dt        d<   t        j                  t        d<   t        j                  d      xs |t        d<   |t        d<   t        d<   |t        d<   |t        d<   | t        d<   t+                d d d        |dk  rt        5  dt        d<   dt        d	<   dt        d<   t        j                  t        d<   t!        j"                  t$        j&                        j)                         dz   t        d<   t+                d d d        y d}|D ]  }	t
        j                  |	   }
|
j                  d      r)t        j/                  |
      }|
j1                  |       t!        j"                  t$        j&                        j)                         dz   |
d<   	 t
        j3                  |
      }t        t
        j4                        t        t
        j                        k(  r!|j7                         t
        j4                  |	<   t        t
        j8                        t        t
        j                        k(  rt
        j8                  |	   }t        |j;                               }|t
        j8                  |	<   t        t
        j8                        }|dkD  r%t
        j<                  |z  |z
  |z   |z  t
        _        |dz  }||z   }|rt        ||z  dz        nd}t        5  dt        d<   |t        d	<   d| d| d| d| d	t        d<   t        j                  t        d<   t!        j"                  t$        j&                        j)                         dz   t        d<   |t        d<   | t        d<   d d d        || z  dk(  sit
        j?                          t        5  t+                d d d         t
        j?                          t        5  dt        d<   dt        d	<   d| d | dt        d<   t        j                  t        d<   t!        j"                  t$        j&                        j)                         dz   t        d<   |t        d<   t+                d d d        y # t        $ r Y 	w xY wc c}}w # 1 sw Y   y xY w# 1 sw Y    xY w# 1 sw Y   y xY w# t        $ r Y w xY w# 1 sw Y   :xY w# 1 sw Y   xY w# 1 sw Y   y xY w# t        $ r}tA        d!|        dd l!}|jE                          t        5  d"t        d<   d#tG        |       t        d<   t        j                  t        d<   t!        j"                  t$        j&                        j)                         dz   t        d<   t+                d d d        n# 1 sw Y   nxY wY d }~y Y d }~y d }~ww xY w)$Nr  r  r*   r<  r   r  r  r  r  zNo chunks found to enrich.r  r  r  r  r  r  c              3   f   K   | ])  }t         j                  |   j                  d       s&d + ywrP  )r  r  r9   )rp  rV  s     r=   rr  z;enrich_documents.<locals>.run_enrichment.<locals>.<genexpr>  s*     "j@V@VWX@Y@]@]^h@i1"js   '11r  zResuming enrichment: r  z' already enriched. 
Enriching remaining rC  r  z0No chunks need enrichment (all already enriched)rP  enriched_atrM   z	Enriched z chunks (this run: r  zSuccessfully enriched z chunks (target: z[RAG] Enrichment error: r  r  )$rG   r7   r8   r9   r  r  r  r  rQ  r  r  ranger  enrichment_lockr  rN  r   rM  r   r  r  r  rx  rX  rS  r  r  r  r  rH  r  r  r  r  r  rI   )r  rV  r  target_indicesr  already_enriched	remainingr  enriched_this_run	doc_indexr*  enriched_docnew_textold_lennew_lenndoner  r  r  source_filters                       r=   run_enrichmentz(enrich_documents.<locals>.run_enrichment  s   z	2"2::>>2OQbQfQfgy{}Q~  RE  CE  $F   G668
 09,:P:P0Q!v1UVUZUZ[cUdhuUu!!v!v!%eC0F0F,G&H!I~.Lq $ 
62=%h/47%j13O%i04F4V4V%j12?%h/89%n578%m46>ll8<<6P6Z6Z6\_b6b%l3<L%&8935
6 ""jn"jj$'77I!hll3==?#EJ  2.7!(+`l5E5TX[4[0\rs!*-1FGWFXXYZfYg  hO  PY  OZ  Z]  0^!),0B0R0R!*-2C2G2G2U2cYc!,/2<!,/.;!(+4@!.13C!-08H!"45/12 A~$ 62=%h/47%j13e%i04F4V4V%j16>ll8<<6P6Z6Z6\_b6b%l3356  !+ ):	",,Y777:&1>>sC

<(%-\\(,,%?%I%I%Kc%QM"+BB3GH<889SAWAW=XXCK>>CS55i@<334L<R<R8SS".":":9"E"%hnn&6"7>E00; 8 89q5;G;V;VYZ;Z]d;dgn;nrs:sL7 "Q&!'*;;?K3| 3s:;QR$ M2;%h/4<%j15>tfAl^Sfgxfyyz  |E  {F  FG  4H%i04F4V4V%j16>ll8<<6P6Z6Z6\_b6b%l37;%m4<L%&89M %'771< 335( :79: :Q):X ++-  2.9!(+03!*-1GHYGZZklxkyyz/{!),0B0R0R!*-2:,,x||2L2V2V2X[^2^!,/3?!-0/12 2E  
 "w
6 2 26 2 ! M M: :
2 2  		2,QC01!  2.6!(+18Q/A!),0B0R0R!*-2:,,x||2L2V2V2X[^2^!,//12 2 2 2 2			2sH  A[ Y [ 7Y&Y&A[ BY,9[ A[ B'Y8:[ A8Z[ B
[ DZ7%[ BZ!)[ ;[ Z. %[ BZ;[ 	Y#[ "Y##	[ ,Y51[ 5[ 8Z=[ Z
[ [ 	Z[ Z[ !Z+	&[ .Z8	3[ ;[ [ [ 	^(^8A;]<3	^<^	^^r  r  zEnrichment started)rG  r  r  r  )r{  r  r9   enrichment_threadis_aliver   r   rM  r   r  r  r  r   r>  r!  r"  r  )r2   r  r  s     @r=   enrich_documentsr    sL    
 .  *i7<M<Y^o^x^x^z5+  . .   *i7*7h'+<+@+@+K  ,F  PFi(.6ll8<<.H.R.R.TWZ.Zl++-. 4(.BDHHX&M|2~ "((tL'  		 e. .s   AEBEEz/enrichment-statusc                  X    t         5  t        t              cddd       S # 1 sw Y   yxY w)zGet current enrichment statusN)r{  r   r  rJ   r?   r=   get_enrichment_statusr  .  s%     
 *()* * *    )z/processing-statusc            
      V   g } 	 t         j                  j                         rt         j                  j                         D ]  j                  j                         dv s t        fdt        j                  D              }| j                  j                  |rdndj                         j                  t        j                  j                         j                        j!                         d        t'        d| i      S # t"        $ r}t%        d|        Y d}~)d}~ww xY w)	z"Get status of file processing jobsr  c              3   t   K   | ]/  }|j                  d d      j                  j                         1 ywr  )r9   endswithr  )rp  r*  r#  s     r=   rr  z(get_processing_status.<locals>.<genexpr>?  s3      $ "-66qvv>$s   58r  pending)r+  r  sizer  z'[RAG] Error getting processing status: NrF  )r  r   r  iterdirr  r  r  r  r  r  r  statst_sizer   fromtimestampst_mtimer  r  r  r   )rF  	processedr  r#  s      @r=   get_processing_statusr  5  s     E=>>  "^^++- 88>>#'77 # $#/#9#9$ !I LL$%FF1:+	 ! 0 0$,$:$:1668;L;L$M$W$W$Y	"   GU#$$  =7s;<<=s   AD !BD 	D(D##D(z/enrich-uploadc                     dt         j                  vrt        ddi      dfS t         j                  d   } | j                  st        ddi      dfS t	        | j                        j
                  j                         }|dvrt        ddi      dfS t        | j                        t        j                  z  | j                  t                     t        t        j                               dd	 t        d
dd       fd}t        j                   |d      }|j#                          t        ddd      dfS )z,Upload and process with immediate enrichmentrE  r  r  r   r  r  z Only PDF and TXT files supportedNr  r  r   z5File uploaded, starting processing with enrichment...c            
      &   	 t        
ddd       t        j                  	      } | rt        | j	                               dk  r#t        
ddd       t
        j                  	      } | rt        | j	                               dk  rt        
ddd	       y t        
dd
d       t        j                  |       }t        j                  | |      }|st        
ddd       y t        |      D ]"  \  }}|d<   ||d<   t        |      |d<   ||d<   $ t        
dddt        |       d       
fd}t        j                  ||      }t        
ddd       t        j                  |       t        
ddd       t        j                          t        
dddt        |       dt        |      t        j                         y # t        $ rF}t!        d|        dd l}|j%                          t        
dddt'        |              Y d }~y d }~ww xY w)Nr  r*   zExtracting text...r  r  zTrying OCR...r  r   zFailed to extract text   zChunking document...zNo valid chunks extractedr<  r  r  rF  rL  z
Enriching z chunks with LLM...c           
      X    dt        | |z  dz        z   }t        d|d|  d| d       y )NrL  r  r  zEnriching chunk r  rC  )rG   r  )currentr  pctr  s      r=   enrich_progresszEenrich_on_upload.<locals>.process_and_enrich.<locals>.enrich_progress  s=    3%2566c=MgYVWX]W^^a;bcr?   r  zAdding to vector store...r  r  r  z$Successfully processed and enriched z chunks!)r  r  z[RAG] Error: r  )r  r  r  r  r  r  rF  re  rQ  r  r  r  r  r  rN  r  r  r  r  rI   )rl  rQ  rd  rV  r  r  rj  r  r  r  r  r$  s            r=   process_and_enrichz,enrich_on_upload.<locals>.process_and_enriche  s   6	EFL"6JK --i8D3tzz|,s2b/J"//	:3tzz|,s2!5MNFL"6LM$99$NH"--dH=F!5PQ%f- 25"/h$%j!(+Fn%)1o&	2 FL"
3v;-Ob6cdd 1BB6?[OFL"6QR''GFL"6GHS6s?7K6LHU 1+;;	  	EM!%&!FHa73q6(1CDD		Es&   BG A
G C!G 	H
<HHTr  z2File uploaded. Processing with enrichment started.r  r  )r   rF  r   r+  r   r  r  r   r  r   r  rI   r  r   r  r!  r"  r  )rE  r#  r  r%  r  r  r$  s       @@@r=   enrich_on_uploadr  O  s-    W]]"!345s::== D==!123S88
t}}

$
$
*
*
,C
""!CDEsJJ#DMM2M.IIIc)nr"FFJ+bc7Er %7EF
LLNG!	 
 	 r?   z	/validatec            
         t        t        j                        dk(  rt        ddg d      S g } t        D ]  \  }}t        j                  |d      }g }dj                  d |D              j                         }|D ]&  }|j                         |v s|j                  |       ( |rt        |      t        |      z  nd}| j                  |||t        |d	      t        |      |r|d   d
   ndd        | rt        d | D              t        |       z  nd}t        dt        |d	      t        t        j                        | d      S )z.Run validation queries to test RAG performancer   no_documentsz2No documents loaded. Upload legal documents first.)r  r  r>  r  r0  r  c              3   @   K   | ]  }|j                  d d        yw)r  r=  Nr  rp  r  s     r=   rr  zvalidate_rag.<locals>.<genexpr>  s      N!y"!5 Nr
  r    r}  )r  expected_termsfound_termscoverageresult_count	top_scorec              3   &   K   | ]	  }|d      yw)r  NrJ   r  s     r=   rr  zvalidate_rag.<locals>.<genexpr>  s     :Q1Z=:s   rG  )r  overall_coverager  r>  )r  r  r  r   VALIDATION_QUERIESr  r  r  r  r  rx  )	r>  r  r  search_resultsr  combined_textr  r  r  s	            r=   validate_ragr    s^    <!!"a'$K
  	 G!3 ~%,,Ua,8  N~ NNTTV" 	)Dzz|},""4(	) >L3{#c.&99QR,&h*/7E*731
 	* NUs:'::S\IZ[!"2A6|556	  r?   z/legal-intelligencec                     t        dt        t              t        t              t        t              t        t
              t        t              t        t              t        t              ddt        t        j                               ddt        t        j                               ddt        t        j                               dd	t        t        j                               d
dt        t              ddt        t         j                               t         j#                         D  ci c]  \  } }| |d    c}} ddt        t$        j                               t$        j'                         D cg c]  }|d   	 c}ddt        t(        j                               dd	dddddddddd	dd       S c c}} w c c}w )!z5Get information about the legal intelligence featuresz-Legal terms weighted by importance tier (1-6))r^  tier_1_counttier_2_counttier_3_counttier_4_counttier_5_counttier_6_counttotal_termsz8Related legal concepts automatically expanded in queries)r^  clusterszPhase-aware search boosting)r^  phasesz$Doctrine-specific search enhancement)r^  	doctrineszCourt authority level boosting)r^  levelsz Legal citation pattern detection)r^  pattern_countzASource authority weighting (gold standard, persuasive, declining)r&  )r^  r  weightsz#Legal reasoning pattern recognitionr  )r^  schemasr  z1Doctrine relationship mapping for query expansion)	tiered_keywordsconcept_clusterslitigation_phaseslegal_doctrinesjurisdiction_hierarchycitation_recognitionprecedential_hierarchyargument_schemascross_referencesz2Documents from same litigation phase get 15% boostzDU.S. 
Supreme Court: 1.5x, Federal Circuit: 1.3x, State Supreme: 1.2xz12% boost per matched doctrinez+Up to 24% boost for Supreme Court citationsz-Positive treatment: +5%, Heavy negative: -10%z=Gold standard: 2.0x, Highly persuasive: 1.5x, Declining: 0.4xz.Recent statutes boosted, landmark cases stablez2Well-structured arguments boosted by schema weightz*Up to 25% boost for related legal concepts)	phase_awarer   doctrine_specificcitation_authoritycitation_treatmentr  r  r  cross_reference	   )featuressearch_booststotal_boost_factors)r   r  r  r  r  r  r  r  r  r  r  r  rv  r  r   r  r>  rw  r  valuesr  )r  r$  rK  s      r=   legal_intelligence_infor    s      O #L 1 #L 1 #L 1 #L 1 #L 1 #L 1";/	   Z !7!<!<!>?!
  =05578"
  F!/"6"6"89 
  @5::<='
  B!$%<!=%
  c5::<=7M7S7S7UVtq!Aq{NV'  E 0 5 5 78-=-D-D-FG!F)G!  S!"8"="="?@!S-
^ Pb!A"O"Q[H"VK

  !u; ; ;F W
 Hs   ,F84F>z/analyze-queryc                     t        j                         xs i } | j                  dd      }|st        ddi      dfS t	        |      }t        |      }t        |      }|j                         }g g g g g g d}t        D ],  }||v st        |      }|dkD  s||   j                  |       . t        ||||dd	 |j                         D 	
ci c]  \  }	}
|
s	|	|
 c}
}	t        |      d
      S c c}
}	w )z>Analyze a query to show how legal intelligence will process itr  r=  r  zQuery requiredr   )rM   r    r"   r  r  r  r   NrI  )r  r;  r  r	  tier_matchestotal_expansion_terms)r   r>  r9   r   r~  r  r  r  r  r  r  rw  r  )r2   r  r{  r  r	  r  r  r  r"  r  r$  s              r=   analyze_queryr    s    #DHHWb!E!123S88 $E*E&u-I/6N ++-KbRB2"=L 0; &DaxT"))$/	0 '("-*6*<*<*>D$!Q!AD!$^!4  
 Es   

C.C.)queriesdaily_statsdoctrinal_coverageresponse_qualitysession_startc                  B   t         j                  dz  at        j                         r	 t	        t        d      5 } t        j                  |       }t        j                  |       t        j                  t        j                        t        d      z
  j                         }t        j                  dg       D cg c]  }|j                  dd      |kD  r| c}t        d<   ddd       yyc c}w # 1 sw Y   yxY w# t         $ r}t#        d	|        Y d}~yd}~ww xY w)
z5Initialize analytics file path and load existing datazanalytics.jsonr  r  )daysr  	timestampr=  Nz%[Analytics] Error loading analytics: )r  r   ANALYTICS_FILEr  r  r  r  analytics_datarS  r   rM  r   r  	timedeltar  r9   r  r  )r#  loadedcutoffqr  s        r=   init_analyticsr  H  s     __'77N		?nc* Qa1%%f-",,x||4yb7IITTV8F8J8J9VX8Y -Q101k20F0O ./ -Qy)Q Q -QQ Q  	?9!=>>	?sG   C= BC1:C,
C1"C= ,C11C:6C= :C= =	DDDc                      t         sy	 t        t         d      5 } t        j                  t        |        ddd       y# 1 sw Y   yxY w# t
        $ r}t        d|        Y d}~yd}~ww xY w)zSave analytics to diskNr  z$[Analytics] Error saving analytics: )r  r  r  r  r  r  r  )r#  r  s     r=   save_analyticsr  X  s^    :.#& 	)!IIna(	) 	) 	) :4QC899:s1   A	 =A	 AA	 A	 		A*A%%A*rJ  c                 Z   t         5  t        j                  t        j                        }|j                  d      }t        |       }t        |       }t        |      }|j                         t        |       ||t        |j                  dg             |j                  dd      ||j                  d      |j                  dd      d	}t        d   j                  |       t        t        d         d	kD  rt        d   d
d t        d<   |t        d   vrdddi i dddt        d   |<   t        d   |   }|dxx   dz  cc<   |dxx   |z  cc<   |d   |d   z  |d<   |dxx   |j                  dd      z  cc<   |d   |d   z  |d<   ||d   vrd|d   |<   |d   |xx   dz  cc<   |D ]L  }	|	|d   vrd|d   |	<   |d   |	xx   dz  cc<   |	t        d   vrdt        d   |	<   t        d   |	xx   dz  cc<   N t        t        d         dz  dk(  r
t                ddd       y# 1 sw Y   yxY w)zTrack a query for analyticsz%Y-%m-%dr'  r2  r   r  r:  )	r  query_lengthr{  r  sources_countr2  quality_scorer  r:  r  r   iNr  )query_countavg_qualitytotal_qualityr  doctrines_usedavg_response_timetotal_response_timer  rM   r  r  r  r  r  r  r  r*   )analytics_lockr   rM  r   r  strftimer~  r  calculate_response_qualityr  r  r9   r  r  r  )
r  rJ  rM  todayr{  r  r  query_recorddailyr  s
             r=   r?  r?  b  s   	 Bll8<<(Z( (.*51	 3=A J" !2!29b!AB+//0@!D*%))*5 - 1 12Da H

 	y!((6~i()D0(6y(A%&(IN9% }55  !""$%&'(4N=)%0 }-e4m!o-/$_5m8LLm#$(9(9:La(PP$%*+@%AE-DX%X!" h'%&E(OE"h!# " 	@Hu%56645&'1"#H-2- ~.BCCAB34X>/0:a?:	@ ~i()B.!3EB B Bs   HH!!H*c                    d}| j                  dg       }|r|t        dt        |      dz        z  }| j                  dd      }|r|t        d|dz        z  }| j                  dd	      }t        |      d
kD  r|dz  }t        |      dkD  r|dz  }t        |      dkD  r|dz  }| j                  dg       }|t        dt        |      dz        z  }| j                  dg       }|t        dt        |      dz        z  }t        d
|      S )z0Calculate a quality score (0-100) for a responser   r'  r  r  r2  rI  r  r   r=  r  r*   rZ  rN   r  r  r"   r  r    )r9   r  r  )rJ  r}  r'  rP  r   r  r  s          r=   r  r    s   E 	2.GRW)**   !115HRA&& x,F
6{S
6{S
6{S
 x,F	SS[1_%%E ""<4J	SS_q())EsE?r?   z
/analyticsc                  P   t         5  t        j                  dg       } t        |       dkD  r| dd n| }|rt	        d |D              t        |      z  nd}|rt	        d |D              t        |      z  nd}|rt	        d |D              t        |      z  nd}i }|D ],  }|j                  d	d
      }|j                  |d      dz   ||<   . t        t        j                  di       j                         d      dd }g }	|D ]_  \  }
}|	j                  |
|j                  dd      t        |j                  dd      d      t        |j                  dd      d      d       a t        t        j                  di       j                         d d      dd }t        t        |       t        |d      t        |d      t        |d      t        j                  d      d||	|D cg c]
  \  }}||d c}}t        t        j                  di             t        t              dt        t        j                        t	        d t        j                  D              t        t        d t        j                  D                    dd      cddd       S c c}}w # 1 sw Y   yxY w) zGet analytics dashboard datar  r  iNc              3   @   K   | ]  }|j                  d d        yw)r  r   Nr  rp  r  s     r=   rr  z get_analytics.<locals>.<genexpr>       L!%%3Lr
  r   c              3   @   K   | ]  }|j                  d d        yw)r  r   Nr  r  s     r=   rr  z get_analytics.<locals>.<genexpr>  r   r
  c              3   @   K   | ]  }|j                  d d        yw)r:  r   Nr  r  s     r=   rr  z get_analytics.<locals>.<genexpr>  s     U&8! <Ur
  r{  r  rM   r  T)rV     r  r  r  )dater  r  r  r  c                     | d   S r  rJ   r  s    r=   r  zget_analytics.<locals>.<lambda>  s
    !A$ r?   r  r*   r  )total_queriesavg_quality_scoreavg_sources_per_queryavg_response_time_msr  )r  r  )coveredtotal_availablec              3   D   K   | ]  }|j                  d       sd  ywrP  r  r	  s     r=   rr  z get_analytics.<locals>.<genexpr>   s     )aquuU_O`!)arQ  c              3   @   K   | ]  }|j                  d d        ywr  r  r	  s     r=   rr  z get_analytics.<locals>.<genexpr>  s     )^!!%%"*=)^r
  )r  enriched_documentsunique_sources)r  phase_distributiondaily_trendstop_doctrinesr  document_stats)r  r  r9   r  rx  r  rw  r  r  r   r  r  r  r  )all_queriesrecent_queriesr  avg_sourcesr  phase_countsr  r{  recent_daysr  daystatsr  r  r  s                  r=   get_analyticsr    s    
 6$((B7/2;/?#/ETU+; gucL^LLsSaObbz{ftcL^LLsSaObbz{o}CUnUUX[\jXkk  DE  	AAEE'9-E"."2"25!"<q"@L	A
 ^//rBHHJTXYZ\[\]% 	JC 99]A6$UYY}a%@!D%*5995H!+La%P	! 	 3R8>>@
 2	 !$[!1%*;%:).{A)>(-.?(C!/!3!3O!D #/(FSTda1q9T~112FKL#&#7#
 $'|'='=#>&))a\5K5K)a&a"%c)^|G]G])^&^"_
 E6 6X UY6 6s   GJ%J4BJJJ%z/analytics/query-logc                  p   t         5  t        t        j                  j	                  dd            } t        t        j                  j	                  dd            }t
        j	                  dg       }t        t        |            ||| z    }t        |t        |      | |d      cddd       S # 1 sw Y   yxY w)zGet recent query loglimitrL  offsetr   r  )r  r  r  r  N)
r  rG   r   r  r9   r  r  r  r   r  )r  r  r  recents       r=   get_query_logr     s     
 GLL$$Wb12W\\%%h23 $$Y3hw'(?\	
   s   BB,,B5z/analytics/doctrinal-coveragec                     t         5  t        j                  di       } g }t        j	                         D ]  \  }}| j                  |d      }d}t
        j                  D ]?  }|j                  dd      j                         t        fd|dd D              s;|dz  }A |j                  ||||dd t        d	|t        dt        t
        j                              z  d	z  d
z        d        |j                  d d       t        |t        t              t        |D cg c]  }|d   dkD  s| c}      t        |D cg c]  }|d   dkD  s| c}      d      cddd       S c c}w c c}w # 1 sw Y   yxY w)z&Get detailed doctrinal coverage reportr  r   r  r=  c              3   B   K   | ]  }|j                         v   y wr  r  )rp  r  r  s     r=   rr  z)get_doctrinal_coverage.<locals>.<genexpr>'  s     E4tzz|w.Er  Nr  rM   r  r    )r  r  document_count	key_termscoverage_scorec                     | d   S )Nr#  rJ   r  s    r=   r  z(get_doctrinal_coverage.<locals>.<lambda>3  s    !$4"5 r?   Tr  r  r#  )r  total_doctrinescovered_in_queriescovered_in_documents)r  r  r9   r  rw  r  r  r  r  r  r  ry  r  r  r   )	r  reportr  r|  r  	doc_countr*  r  r  s	           @r=   get_doctrinal_coverager,    s~    
 !%%&:B? .446 	OHe",,x3KI#-- #'')R0668E5!9EENI#
 MM$*"+"2AY"%cIAs<CYCY?Z8[,[_b+bef+f"g 	$ 	5tD"?3"%&&QQAm<Lq<Pq&Q"R$'F(Vqa@P>QTU>U(V$W	
 3 8 'R(V; s=   BE5BE5"E+0E+4E5E0E0E5+
E55E>z/analytics/exportc                  X    t         5  t        t              cddd       S # 1 sw Y   yxY w)zExport analytics data as JSONN)r  r   r  rJ   r?   r=   export_analyticsr.  =  s$     
 '~&' ' 'r  )r  z[Analytics] Init error: __main__RAG_PORTi  z;[RAG] Starting Advanced Legal Document RAG Service on port z[RAG] FAISS: z, SentenceTransformers: z, PyMuPDF: z[RAG] Documents loaded: z0.0.0.0)hostportdebugr  )rC   r7   sysr  r  r  r  r  r(  r!  queuer   r   pathlibr   	functoolsr   dataclassesr   typingr	   r
   r   r   r   flaskr   r   r   r   r   werkzeug.utilsr   r   r^  r  r  r  r  r  r  r  r  r!  rI   rG   r  rH   r#  STRATEGIC_ADDITIONSr  r  rv  r~  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r>  rA  rR  r  r  r  r  r  ENHANCED_METADATA_FIELDSr  r.  r/  r%  r  ImportErrorpdfminer.high_levelr  r  r  PyPDF2r  r  numpyr  	HAS_NUMPYsentence_transformersr  r  r  r  r  r  r  r  r  r8  rI  r@   appcase_apir  register_blueprintr  r  r  r  r  r  r  r  r8   r9   r  Lockr  r{  Queueprocessing_queuer  r   r  r  r  r  r  r  r  r  router  r  r  r  r  r  r  r  r&  r)  rK  r  rU  rd  rg  rl  rs  ru  r  r  r  r  r  r  r  rM  r  r  r  r  r  r  r  r?  r  r  r   r,  r.  r  r  r2  r  r  runrJ   r?   r=   <module>rL     s   
 
     	    '   ! 3 3 O O *
 $8 $8 $8P !$sdhi|}~#sdhi|}~3$dexyz!SDfgz{|"cTgh{|} 

	
 <,. "./1>?     &# %     ' (



i9 ~


A$ L
# 
# 
  	  djT-/FG]S S MNS? S T 






I)V# $  &C D   (c d D 1BD/=?179 ;=+/1";="68 :< 4 EN	 UL	 38	 A' 6#   :#d #4 #SX #RR7: 8K<B 6< 9X8O. `C D BEKVa S8.IOS	 Z-/KLRV	 SI57RS\	 LPNT	  Y]M[	 ZYKL	 MMMU	Y2 h  .3 3 *   #
 $ 4 > 	KDL J
I
&9 $
I
Lz zzM MfU Uv+ +bP Pl Hk27 7#	
89
 
V$6" &v.  BJJNN+H"MN  	  ).."5;;=   )),DD A *F 

0PQ A B     ! #	
C 	
 	
 	
c 	
8AC 8AD 8AC 8A~ 3+ + 8~ ~JdU dt dXU Xt X	U 	U4 	U. .$ .b 9ug& ' 9ug& '4 9vh'#  (#L %1Z 2Z 6F8$h"  %h"R 9vh'  ($ <%) *8 9ug&6 '6r &8 9. 1E7C D0 $x.@A*9  B*9Z 9vh'  ( 9vh'Z  (Zz %1* 2* %1% 2%2 fX.U  /Uv ;(& )&R  5'2= 3=@ fX.  /L !X\\(,,/99;  !? :Ds D4 DLd s D <%)8 *8v !E73 4$ *UG<  = F 0' 1'*" zrzz~~j$/0D	Gv
NO	M)$<=V<WWbcnbo
pq	$S)?)?%@$A
BCGGUG3 c^  K  L  J  I  & %&  I  L@$  7	1!
5667D9  *	$QC
())*s   8b; ?c	 c c% c3 !d (d +d &d; ;cc	ccc"!c"%c0/c03c>=c>ddddd8"d33d8;e ee