
    HCi?                     D   d Z ddlZddlZddlZddlZddlZddlmZmZm	Z	 	 ddl
mZmZ dZd	Zd
Z	 	 	 	 d2dede	e   dededede	e   fdZdedefdZdedefdZdedefdZdee   dee   fdZefdee   dedee   fdZdee   deeee   f   fdZdedee   dedefdZdedee   defdZd3ded ed!edefd"Zded#edefd$Z e!d%k(  r1d&d'd(d)id*d+d,d(d)id*d-d.d(d/id*gZ" ed0e"      Z# e$d1        e$e#       yy# e$ r  ej                  dd      ZdZdZY w xY w)4zz
Answer Synthesis Pipeline - 3-Layer Enhancement for RAG Quality
Fixes: Answer incoherence, chunk mixing, reasoning leaks
    N)ListDictOptional)GROK_API_KEY
GROK_MODELz$https://api.x.ai/v1/chat/completionsXAI_API_KEY zgrok-2-1212gffffff?   promptsystem_prompt
max_tokenstemperaturetimeoutreturnc                 t   t         sy	 dt          dd}g }|r|j                  d|d       |j                  d| d       t        |||dd	}t        j                  t
        |||
      }|j                          |j                         d   d   d   d   S # t        $ r}	t        d|	        Y d}	~	yd}	~	ww xY w)z4Call Grok API for synthesis (grounded final answer).NzBearer zapplication/json)AuthorizationzContent-Typesystem)rolecontentuserF)modelmessagesr   r   stream)headersjsonr   choicesr   messager   zGrok API Error: )
r   appendr   requestspostGROK_API_URLraise_for_statusr   	Exceptionprint)
r   r   r   r   r   r   r   payloadresponsees
             4/var/www/html/leadgen/airagagent/answer_synthesis.pycall_grok_apir)      s     &|n5.

 OOX-HIF;<   $&
 ==wWV]^!!#}}y)!,Y7	BB  $%s   BB 	B7B22B7textc                    | s| S d| v r"| j                  d      d   j                         } d| v r&d| vr"| j                  d      d   j                         } | j                  dd      j                         } | j                  dd      j                         } g d}g d	}| j                         }d}|D ]4  }|j	                  |j                               }|dk7  s(|dk(  s||k  s3|}6 |dkD  r$|t        |       d
z  k  r| |d j                         S | j                  d      }g }d}	d}
|D ]  }|j                         |	rst        fd|D              }t        j                  dt        j                        rd}|r|
dz  }
[t              dkD  s|	rld}	d}
|j                  |        dj                  |      j                         }t        |      dk  rht        |       dkD  rZ| j                  d      }t        |      D ]9  j                         t              dkD  s"t        fd|D              r7c S  | S |r|S | S )zx
    Remove AI reasoning/thinking from response.
    Handles DeepSeek R1 model's verbose reasoning about documents.
    z</think>z<think>r   u   <｜end▁of▁sentence｜>r	   z<|end_of_sentence|>)zC^(Okay|Alright|Let me|I'll|I need to|Looking at|Based on|First,? I)z6^(The user is asking|The question asks|To answer this)z-^(I see that|I notice that|I can see|I found)z)^(Checking|Analyzing|Reviewing|Searching)z.^(First, there's|Document \d|Now,? looking at)z)^(Putting it all together|So,? from this)z/^(That's about|That's not relevant|Not related)z$^(Also unrelated|Again not relevant))
zThe answer iszIn summary,zTo summarize,zIn conclusion,z
Therefore,zThe book is aboutzWilliam Cooper wrotezThe main themes arezBased on the documents:z**From    N
Tc              3   h   K   | ])  }t        j                  |t         j                         + y wNrematch
IGNORECASE).0patternline_strippeds     r(   	<genexpr>z"strip_reasoning.<locals>.<genexpr>   s"     mw288G]BMMJm   /2z+^Document \d+ (is|talks|discusses|mentions)   2   Fd      

c              3   h   K   | ])  }t        j                  |t         j                         + y wr0   r1   )r5   pparas     r(   r8   z"strip_reasoning.<locals>.<genexpr>   s#     *hPQ288AtR]]+K*hr9   )splitstripreplacelowerfindlenanyr2   r3   r4   r   joinreversed)r*   reasoning_startersanswer_markers
text_loweranswer_startmarkerposlinesclean_linesskip_reasoningconsecutive_reasoninglineis_reasoningcleaned
paragraphsr7   rA   s                  @@r(   strip_reasoningrY   A   se   
  Tzz*%b)//1 DZt3zz)$Q'--/ <<5r:@@BD<<-r288:D	N JL  #ooflln-"9r!S<%7"	# aL3t9>9LM"((** JJtEKN )

 - mZlmm 88BMSUS`S`aL!Q&! =!B&n!&()%""4(-)0 ii$**,G 7|cc$i#oZZ'
Z( 	D::<D4y3s*hUg*h'h	
 7'4'    c                     t        j                  dd|       } t        j                  dd| t         j                        } t        j                  dd|       j                         } | S )z)Remove metadata artifacts from chunk textz\(Chunk \d+\)r	   zAdjacent Context:.*?\nflags\s+ )r2   subr4   rC   )r*   s    r(   clean_chunk_textra      sP     66"B-D66+RR]]KD66&#t$**,DKrZ   c                     t        j                  dd| xs dj                         j                               }|syt	        j
                  |dd j                  d            j                         S )z:Stable-ish fingerprint for deduping near-identical chunks.r^   r_   r	   NiX  zutf-8)r2   r`   rC   rE   hashlibmd5encode	hexdigest)r*   
normalizeds     r(   _chunk_fingerprintrh      sZ    djb%7%7%9%?%?%ABJ;;z$3'..w78BBDDrZ   chunksc                     t               }g }| D ]G  }t        |j                  dd            }|s!||v r&|j                  |       |j	                  |       I |S )zIRemove duplicate/near-identical chunks to reduce repetition in synthesis.r   r	   )setrh   getaddr   )ri   seendedupedchunkfps        r(   dedupe_chunksrr      s`    5DG 		)R 89:u NrZ   	min_scorec                     g }| D ]I  }|j                  dd      }||k\  st        |j                  dd            |d<   |j                  |       K |S )z"Filter chunks by score and qualityscorer   r   r	   )rl   ra   r   )ri   rs   filteredrp   ru   s        r(   filter_chunksrw      s[    H #		'1%I/		)R0HIE)OOE"	#
 OrZ   c                 N   i }| D ]z  }|j                  di       j                  dd      }t        j                  dd|      }|j                  dd      j                  dd      }||vrg ||<   ||   j	                  |       | |D ]  }t        ||   d	 d
      dt         ||<     |S )z%Group chunks by their source documentmetadatasourceUnknown\.pdf$|\.txt$r	   _r_   -c                 &    | j                  dd      S )Nru   r   rl   xs    r(   <lambda>z*group_chunks_by_document.<locals>.<lambda>   s    gq@Q rZ   TkeyreverseN)rl   r2   r`   rD   r   sortedMAX_CHUNKS_PER_DOC)ri   groupedrp   rz   s       r(   group_chunks_by_documentr      s    G &:r*..xC("f5S)11#s;  GFOu%&  v 6Q[_`atbtuv NrZ   doc_namequeryc                    |syg }t               }|D ]  }t        |j                  dd            }t        |      dk  r-|dd j	                         }t        |      }|r||v rR|r|j                  |       |j                  |       t        |      dk\  s n |sydj                  |D 	cg c]  }	d|	 	 c}	      }
|
S c c}	w )	z
    Build a compact, grounded snippet from top chunks.
    We prefer verbatim excerpts (trimmed) over weak extractive summaries.
    r	   r   (   Ni  r-   r.   z- )	rk   ra   rl   rG   rC   rh   rm   r   rI   )r   ri   r   excerptsrn   rp   r   excerptrq   r'   bulletss              r(   summarize_document_chunksr      s    
  H5D "599Y#;<w<"$3-%%'("*HHRL x=A ii84a2aS45GN 5s   ,C
raw_chunksc                    | j                         }t        t        j                  d|            }t               }|D ]8  }|j	                  di       j	                  dd      }|s(|j                  |       : i }|D ]  }t        j                  dd|t        j                        }	|	j                  dd      j                  d	d      }	h d
}
t        j                  d|	      D cg c]%  }|j                         |
vs|j                         ' }}|D ]  }||vrg ||<   ||   j                  |       !  t               }|D ]L  }||v r|j                  ||          |j                         D ]  \  }}||v s||v s|j                  |       ! N g }|D ]  }|j	                  dd      }|j	                  di       j	                  dd      }d}||v rd}||z   }|dk\  s|dkD  sOt        |j	                  dd            |d<   ||d<   |j                  |        |j                  d d       t        |      }|syt        |dd       }g }g }|j!                         D ]K  }|j                         t#        fd|D              }|r|j                  |       ;|j                  |       M |j%                  |       g }|D ].  }||   }t'        |||       }|s|j                  d| d|        0 |sydj)                  |      S c c}w )z
    LAYER 1-2: Transform raw chunks into clean, organized context.
    
    Args:
        query: User's question
        raw_chunks: Raw chunks from vector search
        
    Returns:
        Enhanced, organized context string
    z\b\w+\bry   rz   r	   r|   r\   r}   r_   r~   >	   byandcomfororgpdfthewww
oceanofpdfz
\b\w{3,}\bru   r   g      @g333333?r   adjusted_scorec                 F    | j                  d| j                  dd            S )Nr   ru   r   r   r   s    r(   r   z+enhance_retrieved_context.<locals>.<lambda>P  s    &6gq8I J rZ   Tr   zANo sufficiently relevant information found in the knowledge base.N   c              3   D   K   | ]  }t        |      d kD  s|v   yw)r
   N)rG   )r5   qw	clean_docs     r(   r8   z,enhance_retrieved_context.<locals>.<genexpr>a  s     QSWq[B)OQs    	 zSOURCE: r.   r>   )rE   rk   r2   findallrl   rm   r`   r4   rD   r   updateitemsra   sortrr   r   keysrH   extendr   rI   )r   r   query_lowerquery_words	doc_namesrp   rz   doc_keywordsr   
clean_name
stop_wordswwordswordmatching_docs
query_wordkeyworddocsrv   ru   boostr   r   ordered_docs
other_docsmatches_querycontext_parts
doc_chunkssummaryr   s                                @r(   enhance_retrieved_contextr     s;    ++-Kbjj[9:K I ":r*..x<MM&!" L 0VV,b("--P
''S199#sC
Z
$&JJ}j$IiqQWWY^hMhii 	0D<'%'T"%%h/	00 EM! +
%  j!9:)//1 	+MGTW$:(=$$T*	+	+ H #		'1%:r*..x< ]"ES EAI/		)R0HIE)&4E"#OOE"#" MMJTXMY X&HR 'x}5G LJLLN (NN$	QkQQ)h'( 
#M  CX&
+Hj%H  8H:Ry!AB	C R;;}%%O js   $L<Lenhanced_contextuse_grokc                     |xr d|v}|r0t         r*|r(d}d|  d| d}t        ||ddd	      }|rt        |      S | }|S )
a4  
    LAYER 3: Generate final coherent answer using LLM.
    
    Args:
        query: User's question
        enhanced_context: Clean, organized context from Layer 1-2
        use_grok: Whether to use Grok API (falls back to simple format if False)
        
    Returns:
        Final synthesized answer
    z$No sufficiently relevant informationa  You are CashHive RAG, an expert research assistant.
You MUST follow these rules:
- Use ONLY the provided SOURCES and their excerpts. Do not add outside facts.
- Write a single coherent answer that merges the sources.
- After factual claims, add citations like [SourceName]. Multiple citations allowed.
- If sources conflict, explicitly note the conflict and cite both sides.
- If the sources do not contain enough info, say what is missing.
- Do NOT reveal chain-of-thought or hidden reasoning.
z
QUESTION:
z

SOURCES (verbatim excerpts):
z9

Write the best possible grounded answer with citations.  皙?K   )r   r   r   r   r   )r   r)   rY   )r   r   r   has_contextr   synthesis_promptgrok_responseanswers           r(   synthesize_answerr   v  s     #e'MUe'eKL[F 	     8	; &#'
 "=11 !!FMrZ   raw_responsec                     |j                  dg       }|j                  dd      t        | |      }dv xs$ t              dk  xs t        fddD              }|s	 t	        | |      }||d	d
 dt        |      dS )z
    Main entry point: Process RAG response through 3-layer pipeline.
    
    Args:
        query: Original user query
        raw_response: Raw response from RAG /ask endpoint
        
    Returns:
        Enhanced response with synthesized answer
    sourcesr   r	   Chunkr;   c              3   H   K   | ]  }|d d j                         v   y w)Nr<   )rE   )r5   r   original_answers     r(   r8   z'process_rag_response.<locals>.<genexpr>  s&     `dDODS)//11`s   ")zlet mez
looking atzi seeTN   )r   r   enhancedsource_count)rl   r   rG   rH   r   rY   )r   r   r   r   needs_enhancementfinal_answerr   s         @r(   process_rag_responser     s     !!)R0J"&&x4O 1
C 	?" 	aOr!	a`>_``  D(0@A
 bq>J	 rZ   __main__z:(Chunk 72) William Cooper discusses UFOs and government...g333333?rz   zWilliam_Cooper_Pale_Horse.pdf)r   ru   ry   z(The secret societies have long hidden...g
ףp=
?z%Hasheesh was introduced by doctors...g?zHasheesh_Eater.pdfz2What are the main themes in William Cooper's book?zEnhanced Context:)Nr   r   <   )T)%__doc__r2   r   r   osrc   typingr   r   r   grok_apir   r   r!   ImportErrorgetenvMIN_CHUNK_SCOREr   strintfloatr)   rY   ra   rh   rr   rw   r   r   r   boolr   r   __name__test_chunksresultr$    rZ   r(   <module>r      sm   
   	  ' '19L  
 $($$C=$ $ 	$
 $ c]$Ng(# g(# g(T3 3 ES ES E$t* d  :I $t*  TRVZ T$Z Dd4j4I ( T$Z  PS @f&S f&d4j f&S f&R/S /C /4 /SV /d" "4 "D "L zP[_nv  yX  nY  	Z>\d  gF  \G  	H;dYacwXxyK ''[]hiF	
	&M }  299]B/L9LJs   
D   DD