
    iP                        S SK rS SK7  S SKrS SKrS rS r\S 4S jr	SS jr
SS jr\rSS jrS	 r\rSS
 jrS rS r\rSS jr\rSS jr " S S5      rS S jr " S S5      r\S4S jrS!S jr " S S5      rS r\rS r\ r!S"S jr g)#    N)*c                    [         R                  " U SS9n U R                  u  p#[         R                  " X!4SS9n[         R                  " X!4SS9n[        R
                  " 5       n[        U5      Ul        [        U5      Ul        X&l	        Xl
        UR                  5         UR                  U[        U 5      5        UR                  5         XT4$ )zLreturn k smallest values (and their indices) of the lines of a
float32 arrayfloat32dtypeint64)npascontiguousarrayshapezerosfaissfloat_maxheap_array_tswig_ptridsvalnhkheapifyaddnreorderarrayr   mnIDhas          d/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/faiss/extra_wrappers.pykminr             i8E;;DA
!w'A
!y)A		$	$	&Ba[BFa[BFEDJJLGGAxJJL4K    c                    [         R                  " U SS9n U R                  u  p#[         R                  " X!4SS9n[         R                  " X!4SS9n[        R
                  " 5       n[        U5      Ul        [        U5      Ul        X&l	        Xl
        UR                  5         UR                  U[        U 5      5        UR                  5         XT4$ )zKreturn k largest values (and their indices) of the lines of a
float32 arrayr   r   r   )r	   r
   r   r   r   float_minheap_array_tr   r   r   r   r   r   r   r   r   s          r   kmaxr$   +   r    r!   c                    [         R                  " U SS9n [         R                  " USS9nU R                  u  pEUR                  u  pgXW:X  d   e[         R                  " XF4SS9nU[        :X  a,  [        XT[        U 5      U[        U5      [        U5      5        U$ U[        :X  a  XR                  -  USS& U$ [        XT[        U 5      U[        U5      X#[        U5      5        U$ )zFcompute the whole pairwise distance matrix between two sets of
vectorsr   r   N)
r	   r
   r   empty	METRIC_L2pairwise_L2sqrr   METRIC_INNER_PRODUCTTpairwise_extra_distances)	xqxbmetric
metric_argnqdnbd2diss	            r   pairwise_distancesr5   =   s     
		b		2B			b		2BHHEBXXFB7N7
((B89
-C8B<SM	 J 
'	'ddA J 	!8B<SM		
 Jr!   c                 p    [         R                  " U SS9n[        [        U5      UR                  U5        U$ Nr   r   )r	   r&   
float_randr   sizer   seedress      r   randr=   V   s+    
((1I
&Cx}chh-Jr!   c                     [         R                  " U SS9nUc"  [        [        U5      UR                  U5        U$ [        [        U5      UR                  X!5        U$ Nr   r   )r	   r&   
int64_randr   r9   int64_rand_max)r   r;   vmaxr<   s       r   randintrC   \   sM    
((1G
$C|8C=#((D1 J 	x}chh;Jr!   c                 p    [         R                  " U SS9n[        [        U5      UR                  U5        U$ r7   )r	   r&   float_randnr   r9   r:   s      r   randnrF   h   s+    
((1I
&Csxx.Jr!   c                 
   U R                  S5      n U R                  S:X  a  [        U R                  [	        U 5      5      $ U R
                  u  p[        R                  " USS9n[        X[	        U 5      [	        U5      5        U$ )z=compute a checksum for quick-and-dirty comparisons of arrays uint8   uint64r   )	viewndimbvec_checksumr9   r   r   r	   r   bvecs_checksum)ar   r1   css       r   checksumrQ   n   sc    	wAvv{QVVXa[1177DA	!8	$B1!hrl3Ir!   c                 ^    [         R                  " X4SS9n[        X[        U5      U5        U$ r7   )r	   r&   rand_smooth_vectors_cr   )r   r1   r;   r<   s       r   rand_smooth_vectorsrT   z   s)    
((A6
+C!t4Jr!   c                 R   [         R                  " U SS9n [         R                  " USS9nU R                  S   nUR                  S   U:X  d   eU R                  S   UR                  S   pCSn[        U5       H*  nU[	        U[        X   5      U[        X   5      5      -  nM,     U$ )z;size of intersection between each line of two result tablesr   r   r   rI   )r	   r
   r   rangeranklist_intersection_sizer   )I1I2r   k1k2ninteris          r   eval_intersectionr^      s    			b	0B			b	0B
A88A;!XXa["((1+F1X,Xbe_6 	6  Mr!   c                 d    [        U R                  S   U R                  S   [        U 5      5        g )NrI   r   )fvec_renorm_L2r   r   xs    r   normalize_L2rc      s"    1771:qwwqz8A;7r!   c           	         [         R                  " U SS9n Uc  [        U R                  5       S-   5      n[         R                  " US-   SS9n[         R                  " U R
                  SS9n[        U R
                  [        R                  " U R                  S5      5      U[        R                  " U5      [        R                  " U5      U5        X44$ )a  Perform a bucket sort on a table of integers.

Parameters
----------
tab : array_like
    elements to sort, max value nbucket - 1
nbucket : integer
    number of buckets, None if unknown
nt : integer
    number of threads to use (0 = use unthreaded codepath)

Returns
-------
lims : array_like
    cumulative sum of bucket sizes (size vmax + 1)
perm : array_like
    perm[lims[i] : lims[i + 1]] contains the indices of bucket #i (size tab.size)
r   r   rI   rJ   )
r	   r
   intmaxr&   r9   bucket_sort_cr   r   rK   )tabnbucketntlimsperms        r   bucket_sortrm      s    & 

s'
2Ccggi!m$88GaKw/D88CHHG,D%..(!34%u~~d';

 :r!   c           	      B   U R                   S:X  d  U R                   S:X  d   eU R                  u  p4Uc  [        U R                  5       S-   5      n[        R
                  " US-   SS9n[        X4[        R                  " U 5      U[        R                  " U5      U5        U$ )af  Perform a bucket sort on a matrix, recording the original
row of each element.

Parameters
----------
tab : array_like
    array of size (N, ncol) that contains the bucket ids, maximum
    value nbucket - 1.
    On output, it the elements are shuffled such that the flat array
    tab.ravel()[lims[i] : lims[i + 1]] contains the row numbers
    of each bucket entry.
nbucket : integer
    number of buckets (the maximum value in tab should be nbucket - 1)
nt : integer
    number of threads to use (0 = use unthreaded codepath)

Returns
-------
lims : array_like
    cumulative sum of bucket sizes (size vmax + 1)
int32r   rI   r   )	r   r   re   rf   r	   r&   matrix_bucket_sort_inplace_cr   r   )rh   ri   rj   nrowncolrk   s         r   matrix_bucket_sort_inplacers      s    , 99399#777JDcggi!m$88GaKw/D ENN3'%

 Kr!   c                   4    \ rS rSrSrS	S jrS rS rS rSr	g)

ResultHeap   z[Accumulate query results from a sliced dataset. The final result will
be in self.D, self.I.c                 p   [         R                  " X4SS9U l        [         R                  " X4SS9U l        XsU l        U l        U(       a  [        5       nO
[        5       nX$l        Xl        [        U R                  5      Ul
        [        U R                  5      Ul        UR                  5         X@l        g)z{
nq: number of query vectors,
k: number of results per query
keep_max: keep the top-k maximum values instead of the minima
r   r   r   N)r	   r   r   r   r0   r   r#   r   r   r   r   r   r   heaps)selfr0   r   keep_maxrx   s        r   __init__ResultHeap.__init__   s     2'12'3)+E)+ETVV$	TVV$	
r!   c                    UR                   u  p4[        R                  " USS9n[        R                  " USS9nUR                   X44:X  d   eX0R                  :X  d   eU R                  R                  U[        U5      [        U5      U5        g)z{
Add results for all heaps
D, I should be of size (nh, nres)
D, I do not need to be in a particular order (heap or sorted)
r   r   r   N)r   r	   r
   r0   rx   addn_with_idsr   )ry   r   r   r0   kds        r   
add_resultResultHeap.add_result   sy       )4  '2ww2("""WW}}

  QK	r!   c           	         UR                   u  pEU[        U5      :X  d   eUR                  S:X  a  UR                   UR                   :X  d#  UR                  S:X  a  UR                   U4:X  d   e[        R                  " USS9n[        R                  " USS9n[        R                  " USS9nUR                  S:X  a  SOUnU R
                  R                  U[        U5      U[        U5      [        U5      U5        g)z
Add results for a subset of heaps.
D, I should hold resutls for all the subset
as a special case, if I is 1D, then all ids are assumed to be the same
   rI   r   r   r   r   N)r   lenrL   r	   r
   rx   addn_query_subset_with_idsr   )ry   subsetr   r   nsubsetr   	id_strides          r   add_result_subsetResultHeap.add_result_subset  s     gg#f+%%%FFaKAGGqww.FFaKAGGv-	
.   )4  '2%%fG<1A"	

--Xf%Xa[)	
r!   c                 8    U R                   R                  5         g N)rx   r   )ry   s    r   finalizeResultHeap.finalize  s    

r!   )r   r   rx   r   r0   NF)
__name__
__module____qualname____firstlineno____doc__r{   r   r   r   __static_attributes__ r!   r   ru   ru      s    (
*r!   ru   c                 f   UR                   U R                   :X  d   eU R                   u  p4n[        R                  " XE4U R                  S9n[        R                  " XE4UR                  S9nU(       a  [        O[
        nU" XEU[        U 5      [        U5      [        U5      [        U5      5        Xg4$ )z
Merge a set of sorted knn-results obtained from different shards in a dataset
Dall and Iall are of size (nshard, nq, k) each D[i, j] should be sorted
returns D, I of size (nq, k) as the merged result set
r   )r   r	   r&   r   merge_knn_results_CMaxmerge_knn_results_CMinr   )	DallIallrz   nshardr   r   DnewInewfuncs	            r   merge_knn_resultsr     s     ::###::LFq88QF$**-D88QF$**-D%-!3ID	f
 :r!   c                   &    \ rS rSrS rS rS rSrg)MapInt64ToInt64i1  c                 ,   [        [        R                  " U5      5      U l        USU R                  -  :X  d   S5       eXl        [        R
                  " US4SS9U l        [        R                  " U R                  [        U R                  5      5        g )Nr   zneed power of 2 capacityr   r   )
re   r	   log2log2_capacitycapacityr&   rh   r   hashtable_int64_to_int64_initr   )ry   r   s     r   r{   MapInt64ToInt64.__init__3  sq     !231 2 222N4NN2 88XqM9++D,>,>@RSr!   c           	          UR                   u  nUR                   U4:X  d   e[        R                  " U R                  [	        U R
                  5      U[	        U5      [	        U5      5        g r   )r   r   hashtable_int64_to_int64_addr   r   rh   )ry   keysvalsr   s       r   addMapInt64ToInt64.add:  sR    ZZzzaT!!!** 2x~x~	/r!   c           	          UR                   u  n[        R                  " U4SS9n[        R                  " U R
                  [        U R                  5      U[        U5      [        U5      5        U$ r?   )r   r	   r&   r   hashtable_int64_to_int64_lookupr   r   rh   )ry   r   r   r   s       r   lookupMapInt64ToInt64.lookupA  sW    ZZxxG,-- 2x~x~	/ r!   )r   r   rh   N)r   r   r   r   r{   r   r   r   r   r!   r   r   r   1  s    T/r!   r           c                 h   [         R                  " U SS9n [         R                  " USS9nU R                  u  pVUR                  u  pxXh:X  d   e[         R                  " XR4SS9n	[         R                  " XR4SS9n
U[        :X  a7  [        [        U 5      [        U5      XeXr[        U
5      [        U	5      5        X4$ U[        :X  a7  [        [        U 5      [        U5      XeXr[        U
5      [        U	5      5        X4$ [        [        U 5      [        U5      XeXsXB[        U
5      [        U	5      5
        X4$ )au  
Compute the k nearest neighbors of a vector without constructing an index


Parameters
----------
xq : array_like
    Query vectors, shape (nq, d) where the dimension d is that same as xb
    `dtype` must be float32.
xb : array_like
    Database vectors, shape (nb, d) where dimension d is the same as xq
    `dtype` must be float32.
k : int
    Number of nearest neighbors.
metric : MetricType, optional
    distance measure to use (either METRIC_L2 or METRIC_INNER_PRODUCT)

Returns
-------
D : array_like
    Distances of the nearest neighbors, shape (nq, k)
I : array_like
    Labels of the nearest neighbors, shape (nq, k)
r   r   r   )
r	   r
   r   r&   r'   	knn_L2sqrr   r)   knn_inner_productknn_extra_metrics)r,   r-   r   r.   r/   r0   r1   r2   r3   r   r   s              r   knnr   M  s   2 
		b		2B			b		2BHHEBXXFB7N7
"(A
"	*ARL(2,2(1+x{	
  4K 
'	'RL(2,2(1+x{	
 4K 	RL(2,2zQK!	
 4Kr!   c                    U R                   u  pEUR                   u  pgXW:X  d   e[        R                  " XB4SS9n[        R                  " XB4SS9n	US:X  a  [        R                  " 5       n
X*l        XJl        [        R                  " U	5      U
l        [        R                  " U5      U
l	        [        R                  " U
[        R                  " U 5      [        R                  " U5      UUS5        X4$ US:X  an  [        R                  " [        R                  " U 5      [        R                  " U5      XFX%[        R                  " U5      [        R                  " U	5      5        X4$ [        e)a^  
Compute the k nearest neighbors of a set of vectors without constructing an index.

Parameters
----------
xq : array_like
    Query vectors, shape (nq, d) where d is the number of bits / 8
    `dtype` must be uint8.
xb : array_like
    Database vectors, shape (nb, d) where d is the number of bits / 8
    `dtype` must be uint8.
k : int
    Number of nearest neighbors.
variant : string
    Function variant to use, either "mc" (counter) or "hc" (heap)

Returns
-------
D : array_like
    Distances of the nearest neighbors, shape (nq, k)
I : array_like
    Labels of the nearest neighbors, shape (nq, k)
ro   r   r   hcrI   mc)r   r	   r&   r   int_maxheap_array_tr   r   r   r   r   hammings_knn_hchammings_knn_mcNotImplementedError)r,   r-   r   variantr0   r1   r2   r3   r   r   heaps              r   knn_hammingr     s   2 HHEBXXFB7N7
"(A
"(A$((*>>!$>>!$%..$ennR&8"q	
 4K 
DNN2r 2BANN1u~~a0	
 4K "!r!   c                   >    \ rS rSrSrS rS rS
S jrSS jrS r	S	r
g)Kmeansi  a_  Object that performs k-means clustering and manages the centroids.
The `Kmeans` class is essentially a wrapper around the C++ `Clustering` object.

Parameters
----------
d : int
   dimension of the vectors to cluster
k : int
   number of clusters
gpu: bool or int, optional
   False: don't use GPU
   True: use all GPUs
   number: use this many GPUs
progressive_dim_steps:
    use a progressive dimension clustering (with that number of steps)

Subsequent parameters are fields of the Clustring object. The most important are:

niter: int, optional
   clustering iterations
nredo: int, optional
   redo clustering this many times and keep best
verbose: bool, optional
spherical: bool, optional
   do we want normalized centroids?
int_centroids: bool, optional
   round centroids coordinates to integer
seed: int, optional
   seed for the random number generator

c                 |   Xl         U R                  U5        SU l        SU;   a  [        5       U l        O[        5       U l        UR                  5        HU  u  p$US:X  a  US:X  d  US:X  a
  [        5       nX@l        M)  [        U R                  U5        [        U R                  X$5        MW     U R                  5         g)zd: input dimension, k: nb of centroids. Additional
parameters are passed on the ClusteringParameters object,
including niter=25, verbose=False, spherical = False
Fprogressive_dim_stepsgpuTN)r1   resetr   "ProgressiveDimClusteringParameterscpClusteringParametersitemsget_num_gpusgetattrsetattr	set_index)ry   r1   r   kwargsvs        r   r{   Kmeans.__init__  s    
 

1"f,8:DG*,DGLLNDAEz9R$A #& # 	r!   c                    U R                   nU R                  R                  [        :X  a}  U R                  R                  (       a  [        U5      U l        O[        U5      U l        U R                  (       a/  [        R                  " U R                  U R                  S9U l        g g U R                  (       a  [        U R                  S9nO
[        5       nX l        g )N)ngpu)r1   r   	__class__r   	sphericalIndexFlatIPindexIndexFlatL2r   r   index_cpu_to_all_gpusGpuProgressiveDimIndexFactoryProgressiveDimIndexFactoryfac)ry   r1   r   s      r   r   Kmeans.set_index  s    FF77 44ww  (^
(^
xx"88$((S
  xx3B02Hr!   Nc                 T    Ub  [        U5      U l        SU l        SU l        SU l        g)z^prepare k-means object to perform a new clustering, possibly
with another number of centroids N)re   r   	centroidsobjiteration_stats)ry   r   s     r   r   Kmeans.reset  s*     =VDF#r!   c                    [         R                  " USS9nUR                  u  pEXPR                  :X  d   eU R                  R
                  [        :X  a  [        XPR                  U R                  5      nUbD  UR                  u  pxX:X  d   e[        R                  " UR                  5       UR                  5        UR                  XR                  U5        OmUb   eUb   eU R                  R                  (       a   e[!        XPR                  U R                  5      nUR                  U[#        U5      U R$                  5        [        R&                  " UR                  5      n	U	R)                  U R                  U5      U l        UR*                  n
[-        U
R/                  5       5       Vs/ s H  oR1                  U5      PM     n
n[         R2                  " U
 Vs/ s H  oR4                  PM     sn5      U l        SR7                  5       nU
 VVs/ s H  nU Vs0 s H  o[9        X5      _M     snPM!     snnU l        U R4                  R.                  S:  a  U R4                  S   $ S$ s  snf s  snf s  snf s  snnf )a|  Perform k-means clustering.
On output of the function call:

- the centroids are in the centroids field of size (`k`, `d`).

- the objective value at each iteration is in the array obj (size `niter`)

- detailed optimization statistics are in the array iteration_stats.

Parameters
----------
x : array_like
    Training vectors, shape (n, d), `dtype` must be float32 and n should
    be larger than the number of clusters `k`.
weights : array_like
    weight associated to each vector, shape `n`
init_centroids : array_like
    initial set of centroids, shape (n, d)

Returns
-------
final_obj: float
    final optimization objective

r   r   z,obj time time_search imbalance_factor nsplitr   r   r   )r	   r
   r   r1   r   r   r   
Clusteringr   r   copy_array_to_vectorravelr   trainr   r   ProgressiveDimClusteringr   r   vector_float_to_arrayreshaper   rV   r9   atr   r   splitr   )ry   rb   weightsinit_centroidsr   r1   clusncr3   r   statsr]   ststat_fieldsfields                  r   r   Kmeans.train  s   4   )4wwFF{{77 44a1D)'--ww**>+?+?+A4>>RJJq**g. ?"?!)))ww((((+Avvtww?DJJq(1+txx0//?	"**46615$$&+EJJL&9:&9!&9:88e4eVVe45DJJL  
 5@@K5GB&&K@ 
  $xx}}q0txx|9c9 ;4 A 
s$   ,I,I1
I;I61I;6I;c                 N   [         R                  " USS9nU R                  c   S5       eU R                  R	                  5         U R                  R                  U R                  5        U R                  R                  US5      u  p#UR                  5       UR                  5       4$ )Nr   r   zshould train before assigningrI   )r	   r
   r   r   r   r   searchr   )ry   rb   r   r   s       r   assignKmeans.assignH  s}      )4~~)J+JJ)



t~~&zz  A&wwy!'')##r!   )	r   r   r1   r   r   r   r   r   r   r   )NN)r   r   r   r   r   r{   r   r   r   r   r   r   r!   r   r   r     s"    @. $::x$r!   r   c                 J    [        U [        R                  R                  5      $ r   )
isinstancecollectionsabcSequencera   s    r   is_sequencer  U  s    a1122r!   c           	          U R                   u  p#[        R                  " U SS9n [        U5      (       a  [        R                  " USS9nUR                   U4:X  d   e[	        UR                  5       S-   S-  5      n[        R                  " X$4SS9n[        X#[        U5      [        U 5      [        U5      U5        U$ X1-  S-   S-  n[        R                  " X$4SS9n[        X#U[        U 5      [        U5      U5        U$ )a"  
Pack a set integers (i, j) where i=0:n and j=0:M into
n bitstrings.
Output is an uint8 array of size (n, code_size), where code_size is
such that at most 7 bits per code are wasted.

If nbit is an integer: all entries takes nbit bits.
If nbit is an array: entry (i, j) takes nbit[j] bits.
ro   r         rH   )	r   r	   r
   r  re   sumr&   pack_bitstrings_cr   )rO   nbitr   M	code_sizebs         r   pack_bitstringsr  Z  s     77DA
Qg.A4##D8zzaT!!!aA-.	HHa^73(4.(1+x{I	G H X\a'	HHa^73!hqk8A;	JHr!   c           
         U R                   u  p4Uc  [        R                  " USS9n[        U5      n[	        UR                  5       S-   S-  5      nXF:  d   e[        R                  " X54SS9n[        X5[        U5      [        U 5      U[        U5      5        U$ UnXR-  S-   S-  nXF:  d   e[        R                  " X54SS9n[        X5U[        U 5      U[        U5      5        U$ )ax  
Unpack a set integers (i, j) where i=0:n and j=0:M from
n bitstrings (encoded as uint8s).
Input is an uint8 array of size (n, code_size), where code_size is
such that at most 7 bits per code are wasted.

Two forms:
- when called with (array, M, nbit): there are M entries of size
  nbit per row
- when called with (array, nbits): element (i, j) is encoded in
  nbits[j] bits
ro   r   r  r  )	r   r	   r
   r   re   r  r&   unpack_bitstrings_cr   )r  
M_or_nbitsr  r   r
  r	  min_code_sizerO   s           r   unpack_bitstringsr  u  s     77LA|##Jg>ITXXZ!^12)))HHaV7+(4.QKHQK	1 H A!+)))HHaV7+$Y	=Hr!   )90  )r  N)i  )Nr   r   )r   r   )"numpyr	   faiss.loaderr   collections.abcr   r   r$   r'   r5   r=   rC   lrandrF   rQ   rT   rS   r^   rc   rm   rg   rs   rp   ru   r   r   r   r   r   r  r  r  r  r  r   r!   r   <module>r     s       $$ '0A 2 	 , 8 >  :  N= =@, 8 $ 3l0pS$ S$t3 $ 2 ( r!   