
    q9i_                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlZd dlmZ d dlmZmZ d dlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(Z)d dl*mZ+ d dl,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z=m>Z>m?Z?m@Z@  e"d      ZAde eAe!eA   f   de!eA   fdZBdedefdZCdee-j                     dee-j                     ddfdZEd e0deddfd!ZFd e0d"ed#e e'd$   e'd%   e'd&   f   ddfd'ZGd e0deddfd(ZHd e0deddfd)ZId e0deddfd*ZJd e0deddfd+ZKd e0ddfd,ZLe9j                  fd-e-j                  d.e-j                  d/ee+j                  e+j                  gePf   dee!e!eQ      e!e!eP      f   fd0ZRd1eQddfd2ZSd e0fd3ZT	 	 	 	 	 	 dHd e0ded4eQd5ePd6ee-j                     d7ee!eQ      d8ee-j                     d9eVddfd:ZWd;e-j                  d4eQddfd<ZYd=edeQfd>ZZd?ed@e!e0   dAeVddfdBZ[d?ed@e!e0   de%eeQf   fdCZ\d?ed@e!e0   dAeVddfdDZ]edEe.dFe^fdG       Z_y)I    N)UUID)contextmanager)
SegmentAPI)SysDB)create_topic_name)System)get_sql)SqliteDB)sleep)SegmentType)NormalizedRecordSet	RecordSet)	CallableOptionalTupleUnionListTypeVarcastAnyDict)Literal)types	ClientAPI)
Collection)note)InvalidArgument)Table	functions)distance_functions)Search)KnnSelectLimitKeyTvaluereturnc                 F    | t        d      t        | t              r| S | gS )z*Wrap a value in a list if it is not a listzvalue cannot be None)r   
isinstancer   )r'   s    j/var/www/html/leadgen/airagagent/rag_env/lib/python3.12/site-packages/chromadb/test/property/invariants.pywrapr,   $   s)    }455	E4	 w    
record_setc                 |   | d   d}nt        | d   t              r| d   J t        | d         dkD  rt        d | d   D              rt	        t
        j                  | d         }nt        d | d   D              rt	        t
        j                  | d         }nht        d | d   D              rt	        t
        j                  | d   g      }n4t        d      t	        t
        j                  | d         }nt        d      t        | d	         | d
   t        | d
         nd| d   t        | d         |dS d|dS )z9Ensure that an embedding set has lists for all its values
embeddingsNr   c              3   <   K   | ]  }t        |t                y wN)r*   list.0	embeddings     r+   	<genexpr>zwrap_all.<locals>.<genexpr>7   s      09
9d+s   c              3   P   K   | ]  }t        |t        j                           y wr2   )r*   npndarrayr4   s     r+   r7   zwrap_all.<locals>.<genexpr>;   s#       9bjj1s   $&c              3      K   | ]8  }t        |t        t        t        j                  t        j
                  f       : y wr2   )r*   intfloatr9   integerfloatingr5   es     r+   r7   zwrap_all.<locals>.<genexpr>A   s0       q3rzz2;;"GHs   >A z-an embedding must be a list of floats or intszVembeddings must be a list of lists, a list of numpy arrays, a list of numbers, or Noneids	documents	metadatas)rB   rC   rD   r0   )	r*   r3   lenallr   r   
Embeddingsr   r,   )r.   embedding_lists     r+   wrap_allrI   .   s    ,'	J|,d	3,'333z,'(1, =G=U  "&e&6&6
<8P!Q !+L!9  "&e&6&6
<8P!Q '5  &*%*:*:Z=U<V%WN)G  "%"2"2J|4LMNd
 	

 Ju%&k". *[12k". *[12 %	 	 $	 	r-   expectedgotc           	      d   | || |J | |t        |       t        |      k(  sJ | j                         D ]t  \  }}||v sJ t        | |   t              rIt        ||   t              r6t	        t        t        | |         t        t        ||         z
        dk  rfJ | |   ||   k(  rtJ  y y y )Nư>)rE   itemsr*   r=   absr   )rJ   rK   keyvals       r+   check_metadatarR   ]   s       8}C((( ( 	1HC#::(3-/Js3x4O4x}5UCH8MMNQUUUU}C000	1 !0r-   
collectionc           	          | j                         }t        |      }|t        |d         k7  rt        d|dt        |d                |t        |d         k(  sJ y)z?The given collection count is equal to the number of embeddingsrB   zcount mismatch:z=!N)countrI   rE   print)rS   r.   rU   normalized_record_sets       r+   rU   rU   m   sa    E$Z0)%011c2G2N.OPC-e45555r-   rW   
field_namerC   rD   r0   c                 Z   | j                  |d   |g      t        |d         D ci c]  \  }}||
 c}}|   }t        |d         dk(  r>|dk(  r1t        t        j
                  t           |      j                  dk(  sJ y|g k(  sJ y|J t        t        |      fd      }|D cg c]  \  }}|	 }	}}||   }
|
dgt        |d         z  }
|dk(  r?t        j                  t        j                  |	      t        j                  |
            sJ yt        |	      t        |
      k(  sJ t        |	|
      D ]Y  \  }}
t        |
t              r=t        t        t         j"                  |      t        t         j"                  |
             S||
k(  rYJ  yc c}}w c c}}w )zq
    The actual embedding field is equal to the expected field
    field_name: one of [documents, metadatas]
    rB   rB   includer   r0   Nc                      d   | d         S )NrB   r    )index_and_field_valueembedding_id_to_indexresults    r+   <lambda>z _field_matches.<locals>.<lambda>   s    *?5M/23+
 r-   rP   )get	enumeraterE   r   nptNDArrayr   sizesortedr9   allclosearrayzipr*   dictrR   r   Metadata)rS   rW   rX   iidactual_fieldsorted_field_field_valuefield_valuesexpected_fieldr_   r`   s              @@r+   _field_matchesrv   v   s    ^^ 5e <zl^SF 1::OPU:V0WXuq"RUX*%L
 '(A-%C(,7<<AAA 	  2%%% ###,
L 7CCNA{KCLC*:6N #&;E&B"CC\!{{288L1288N3KLLL< C$7777+.|^+L 	5'K.$/58
 #n444	5? Y& Ds   F!3F'c                     t        |      }| j                  |d   g       d   }t        |d         D ci c]  \  }}||
 c}}t        |fd      }||d   k(  sJ yc c}}w )z5The actual embedding ids is equal to the expected idsrB   rZ   c                     |    S r2   r]   )ro   r_   s    r+   ra   zids_match.<locals>.<lambda>   s    3H3L r-   rb   N)rI   rc   rd   rh   )rS   r.   rW   
actual_idsrn   ro   r_   s         @r+   	ids_matchrz      sy    $Z0$9%$@"MeTJ 1::OPU:V0WXuq"RUX
(LMJ.u5555 Ys   A#c                 4    t        |      }t        | |d       y)z?The actual embedding metadata is equal to the expected metadatarD   NrI   rv   rS   r.   rW   s      r+   metadatas_matchr~          $Z0:4kBr-   c                 4    t        |      }t        | |d       y)AThe actual embedding documents is equal to the expected documentsrC   Nr|   r}   s      r+   documents_matchr      r   r-   c                 4    t        |      }t        | |d       y)r   r0   Nr|   r}   s      r+   embeddings_matchr      s    $Z0:4lCr-   c                 n    | j                         d   }t        |      t        t        |            k(  sJ y )NrB   )rc   rE   set)rS   rB   s     r+   no_duplicatesr      s-    
..
5
!Cs8s3s8}$$$r-   querytargetsdistance_fnc                 6   t        j                  | t         j                        }t        j                  |t         j                        t        j                  fdd|      }t        j                  |      j                         |j                         fS )zGReturn the ordered indices and distances from each query to each target)dtypec                 4    t        j                  d|       S )N   )r9   apply_along_axis)r   r   
np_targetss    r+   ra   z"_exact_distances.<locals>.<lambda>   s    b))+q*eL r-   r   )r9   rj   float32r   argsorttolist)r   r   r   np_query	distancesr   s     `  @r+   _exact_distancesr      sp     xxRZZ0H'4J ##L	I ::i '')9+;+;+===r-   threadpool_sizec                 D   t        j                         }|j                         }d}d}t        |D cg c]  }d|j                  v s|j                   c}      dz
  | kD  rw||k  rrt        j                          |j                         }|dz  }t        d       t        |D cg c]  }d|j                  v s|j                   c}      dz
  | kD  r||k  rrt        |D cg c]  }d|j                  v s|j                   c}      dz
  | k  sJ yc c}w c c}w c c}w )zs
    Checks that the open file descriptors are not exceeding the threadpool size
    works only for SegmentAPI
       r   sqlite3r   N)psutilProcess
open_filesrE   pathgccollectr   )r   current_processr   max_retriesretry_countps         r+    fd_not_exceeding_threadpool_sizer      s   
 nn&O ++-JKK 	Z?9+>QVV?@1DV+%


$//1
qa 	Z?9+>QVV?@1DV+% 	Z?9+>QVV?@1DWW @? @s#   DD#D7D!D5Dc                    d }d| j                   v r| j                   d   }| j                  j                  |S d| j                  j                  v r| j                  j                  j                  d      ]d| j                  j                  j                  d      v r6| j                  j                  j                  d      j                  d      }|S d| j                  j                  v r| j                  j                  j                  d      ]d| j                  j                  j                  d      v r6|4| j                  j                  j                  d      j                  d      }|S )Nz
hnsw:spacespannspacehnsw)metadata_modelconfiguration_jsonrc   )rS   r   s     r+   	get_spacer      s?    Ez***##L1++3:$$7770044W=Iz((;;??HH!!4488AEEgN L 	*##6660044V<Hz((;;??GG=%%88<<VDHHQELr-   	n_results
min_recallembedding_functionquery_indicesquery_embeddings
use_searchc           
         t        |      }t        |d         dk(  ry|d   }	|	duxr t        |	      dkD  }
|
s+|J |d   J t        |d   t              sJ  ||d         }	t	        |       }|t
        j                  }nA|dk(  rt
        j                  }n+|dk(  rt
        j                  }n|dk(  rt
        j                  }d	}| j                  J |	J t        d
 |	D              sJ t        |	d         }|t        j                  dt        t        j                  |                  z  }|||	n|D cg c]  }|	|   	 c}}|d   }|||D cg c]  }||   	 }}t        ||	      \  }}|rg }|D ]o  }t        |t         j"                        r|j%                         }n|}t'        t)        |      t+        |            j-                         }|j/                  |       q | j0                  }|j3                  | j4                  |dd      }t7        t8        j:                  |d   |d   |d   |d   |d   d      }n| j=                  |
r|nd|
s|nd|g d      }t?        ||       |d   J |d   J |d   J |d   J |d   J tA        |d         D ci c]  \  }}||
 }}}d}tA        tC        ||            D ]  \  }\  }}t!        jD                  |d         |d|    }|t        tG        |      tG        |d   |         z
        z  }tA        |d   |         D ]  \  } }||v}!||   }"t!        jH                  ||"   |d   |   |    |      }#|!r	|#r|dz  }n>|#sJ t!        jH                  |	|"   |d   |   |          sJ |d   |d   |"   |d   |   |    k(  sJ |d   tK        |d   |"   |d   |   |             t        |d         }$|$|z
  |$z  }%	 tM        d|% d| d|$ d|        |%|k\  sJ |d   D ]-  }&t!        jH                  t!        jP                  |&      |&      r-J  yc c}w c c}w c c}}w # tN        $ r Y Xw xY w)zBValidate that the API performs nearest_neighbor searches correctlyrB   r   Nr0   rC   cosineipl2rM   c              3   \   K   | ]$  }t        |t        t        j                  f       & y wr2   )r*   r3   r9   r:   r@   s     r+   r7   zann_accuracy.<locals>.<genexpr>>  s      EQz!dBJJ/0Es   *,
   )r   )r   )limit)rankr   default_tenantdefault_database)collection_idsearchestenantdatabasescoresrD   )rB   r   r0   rC   rD   )r0   rC   rD   r   )r   query_textsr   r[   r   )atolr   z
# recall: z
, missing z out of z, accuracy threshold ))rI   rE   r*   r3   r   r    r   r   r   r   rF   mathpowr<   log10r   r9   r:   r   r!   r"   r$   
select_allappend_client_searchro   r   r   QueryResultr    _query_results_are_correct_shaperd   rk   rj   r   ri   rR   r   r   sort)'rS   r.   r   r   r   r   r   r   rW   r0   have_embeddingsr   distance_functionaccuracy_thresholddimrn   query_documentsindicesr   search_requestsquery_embeddingquery_embedding_listsearchapisearch_resultsquery_resultsro   id_to_indexmissing	indices_idistances_iexpected_idsjunexpected_idindexcorrect_distancerg   recalldistance_results'                                          r+   ann_accuracyr     s    %Z0
 '(A--B<-PJ ,DZ11DO!---$[1===/<dCCC'(=k(JK
j!E}.11	(	.55	$.11	$.11***!!!
 E*EEEE
jm
C+dhhr3tzz#;O.PP  $ )67A*Q-7 	
 ,K8O _%@7DE!?1-EE)*2CGY / 
	+O/2::6'6'='='?$'6$34), jl  ""6*
	+   $--$#'	 % 
 U..!%('1(6'4'41
  #((1@-d/>DI	 ) 
 %]I> +++%111&222%111%111 '00Ee0L&MNUQ2q5NKNG'0Wi1H'I $##I{xx 5e <=i
>ST3s<(3}U/CA/F+GGHH }U3A67 	EArl2MOE!{{E"k*1-a0' 
  $qLG''';;z%0-2Ma2PQR2STTT$[1=)+6u=$[1!4Q788 %[1=)+6u=!+.q1!47	$L $U+,DWn$F
7)8D6AVWiVjk	
 Z )5 F{{277?3_EEEFc 8 Fp O^  s$   8Q Q%Q
,Q 	QQr   c                 T    dD ]"  }| |   J t        fd| |   D              r"J  y )N)r   r0   rC   rD   c              3   :   K   | ]  }t        |      k(    y wr2   )rE   )r5   r`   r   s     r+   r7   z3_query_results_are_correct_shape.<locals>.<genexpr>  s      
)/CK9$
s   )rF   )r   r   result_types    ` r+   r   r     sJ     M 
[)555 
3@3M
 
 	
 

r-   sqlitec                    t        d      }| j                         j                  |      }| j                         5 }t	        |j                  t        j                  |j                              | j                               \  }}|j                  ||      }t        t        |j                         d         cd d d        S # 1 sw Y   y xY w)Nembeddings_queuer   )r   querybuilderfrom_txr	   selectr   Countseq_idparameter_formatexecuter   r<   fetchone)r   tqcursqlparamsr`   s          r+   _total_embedding_queue_log_sizer     s     !A##A&A	 /HHY__QXX./1H1H1J
V S&)C*1-./ / /s   A<CC
systemcollectionshas_collection_mutatedc                    | j                  t              }| j                  j                  s| j                  j                  dk(  ry |rht        |      dk\  sJ t        d |D              }t        d |D              }| j                  j                  dk(  r|n||z   }t        |      dz
  |k  sJ y t        |      dk(  sJ y )N!chromadb.api.rust.RustBindingsAPIr   c              3   p   K   | ].  }|j                   |j                   j                  dd      nd 0 y w)Nhnsw:sync_threshold  r   rc   r5   rS   s     r+   r7   z%log_size_below_max.<locals>.<genexpr>  sD      !
  "". ##$94@!
   46c              3   p   K   | ].  }|j                   |j                   j                  dd      nd 0 y w)Nhnsw:batch_sized   r  r  s     r+   r7   z%log_size_below_max.<locals>.<genexpr>  sD      
  "". ##$5s;
r  r   )instancer
   settingsis_persistentchroma_api_implr   sum)r   r   r   r   sync_threshold_sumbatch_size_sumr   s          r+   log_size_below_maxr    s     __X&F OO))OO++/RR.v6!;;; ! !
 *	!
 
  
 *	
 
 ..2UU #n4 	 /v6:eCCC.v6!;;;r-   c           	         | j                  t              }t        d      }|j                         j	                  |      }| j
                  j                  d      }| j
                  j                  d      }|D ci c]  }t        |||j                        | }}|j                         5 }	t        |j                  |j                  t        j                  |j                              j!                  d      |j#                               \  }
}|	j%                  |
|      }i }|j'                         D ]  }|d   |||d      j                  <    |cd d d        S c c}w # 1 sw Y   y xY w)Nr   	tenant_idtopic_namespacetopicr   r   )r  r
   r   r   r   r  requirer   ro   r   r	   r   r  r   r   r   groupbyr   r   fetchall)r   r   r   r   r   _tenant_topic_namespacerS   topic_mappingsr   r   r   r`   outress                  r+   ._total_embedding_queue_log_size_per_collectionr    sO    __X&F !A##A&Aoo%%k2G../@A & 	'#3Z]]CZON  
 	HHQWWiooahh78@@I##%
V S&)??$ 	4C-0VCs1v&))*	4	 			 	s   :E+B&E  E)c                 l   | j                   j                  dk(  ry | j                  t              }|rt	        |      dk\  sJ |D ci c]7  }|j
                  |j                  |j                  j                  dd      nd9 }}|D ci c]0  }|j
                  |j                         ||j
                     z  dz   2 c}t        | |      t        j                               t        j                               k(  sJ t        fd|D              sJ y t	        |      dk(  sJ y c c}w c c}w )Nr  r   r	  r
  c              3   \   K   | ]#  }|j                      |j                      k(   % y wr2   )ro   )r5   rS   actual_sizesexpected_sizess     r+   r7   z:log_size_for_collections_match_expected.<locals>.<genexpr>2  s0      
 '>*--+HH
s   ),r   )r  r  r  r
   r   ro   r   rc   rU   r  r   keysrF   )r   r   r   r   rS   r  r"  r#  s         @@r+   'log_size_for_collections_match_expectedr%    sM    &&*MM__X&F.v6!;;; *	
  MM"". &..223DcJ
 
 *
 MM:++-z}}0MMPQQQ

 FK
 <$$&'3~/B/B/D+EEEE 
)
 
 	
 
 /v6!;;;+

s   <D,
5D1clientcollection_namec           
   #     K   | j                         D cg c]  }|j                   }}||v sJ | j                  |      }g }t        | j                  t
              rJ| j                  j                  }|j                  |j                        }i }d}|D ]  }	d||	d   <   |	d   t        j                  j                  k(  s,|j                  d|j                  v r|j                  d   nd}
|j                         |
kD  skd}t        j                  j!                  t        j                  j#                  | j%                         j&                  t)        |	d                     rJ  |r|t        j                  j                     sJ |t        j*                  j                     sJ d  | j                         D cg c]  }|j                   }}||vsJ t-        |      dkD  r| j                  j                  }|j                  |j                        }t-        |      dk(  sJ |D ]  }	|	d   t        j                  j                  k(  s$t        j                  j!                  t        j                  j#                  | j%                         j&                  t)        |	d                     sJ  y y c c}w c c}w w)	N)rS   FTtyper  r  ro   r   )list_collectionsnameget_collectionr*   _serverr   _sysdbget_segmentsro   r   HNSW_LOCAL_PERSISTEDr'   r   rU   osr   existsjoinget_settingspersist_directorystrSQLITErE   )r&  r'  ccollection_namesrS   segmentssysdbsegment_typesshould_have_hnswsegmentsync_thresholdsegments_afters               r+   collection_deletedrA  ;  s     )/(?(?(AB1BB....&&7JH&..*-~~,,%%%?  	G-1M'&/*v+"B"B"H"HH "**6-1D1DD ''(=> 	  $$&7'+$77>>"//1CCSQUEW  	$  !A!A!G!GHHH[//55666	 )/(?(?(AB1BB"2222
8}q~~,,++z}}+E>"a''' 	Gv+"B"B"H"HH77>>GGLL++-??WT]AS  		 I CD Cs>   K J6BK >K A#K %AK >J;A=K A!K 1K )r   gffffff?NNNF)`r   r   os.pathr1  uuidr   
contextlibr   chromadb.api.segmentr   chromadb.db.systemr   chromadb.ingest.impl.utilsr   chromadb.configr   chromadb.db.baser	   chromadb.db.impl.sqliter
   timer   r   chromadb.segmentr   !chromadb.test.property.strategiesr   r   typingr   r   r   r   r   r   r   r   r   typing_extensionsr   numpyr9   numpy.typingre   chromadb.apir   r   chromadb.api.models.Collectionr   
hypothesisr   hypothesis.errorsr   pypikar   r   chromadb.utilsr    "chromadb.execution.expression.planr!   &chromadb.execution.expression.operatorr"   r#   r$   r%   r&   r,   rI   rm   rR   rU   rv   rz   r~   r   r   r   r   rG   	ArrayLiker=   r<   r   r   r   EmbeddingFunctionboolr   r   r   r   r  r  r%  r6  rA  r]   r-   r+   <module>r]     s   	    % + $ 8 " $ ,   ( L S S S %   ) 5  - # - 5 J JCLaaj! d1g , ,': ,^1u~~&1-5enn-E1	1 6j 6i 6D 64545.45 gk2GL4II45 
45n6* 6) 6 6C
 C	 Cd CC
 C	 Cd CD D Dt D%j %T % 	>>> 	&-> 4S	?De--.>*c d .* 8 <@)-37eFeFeF eF 	eF
 !!8!89eF DI&eF u//0eF eF 
eFP
$$
14
	
	/H 	/ 	/(<(<!%j!1(<KO(<	(<Vj! 
$)_2"<"<!%j!1"<KO"<	"<J 0y 03 0 0r-   