
    q9i"$                         d Z ddlmZmZmZmZmZ ddlmZ ddl	m
Z
mZmZ ddlmZ erddlmZ ddlmZ dd	d
efdZdd	ded
dfdZddZ	 ddd	ded
efdZ	 ddd	dedee
e      d
eeef   fdZy)a  Utility functions for managing collection statistics.

This module provides standalone functions for enabling, disabling, and retrieving
statistics for ChromaDB collections. These functions work with the attached function
system to automatically compute metadata value frequencies.

Example:
    >>> from chromadb.utils.statistics import attach_statistics_function, get_statistics
    >>> import chromadb
    >>>
    >>> client = chromadb.Client()
    >>> collection = client.get_or_create_collection("my_collection")
    >>>
    >>> # Attach statistics function with output collection name
    >>> attach_statistics_function(collection, "my_collection_statistics")
    >>>
    >>> # Add some data
    >>> collection.add(
    ...     ids=["id1", "id2"],
    ...     documents=["doc1", "doc2"],
    ...     metadatas=[{"category": "A"}, {"category": "B"}]
    ... )
    >>>
    >>> # Get statistics from the named output collection
    >>> stats = get_statistics(collection, "my_collection_statistics")
    >>> print(stats)
    )TYPE_CHECKINGOptionalDictAnycast)defaultdict)	OneOrManyWheremaybe_cast_one_to_many)STATISTICS_FUNCTION
Collection)AttachedFunction
collectionr   returnc                      | j                    dS )zGenerate the default name for the statistics attached function.

    Args:
        collection: The collection to generate the name for

    Returns:
        str: The statistics function name
    _stats)name)r   s    b/var/www/html/leadgen/airagagent/rag_env/lib/python3.12/site-packages/chromadb/utils/statistics.pyget_statistics_fn_namer   '   s     oof%%    stats_collection_namer   c                 F    | j                  t        t        |       |d      S )a  Attach statistics collection function to a collection.

    This attaches the statistics function which will automatically compute
    and update metadata value frequencies whenever records are added, updated,
    or deleted.

    Args:
        collection: The collection to enable statistics for
        stats_collection_name: Name of the collection where statistics will be stored.

    Returns:
        AttachedFunction: The attached statistics function

    Example:
        >>> attach_statistics_function(collection, "my_collection_statistics")
        >>> collection.add(ids=["id1"], documents=["doc1"], metadatas=[{"key": "value"}])
        >>> # Statistics are automatically computed
        >>> stats = get_statistics(collection, "my_collection_statistics")
    N)functionr   output_collectionparams)attach_functionr   r   )r   r   s     r   attach_statistics_functionr   3   s-    , %%$#J//	 &  r   c                 f    | j                  t        |             }|j                  dk(  sJ d       |S )a\  Get the statistics attached function for a collection.

    Args:
        collection: The collection to get the statistics function for

    Returns:
        AttachedFunction: The statistics function

    Raises:
        NotFoundError: If statistics are not enabled
        AssertionError: If the attached function is not a statistics function
    
statisticsz.Attached function is not a statistics function)get_attached_functionr   function_name)r   afs     r   get_statistics_fnr$   Q   s=     
	)	)*@*L	MB
L(878(Ir   delete_stats_collectionc                 R    t        |       }| j                  |j                  |      S )a  Detach statistics collection function from a collection.

    Args:
        collection: The collection to disable statistics for
        delete_stats_collection: If True, also delete the statistics output collection.
                                  Defaults to False.

    Returns:
        bool: True if successful

    Example:
        >>> detach_statistics_function(collection, delete_stats_collection=True)
    )delete_output_collection)r$   detach_functionr   )r   r%   attached_fns      r   detach_statistics_functionr*   e   s2      $J/K%%3J &  r   Nkeysc                    t        |      }d}|)t        |      |kD  rt        dt        |       d| d      ddlm} | j
                  j                  || j                  | j                        } || j
                  |dd	      }t        d
       }i }	d}
|t        t        dd|dgz   ii      nd}	 |j                  dg|
|      }|j                  d      xs g }|sn|D ]  }||j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }|^|a|d|g|dk(  r|dk(  sr||	d<   x||n|}t        |t              sJ t        |t              sJ t        |t              sJ |||   |   d<    |
t        |      z  }
dt!        |      i}|	r|	|d<   |S )aT  Get the current statistics for a collection.

    Statistics include frequency counts for all metadata key-value pairs,
    as well as a summary with the total record count.

    Args:
        collection: The collection to get statistics for
        stats_collection_name: Name of the statistics collection to read from.
        keys: Optional metadata key(s) to filter statistics for. Can be a single key
              string or a list of keys. If provided, only returns statistics for
              those specific keys.

    Returns:
        Dict[str, Any]: A dictionary with the structure:
            {
                "statistics": {
                    "key1": {
                        "value1": {"count": count, ...},
                        "value2": {"count": count, ...}
                    },
                    "key2": {...},
                    ...
                },
                "summary": {
                    "total_count": count
                }
            }

    Example:
        >>> attach_statistics_function(collection, "my_collection_statistics")
        >>> collection.add(
        ...     ids=["id1", "id2"],
        ...     documents=["doc1", "doc2"],
        ...     metadatas=[{"category": "A", "score": 10}, {"category": "B", "score": 10}]
        ... )
        >>> # Wait for statistics to be computed
        >>> stats = get_statistics(collection, "my_collection_statistics")
        >>> print(stats)
        {
            "statistics": {
                "category": {
                    "A": {"count": 1},
                    "B": {"count": 1}
                },
                "score": {
                    "10": {"count": 2}
                }
            },
            "summary": {
                "total_count": 2
            }
        }

    Raises:
        ValueError: If more than 30 keys are provided in the keys filter.
       NzToo many keys provided: z. Maximum allowed is z[ keys per request. Consider calling get_statistics multiple times with smaller key batches.r   r   )r   tenantdatabase)clientmodelembedding_functiondata_loaderc                       t        t              S N)r   dict r   r   <lambda>z get_statistics.<locals>.<lambda>   s    kRVFW r   keyz$insummary	metadatas)includeoffsetwherevaluevalue_labeltypecounttotal_countr    )r   len
ValueErrorchromadb.api.models.Collectionr   _clientget_collectionr.   r/   r   r   r
   get
isinstancestrintr6   )r   r   r+   	keys_listMAX_KEYSr   stats_collection_modelstats_collectionstatsr:   r=   where_filterpager;   metadatameta_keyr?   r@   
value_typerB   	stats_keyresults                         r   get_statisticsrY   {   sM   | 't,I HY(!:&s9~&6 7""* ,WW
 	
 : (//>>"  $$ ?  "!!$	 3>>W2XE GF   	UUUI$;<=>  ## M& $ 
 HH[)/R	! 	@H||E*HLL)E",,}5K!f-JLL)E $%*%y(-16. 0;/FEI%h444%i555%eS111:?E(OI.w73	@8 	#i. K N DK(F#yMr   )r   r   r   r   )Fr5   )__doc__typingr   r   r   r   r   collectionsr   chromadb.api.typesr	   r
   r   chromadb.api.functionsr   rF   r   $chromadb.api.models.AttachedFunctionr   rK   r   r   r$   boolr*   rY   r7   r   r   <module>ra      s   8 < ; # G G 69E	&| 	& 	&58<* ?D7;	2 &*RRR 9S>
"R 
#s(^	Rr   