
    ה9i-                         d dl mZ d dlmZ d dlmZ d dlZd dlm	Z	m
Z
mZmZ d dlmZ dddd	Zdd
Z G d de      Zd Z G d de      Zd Zd ZdddZddZ G d de      Zd Zy)    )Counter)suppress)
NamedTupleN)_isindeviceget_namespacexpx)is_scalar_nanFreturn_inversereturn_countsc                `    | j                   t        k(  rt        | ||      S t        | ||      S )a  Helper function to find unique values with support for python objects.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : ndarray
        Values to check for unknowns.

    return_inverse : bool, default=False
        If True, also return the indices of the unique values.

    return_counts : bool, default=False
        If True, also return the number of times each unique item appears in
        values.

    Returns
    -------
    unique : ndarray
        The sorted unique values.

    unique_inverse : ndarray
        The indices to reconstruct the original array from the unique array.
        Only provided if `return_inverse` is True.

    unique_counts : ndarray
        The number of times each of the unique values comes up in the original
        array. Only provided if `return_counts` is True.
    r   )dtypeobject_unique_python
_unique_np)valuesr   r   s      _/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/sklearn/utils/_encode.py_uniquer      s:    > ||v>
 	
 ~]     c                    t        |       \  }}d\  }}|r|r|j                  |       \  }}}}n?|r|j                  |       \  }}n(|r|j                  |       \  }}n|j	                  |       }|j
                  r]t        |d         rO|j                  ||j                        }|d|dz    }|r||||kD  <   |r|j                  ||d       ||<   |d|dz    }|f}	|r|	|fz  }	|r|	|fz  }	t        |	      dk(  r|	d   S |	S )zHelper function to find unique values for numpy arrays that correctly
    accounts for nans. See `_unique` documentation for details.)NNN   r   )r   
unique_allunique_inverseunique_countsunique_valuessizer
   searchsortednansumlen)
r   r   r   xp_inversecountsuniquesnan_idxrets
             r   r   r   7   s*    &!EB OGV-&(mmF&;#GV	,,V4	**62""6* ||gbk2//'2662-GaK()0GGg%& ffVGH%56F7OMgk*F*CzyX]3q6++r   c                   ,    e Zd ZU dZeed<   eed<   d Zy)MissingValuesz'Data class for missing data informationr    nonec                     g }| j                   r|j                  d       | j                  r|j                  t        j                         |S )z3Convert tuple to a list where None is always first.N)r,   appendr    np)selfoutputs     r   to_listzMissingValues.to_listd   s6    99MM$88MM"&&!r   N)__name__
__module____qualname____doc__bool__annotations__r2    r   r   r+   r+   ^   s    1	I
Jr   r+   c                     | D ch c]  }|t        |      s| }}|s| t        dd      fS d|v r*t        |      dk(  rt        dd      }nt        dd      }nt        dd      }| |z
  }||fS c c}w )a.  Extract missing values from `values`.

    Parameters
    ----------
    values: set
        Set of values to extract missing from.

    Returns
    -------
    output: set
        Set with missing values extracted.

    missing_values: MissingValues
        Object with missing value information.
    NF)r    r,   r   T)r
   r+   r"   )r   valuemissing_values_setoutput_missing_valuesr1   s        r   _extract_missingr>   n   s    " "U]mE6J  }U;;;!!!"a'$1e$$G! %2d$F! -$U C ((F((('s
   A3A3c                   (     e Zd ZdZ fdZd Z xZS )_nandictz!Dictionary with support for nans.c                 |    t         |   |       |j                         D ]  \  }}t        |      s|| _         y  y N)super__init__itemsr
   	nan_value)r0   mappingkeyr;   	__class__s       r   rD   z_nandict.__init__   s;    !!--/ 	JCS!!&	r   c                 ^    t        | d      rt        |      r| j                  S t        |      )NrF   )hasattrr
   rF   KeyErrorr0   rH   s     r   __missing__z_nandict.__missing__   '    4%-*<>>!smr   )r3   r4   r5   r6   rD   rN   __classcell__rI   s   @r   r@   r@      s    +r   r@   c                     t        | |      \  }}t        t        |      D ci c]  \  }}||
 c}}      }|j                  | D cg c]  }||   	 c}t	        |             S c c}}w c c}w )z,Map values based on its position in uniques.)r   )r   r@   	enumerateasarrayr   )r   r'   r#   r$   ivaltablevs           r   _map_to_integerrY      sb    &'*EB9W+=>Cc1f>?E::0AuQx0:HH ?0s   A%
A+c                   	 t        |       }t        |      \  }}t        |      }|j                  |j	                                t        j                  || j                        }|f}|r|t        | |      fz  }|r|t        | |      fz  }t        |      dk(  r|d   S |S # t        $ r1 t        d t        d | D              D              }t        d|       w xY w)Nr   c              3   4   K   | ]  }|j                     y wrB   )r5   ).0ts     r   	<genexpr>z!_unique_python.<locals>.<genexpr>   s     L!q~~Ls   c              3   2   K   | ]  }t        |        y wrB   )type)r]   rX   s     r   r_   z!_unique_python.<locals>.<genexpr>   s     2Kq472Ks   zPEncoders require their input argument must be uniformly strings or numbers. Got r   r   )setr>   sortedextendr2   r/   arrayr   	TypeErrorrY   _get_countsr"   )r   r   r   uniques_setmissing_valuesr'   typesr)   s           r   r   r      s    
&k&6{&C#^%~--/0((7&,,7 *C022FG,..X]3q6++  
Ls2KF2K/KLL'',g/
 	

s   A$B" ":CT)check_unknownc                   t        | |      \  }}|j                  | j                  d      s	 t        | |      S |rt        | |      }|rt        d|       |j                  ||       S # t        $ r}t        d|       d}~ww xY w)a  Helper function to encode values into [0, n_uniques - 1].

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.
    The numpy method has the limitation that the `uniques` need to
    be sorted. Importantly, this is not checked but assumed to already be
    the case. The calling method needs to ensure this for all non-object
    values.

    Parameters
    ----------
    values : ndarray
        Values to encode.
    uniques : ndarray
        The unique values in `values`. If the dtype is not object, then
        `uniques` needs to be sorted.
    check_unknown : bool, default=True
        If True, check for values in `values` that are not in `unique`
        and raise an error. This is ignored for object dtype, and treated as
        True in this case. This parameter is useful for
        _BaseEncoder._transform() to avoid calling _check_unknown()
        twice.

    Returns
    -------
    encoded : ndarray
        Encoded values
    numericz%y contains previously unseen labels: N)r   isdtyper   rY   rL   
ValueError_check_unknownr   )r   r'   rk   r#   r$   ediffs          r   _encoders      s    : &'*EB::fllI.	J"6733 !&'2D #H!OPPw//  	JDQCHII	Js   A) )	B2B  Bc                 R   t        | |      \  }}d}|j                  | j                  d      st        |       }t	        |      \  }}t        |      t	              \  |z
  }|j
                  xr j
                   }	|j                  xr j                   }
fd}|rR|s|	s|
r&|j                  | D cg c]
  } ||       c}      }n&|j                  t        |       |j                        }t        |      }|
r|j                  d       |	r|j                  t        j
                         n|j                  |       }t        j                   ||d|      }|r@|j"                  rt%        | ||      }n&|j                  t        |       |j                        }|j'                  |j)                  |            rL|j)                  |      }|j'                  |      r*|j"                  r|r|j)                  |       }d||<   ||    }t        |      }|r||fS |S c c}w )a  
    Helper function to check for unknowns in values to be encoded.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : array
        Values to check for unknowns.
    known_values : array
        Known values. Must be unique.
    return_mask : bool, default=False
        If True, return a mask of the same shape as `values` indicating
        the valid values.

    Returns
    -------
    diff : list
        The unique values present in `values` and not in `know_values`.
    valid_mask : boolean array
        Additionally returned if ``return_mask=True``.

    Nrm   c                 j    | v xs- j                   xr | d u xs j                  xr t        |       S rB   )r,   r    r
   )r;   missing_in_uniquesrh   s    r   is_validz _check_unknown.<locals>.is_valid  sA    $ E&++=E&**C}U/Cr   r[   T)assume_uniquer#   r   )r   rn   r   rb   r>   r    r,   re   onesr"   r7   listr.   r/   r   r	   	setdiff1dr   r   anyisnan)r   known_valuesreturn_maskr#   r$   
valid_mask
values_setmissing_in_valuesrr   nan_in_diffnone_in_diffrw   r;   r   diff_is_nanis_nanrv   rh   s                   @@r   rp   rp      s   2 &,/EBJ::fllI.[
(8(D%
%,'*:;*G''K''++J4F4J4J0J(--M6H6M6M2M	 {lXXF&K5x&KL
WWS[W@
DzKKKK((0}}]LQSTyy"6<<
WWS[W@
 66"((<()((4.Kvvk"99XXf-F)*Jv& [L)DzZKC 'Ls   ;H$c                   .     e Zd ZdZ fdZd Zd Z xZS )_NaNCounterz$Counter with support for nan values.c                 B    t         |   | j                  |             y rB   )rC   rD   _generate_items)r0   rE   rI   s     r   rD   z_NaNCounter.__init__G  s    --e45r   c              #      K   |D ]:  }t        |      s| t        | d      sd| _        | xj                  dz  c_        < yw)z>Generate items without nans. Stores the nan counts separately.	nan_countr   r   N)r
   rK   r   )r0   rE   items      r   r   z_NaNCounter._generate_itemsJ  sD      	 D &
4-!"NNaN	 s   AAc                 ^    t        | d      rt        |      r| j                  S t        |      )Nr   )rK   r
   r   rL   rM   s     r   rN   z_NaNCounter.__missing__T  rO   r   )r3   r4   r5   r6   rD   r   rN   rP   rQ   s   @r   r   r   D  s    .6 r   r   c                 p   | j                   j                  dv rnt        |       }t        j                  t        |      t        j                        }t        |      D ]%  \  }}t        t              5  ||   ||<   ddd       ' |S t        | d      \  }}t        j                  ||d      }t        j                  |d         rt        j                  |d         rd|d<   t        j                  |||         }	t        j                  |t        j                        }||	   ||<   |S # 1 sw Y   xY w)zGet the count of each of the `uniques` in `values`.

    The counts will use the order passed in by `uniques`. For non-object dtypes,
    `uniques` is assumed to be sorted and `np.nan` is at the end.
    OUr[   NT)r   )rx   r   )r   kindr   r/   zerosr"   int64rS   r   rL   r   isinr}   r   
zeros_like)
r   r'   counterr1   rU   r   r   r&   uniques_in_valuesunique_valid_indicess
             r   rg   rg   Z  s    ||D f%#g,bhh7 ) 	*GAt(# *#DMq	* *	* &vTBM6 dK	xxb!"rxx'< $"??='BS:TU]]7"((3F &'; <FM* *s   2	D,,D5	)FF)F)collectionsr   
contextlibr   typingr   numpyr/   sklearn.utils._array_apir   r   r   r	   sklearn.utils._missingr
   r   r   r+   r>   dictr@   rY   r   rs   rp   r   rg   r9   r   r   <module>r      s{         F F 0 ',5 &R$,NJ  #)Lt  I,4 /3 (0VQh' ,r   