
    9i>             ,          S SK Jr  S SKrS SKrS SKJr  S SKJrJ	r	J
r
  S SKrS SKrS SKJr  S SKJr  S SKJrJr  SS	KJrJr  0 r \R0                  \R2                  4\R4                  \R6                  4\R8                  \R:                  4\R<                  \R>                  4\R0                  \R2                  4\R4                  \R6                  4S
.r  " S S5      r! " S S5      r" " S S5      r#\RH                  " SS S9r%\RL                  RO                  5       S:  a  S\R                  4S jr(OS SK)r)S\R                  4S jr(\RT                  \%S.S jr+SS\R                  4S jjr,SS jr-SS jr.SS jr/SS jr0SS jr1SS  jr2SS! jr3S"\\	\R                        4S# jr4S$\S%\Rj                  4S& jr6S\	\   S%\	\Rj                     4S' jr7 " S( S)5      r8     SS\R                  S*\	\R                     S+\	\R                     S,\	\R                     S%\9\R                  \84   4
S- jjr:      SS\R                  S.\	\8   S+\	\R                     S*\	\R                     S,\	\R                     S/\;S%\R                  4S0 jjr<SS1 jr=SSSS\R|                  4S\R                  S+\	\R                     S,\	\R                     4S2 jjr?SSSS\R|                  4S\R                  S+\	\R                     S,\	\R                     4S3 jjr@SSSSS4\R|                  4S\R                  S+\	\R                     S,\	\R                     S%\9\R                  \84   4S5 jjrA    SS\R                  S.\	\8   S+\	\R                     S,\	\R                     S/\	\;   S%\R                  4S6 jjrB    SS\R                  S.\	\8   S+\	\R                     S,\	\R                     S/\	\;   S%\R                  4S7 jjrC     SS\R                  S.\	\8   S+\	\R                     S,\	\R                     S/\	\;   S%\R                  4S8 jjrD\" S9\ES:9  SS\S*\	\R                     S,\	\R                     S%\9\\9\\4   4   4S; jj5       rF\" S9\ES:9    SS\S<\	\9\\4      S+\	\R                     S*\	\R                     S,\	\R                     S%\4S= jj5       rG\" S9\ES:9SS\S*\S,\	\R                     S%\4S> jj5       rH\" S9\ES:9SS\S*\S,\	\R                     S%\4S? jj5       rI         SS@\JSA\SB\SC\SD\KSE\KSF\;SG\KSH\	\R                     SI\KSJ\KSK\KSL\KSM\KSN\	\R                     SO\KS%S4"SP jjrL\" SQ\ES:9    SS@\JSA\SB\SC\SH\	\R                     SD\KSI\KSE\KSF\;SG\KSR\SS\	\R                     ST\SU\	\R                     SV\SW\	\R                     SL\KSM\KSN\	\R                     SO\KS%S4*SX jj5       rM   SS@\JSA\SB\SC\SH\	\R                     SD\KSI\KSJ\KSK\KSE\KSF\;SG\KSR\SS\	\R                     SY\SZ\	\R                     SL\KSM\KS%S4&S[ jjrN\" S9\ES:9SS\\S]\SF\;S^\;4S_ jj5       rO\R                  4S` jrQ    SS\Sa\S,\	\R                     4Sb jjrR   SS\Sa\S,\	\R                     4Sc jjrS   SS\Sa\S,\	\R                     4Sd jjrTS\R                  4S\R                  Sa\R                  S,\	\R                     4Se jjrV  SS\R                  Sf\R                  Sg\R                  S,\	\R                     Sh\	\R                     4
Si jjrW\" S9\ES:9    SS\R                  Sf\	\R                     Sg\	\R                     Sj\	\R                     S%\9\R                  \R                  \	\R                     4   4
Sk jj5       rX\" S9\ES:9SS\R                  4Sl jj5       rY " Sm Sn5      rZ " So Sp5      r[ " Sq Sr5      r\Ss r]St r^\R                  4Su jr`     SS\R                  Sg\	\R                     Sf\	\R                     Sv\	\R                     Sw\	\R                     4
Sx jjraS\R                  Sy\R                  4Sz jrbSS\R                  4S{ jjrc SS|\
\Z\R                  4   Sa\R                  S,\	\R                     4S} jjrdSS~ jreSrfg)    )IterableN)prod)AnyOptionalUnion)Tensor)
deprecated)pack_dict_to_tensorunpack_tensor_to_dict   )HIP_ENVIRONMENTlib)adammomentumrmsproplionlamblarsc                   >    \ rS rSrSrS rS r\S 5       rSS jr	Sr
g)	GlobalPageManager3   Nc                     [        S5      eNzCall get_instance() insteadRuntimeErrorselfs    g/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/functional.py__init__GlobalPageManager.__init__6       899    c                     / U l         g Npaged_tensorsr   s    r   
initializeGlobalPageManager.initialize9   s
    r"   c                     U R                   c0  U R                  U 5      U l         U R                   R                  5         U R                   $ r$   	_instance__new__r'   clss    r   get_instanceGlobalPageManager.get_instance<   6    == KK,CMMM$$&}}r"   c                 L    U R                   S S S2    H  n[        X!5        M     g )N)r&   prefetch_tensor)r   to_cputs      r   prefetch_allGlobalPageManager.prefetch_allC   s%     ##DbD)AA& *r"   r%   F)__name__
__module____qualname____firstlineno__r+   r   r'   classmethodr/   r7   __static_attributes__ r"   r   r   r   3   s*    I:   'r"   r   c                   :    \ rS rSrSrS rS r\S 5       rS r	Sr
g)CUBLAS_ContextK   Nc                     [        S5      er   r   r   s    r   r   CUBLAS_Context.__init__N   r!   r"   c                     0 U l         g r$   contextr   s    r   r'   CUBLAS_Context.initializeQ   s	    r"   c                     U R                   c0  U R                  U 5      U l         U R                   R                  5         U R                   $ r$   r*   r-   s    r   r/   CUBLAS_Context.get_instanceT   r1   r"   c                    UR                   U R                  ;  a  [        R                  R	                  5       n[        R                  R                  U5        [        R                  " [        R                  " 5       5      U R                  UR                   '   [        R                  R                  U5        U R                  UR                      $ r$   )
indexrH   torchcudacurrent_device
set_devicectc_void_pr   get_context)r   deviceprev_devices      r   rT   CUBLAS_Context.get_context[   s}    <<t||+**335KJJ!!&))+S__5F)GDLL&JJ!!+.||FLL))r"   rG   )r:   r;   r<   r=   r+   r   r'   r>   r/   rT   r?   r@   r"   r   rB   rB   K   s*    I:  *r"   rB   c                   4    \ rS rSrSrS rS r\S 5       rSr	g)Cusparse_Contextd   Nc                     [        S5      er   r   r   s    r   r   Cusparse_Context.__init__g   r!   r"   c                 `    [         R                  " [        R                  " 5       5      U l        g r$   )rR   rS   r   get_cusparserH   r   s    r   r'   Cusparse_Context.initializej   s    {{3#3#3#56r"   c                     U R                   c0  U R                  U 5      U l         U R                   R                  5         U R                   $ r$   r*   r-   s    r   r/   Cusparse_Context.get_instancem   r1   r"   rG   )
r:   r;   r<   r=   r+   r   r'   r>   r/   r?   r@   r"   r   rY   rY   d   s%    I:7  r"   rY   rO   )rM   ac                 @    [         R                  R                  U 5      $ r$   )rN   rO   	device_ofrb   s    r   _cuda_device_ofrf   }   s    zz##A&&r"   c                 ,    [         R                  " 5       $ r$   )
contextlibnullcontextre   s    r   rf   rf      s    %%''r"   dtyperU   c                    U R                   [        U5      -  n[        R                  " [        R
                  " U5      5      n[        R                  " U[        R                  " [        R                  5      5      n[        R                  R                  XRS9n[        R                  " X`[        U5      S9R                  U5      nSUl        UR                   Ul        U$ )N)shape)rk   countT)itemsizer   r   cget_managed_ptrrR   c_size_tcastPOINTERc_intnp	ctypeslibas_arrayrN   
frombufferviewis_pagedrM   page_deviceid)rk   rU   rm   	num_bytescuda_ptrc_ptr	new_arrayouts           r   	get_pagedr      s    e,I##BKK	$:;HGGHbjj23E%%e%9I


9e
E
J
J5
QCCLCJr"   FAc                    U R                   (       d   S5       eU(       a  SnOU R                  n[        R                  " [	        U 5      [
        R                  " U R                  5      [
        R                  " U5      5        g )Nz%Only paged tensors can be prefetched!r3   )	rz   r{   r   	cprefetchget_ptrrR   rq   nbytesc_int32)r   r5   deviceids      r   r4   r4      sO    ::>>>:??MM'!*bkk!((3RZZ5IJr"   c           	         S nUR                   [        R                  :X  a,  [        [        SU  S3S 5      n[
        R                  " U5      nOIUR                   [        R                  :X  a+  [        [        SU  S3S 5      n[
        R                  " U5      nUc  [        SU  35      e[        USS5      nU(       a   U(       a  [        U5        Ub  [        U5        U" [        U5      [        U5      W[
        R                  " UR                  5       5      5        UR                  (       d  UR                  (       a  [        R                  R!                  5         g g )Nc_fp32_uint8zFunction not implemented: 
is_managedF)rk   rN   float32getattrr   rR   c_floatuint8c_uint8NotImplementedErrorr4   r   c_int64numelrz   rO   synchronize)	func_namer   Bvalueprefetchfunccvaluer   s           r   elementwise_funcr      s    Dww%--sa	{%0$7E"	
EKK	sa	{&148E"|!$>yk"JKKL%0Jh=AWQZAGGI)>?zzQZZ
 	

   r"   c                      [        SU S U5        g )Nfillr   )r   r   rU   r   s       r   r   r      s    VQe,r"   c                     [        SXS5        g )N_mulr   r   )r   r   rU   s      r   r   r      s    VQ1%r"   c                 r   U (       a  SOSnSU-  nU(       d  US:  a  U (       d  SU-  OSU-  S-
  n[         R                  " USU5      nSUR                  5       -
  nUS:X  a  U$ UR                  5       S-  n[         R                  " US U R	                  5       S/U-  -   XWS  R	                  5       -   5      $ )	N                    r         ?   r   )rN   linspacer   r   tolist)signed
total_bitsadd_zerosigntotal_valuesvaluesgapls           r   create_linear_mapr      s    4sDj=L:>
 -3q*}:8I^^D#|4F

C
axLLNa||F2AJ--/1#);fRj>O>O>QQRRr"   c                     SSK Jn  U(       ax  UR                  [        R
                  " U SS5      S S 5      R                  5       nS/S-  nUR                  [        R
                  " U SS5      S S 5      * R                  5       nOwUR                  [        R
                  " U SS5      S S 5      R                  5       nS/S	-  nUR                  [        R
                  " U SS5      S S 5      * R                  5       nXE-   U-   n[        R                  " U5      nUR                  5       R                  nXR                  5       -  nUR                  5       S
:X  d   eU$ ! [         a  n[        S5      UeS nAff = f)Nr   )normzZScipy is required for `create_normal_map`. Install `bitsandbytes` with the `[test]` extra.g      ?	   r3      r      r   )scipy.statsr   ImportErrorppfrN   r   r   r   sortr   maxr   )	offsetuse_extra_valuer   iev1v2v3vr   s	            r   create_normal_mapr      sN   $ XXennVS!4Sb9:AACSHxxvsA6s;<<DDFXXennVS!4Sb9:AACSHxxvsA6s;<<DDF
"A\\!_F[[]!!F
jjlF<<>S   M/  h
	s   E 
E:)E55E:c                 v   UnUnU (       a  SOSnXE-   X6-
  :X  d   e/ n[        [        SX-
  -  * SX-
  -  S5      5       H  u  pUR                  SU	-  5        M     / n
[        [        R
                  " SS/US95      nSUS-
  -  n[        SU-  5       H  nU H  nUS:w  a  SOSn[        [        U5      5       H  u  nnUUSUS-   * -  -  -  nM     US:X  a
  USU* -  -  nOUSX-
  S-
  * -  -  nU
R                  U5        U (       d  Mr  U
R                  U* 5        M     M     [        U
5      SU-  :X  d   eU
R                  5         US:  a1  S[        U
5      -
  n[        U5       H  nU
R                  S5        M     U
R                  5         [        R                  " U
5      nUUR                  5       -  nU$ )Nr   r   r   )repeatr   r   )	enumeraterangeappendlist	itertoolsproductlenr   rN   tensorr   )r   exponent_bitsprecision_bitsr   ephas_signevaluesivalr   lstbiasevaluebit_patternr   pvalr   codes                      r   create_fp8_mapr      s   AAqH5J))))GEA-*B$C"DaMLdFeghijq#v k F
y  !Q?
@C"#Dm,-K1A!E$T+%674Ah// 8{T7
* v}q'8%9 99MM% vuf%  . v;!Z-'''
KKMA~CKsAMM! 
KKM<<DDHHJDKr"   c                    / nUS-
  nSXA-
  -  S-
  n[        U5       H  n[        U (       a  SXd-   U-
  -  S-   OSXd-   U-
  S-   -  S-   5      n[        R                  " SSU[        R                  S9nUSS USS -   S-  n	USUS-
  * U-   -  U	-  R                  5       -  nU (       d  M  USUS-
  * U-   -  * U	-  R                  5       -  nM     US	:  a~  [        R                  " SSUS-   [        R                  S9nUSS USS -   S-  n	USUS-
  * W-   -  U	-  R                  5       -  nU (       a!  USUS-
  * U-   -  * U	-  R                  5       -  nUR                  S	5        UR                  S
5        [        U5      SU-  :X  d   eS[        U5      -
  n
[        U
5       H  nUR                  S	5        M     UR                  5         [        R                  " U[        R                  S9$ )a  
Creates the dynamic quantiztion map.

The dynamic data type is made up of a dynamic exponent and
fraction. As the exponent increase from 0 to -7 the number
of bits available for the fraction shrinks.

This is a generalization of the dynamic type where a certain
number of the bits and be reserved for the linear quantization
region (the fraction). n determines the maximum number of
exponent bits.

For more details see
(8-Bit Approximations for Parallelism in Deep Learning)[https://arxiv.org/abs/1511.04561]
r   r   g?rk   Nr3          @
   r   r   r   )
r   intrN   r   r   r   r   r   r   r   )r   max_exponent_bitsr   datanon_sign_bitsadditional_itemsr   fraction_items
boundariesmeansr   s              r   create_dynamic_mapr     s   " D NM]>?!C$% !#&7781<q(+<<q@AAE

 ^^CN%--P
CR:ab>1S8",q01A56%?GGII6r 1A 56:;<uDLLNND & !^^C,<q,@V
CR:ab>1S8",q01A56%?GGIIr 1A 56:;<uDLLNNDKKNKKt9:%%%
D	/C3ZA  	IIK<<EMM22r"   tensorsc                    Sn[        5       nU  Hr  nUc  M  [        USS5      (       a  M  XR                  R                  S:g  -  nUR	                  UR                  R                  UR                  R
                  45        Mt     U(       d4  [        SU  Vs/ s H  o3R                  UR                  4PM     sn 35      e[        U5      S:  a4  [        SU  Vs/ s H  o3R                  UR                  4PM     sn 35      eU$ s  snf s  snf )aP  Verifies that the input tensors are all on the same device.

An input tensor may also be marked as `paged`, in which case the device placement is ignored.

Args:
    tensors (`Iterable[Optional[torch.Tensor]]`): A list of tensors to verify.

Raises:
    `RuntimeError`: Raised when the verification fails.

Returns:
    `Literal[True]`
Trz   FcpuzZAll input tensors need to be on the same GPU, but found some tensors to not be on a GPU:
 r   zcInput tensors need to be on the same GPU, but found the following tensor and device combinations:
 )	setr   rU   typeaddrM   r   rm   r   )r   on_gpugpu_idsr6   s       r   	is_on_gpur   O  sK    FeG=J!>!>hhmmu,,FKK78	  i  IP  kQ  IP  DElslsuvu}u}k~  IP  kQ  jR  S
 	
 7|ar  RY  tZ  RY  MNu|u|~  G  G  uH  RY  tZ  s[  \
 	
 M kQ
 tZs   !D
!D

r   returnc                 R   U R                   R                  S:X  aG  [        R                  " [        R
                  R                  U R                   R                  5      5      $ [        R                  " [        R
                  R                  U R                   R                  5      5      $ )Nxpu)	rU   r   rR   rS   rN   _C_xpu_getCurrentRawStreamrM   _cuda_getCurrentRawStream)r   s    r   _get_tensor_streamr   s  sd    }}U"{{588<<V]]=P=PQRR;;uxx99&--:M:MNOOr"   c                 R    U c  g[         R                  " U R                  5       5      $ )zGets the memory address of the first element of a tenso

Args:
    A (`Optional[Tensor]`): A PyTorch tensor.

Returns:
    `Optional[ct.c_void_p]`: A pointer to the underlying tensor data.
N)rR   rS   data_ptr)r   s    r   r   r   z  s!     	y;;qzz|$$r"   c                       \ rS rSrSrSr\ V Vs/ s H  nSU 3PM
     snn r/ SQr       SS jrS r	\
S	\\\4   S
\R                  SS 4S j5       rSS jrS rS rSrgs  snn f )
QuantStatei  zWcontainer for quantization state components to work with Params4bit and similar classes)fp4nf4bitsandbytes__)absmax	quant_mapnested_absmaxnested_quant_mapquant_state
quant_type	blocksizerk   rm   nested_blocksizenested_dtypenested_offsetNc	                 v    Xl         X l        X0l        X`l        X@l        XPl        Xpl        Xl        US LU l        g r$   )	r  rm   r   rk   r
  r	  r   state2nested)	r   r  rm   r   r
  r	  rk   r   r  s	            r   r   QuantState.__init__  s8     
	
"$D(r"   c                 H   U R                   (       aT  U R                  U R                  U R                  U R                  U R
                  U R                  /U R                  /nX!   $ U R                  U R                  U R                  U R                  SU R                  /nX!   $ )z
ensures compatibility with older quant state scheme with nested lists.
assumes the following layout:
state = [qabsmax, input_shape, A.dtype, blocksize, [offset, state2], quant_type]
state2 = [absmax, input_shape, A.dtype, blocksize, None, quant_type]
N)r  r  rm   rk   r
  r   r  r	  )r   idx	list_reprs      r   __getitem__QuantState.__getitem__  s     ;;



dkk*I ~ djj$**dnndTXTcTcdI~r"   qs_dictrU   r   c                    UR                  5        VVs/ s H0  u  p4SU;   d  M  [        U[        R                  5      (       d  M.  UPM2     nnn[	        U5      (       d  SU;  a  [        S5      e[	        U5      S:w  d%  US   R                  S5      S   U R                  ;  a  [        SU R                   S	U S35      e[	        U5      S:X  a.  US   nUR                  [        UR                  U5      5      5        UR                  5        VVs0 s H  u  p4UR                  S5      S   U_M     nnn[        UR                  5       5      R                  U R                  5      (       d   eS
U;   as  [        R                  " [!        US   5      5      R#                  U5      nU " US
   R#                  U5      US   US   R#                  U5      [%        [        US   5      S9nOSu  pxU " US   US   R#                  U5      US   US   R#                  U5      [%        [        US   5      US   b  [        R&                  " US   5      OSUUS9n	U	$ s  snnf s  snnf )a'  
unpacks components of state_dict into QuantState
where necessary, convert into strings, torch.dtype, ints, etc.

qs_dict: based on state_dict, with only relevant keys, striped of prefixes.

item with key `quant_state.bitsandbytes__[nf4/fp4]` may contain minor and non-tensor quant state items.
r  r	  z<Expected packed or unpacked quant_state items, found neitherr   r   .r3   z@There should be exactly one `quant_state` item with ending from z.
Detected r  r  r  r  r  )r  r
  r   rk   NNr  r
  r  rk   rm   N)r	  r  r
  r   rk   rm   r   r  )items
isinstancerN   r   r   
ValueErrorsplitvalid_qs_type_keysupdater   popr   keysissubsetvalid_qs_keysr   floattor   Size)
r.   r  rU   kr   qs_keyfirst_qs_keyr   r  r  s
             r   	from_dictQuantState.from_dict  s<    !(f=A3E!*UVX]XdXdJe!f6{{|7:[\\[A!5b!9AWAW!WRSVSiSiRjjvw}v~~  A 
 v;!!!9LNN0\1JKL3:==?C?411773<#Q&?C7<<>"++C,=,=>>>>g%\\%(@"ABEEfMF/226:!"45/033F;eW^%<=	F (NF|,8$''/k*%((0%!1229'2B2N%**WW-.TX	
 K g Ds   III!Ic                    U R                   U R                  U R                  U R                  [	        U R
                  5      R                  S5      [        U R                  5      S.nU R                  (       a  UR                  U R                  R                  U R                  R                  U R                  R                  R                  5       [	        U R                  R
                  5      R                  S5      U R                  R                  5       S.5        U(       d  U$ UR                  5        VVs0 s H(  u  p4[!        U["        R$                  5      (       d  M&  X4_M*     nnnUR                  5        VVs0 s H(  u  p4[!        U["        R$                  5      (       a  M&  X4_M*     nnn['        U5      USU R                   -   '   U$ s  snnf s  snnf )z
returns dict of tensors and strings to use in serialization via _save_to_state_dict()
param: packed -- returns dict[str, torch.Tensor] for state_dict fit for safetensors saving
ztorch.)r	  r  r
  r  rk   rm   )r  r  r  r  r  zquant_state.bitsandbytes__)r	  r  r
  r   strrk   striptuplerm   r  r   r  cloner   itemr  r  rN   r   r
   )r   packedr  r(  r   qs_packed_dictnon_tensor_dicts          r   as_dictQuantState.as_dict  sR    //kk_**844::&
 ;;NN%)[[%7%7(,(=(=(,(8(8(>(>(@$'(9(9$:$@$@$J%)[[%5%5%7 N ,3==?Z?41jELL>Y$!$?Z,3MMO_ODA:aQVQ]Q]C^414O_NabqNr84??JK [_s   9%G"G>%G'Gc                    U R                   R                  U5      U l         U R                  R                  U5      U l        U R                  (       a  U R                  R                  U5      U l        U R
                  R                  R                  U5      U R
                  l        U R
                  R                   R                  U5      U R
                  l         g g r$   )r   r&  r  r  r   r  )r   rU   s     r   r&  QuantState.to  s    IILL(	kknnV,;;++..0DK!%!3!3!6!6v!>DKK#{{//226:DKK r"   c                 0   [        U[        5      (       d  g[        R                  " U R                  UR                  SS9=(       GaO    U R
                  UR
                  :H  =(       Ga.    [        R                  " U R                  UR                  SS9=(       a    U R                  UR                  :H  =(       a    U R                  UR                  :H  =(       a    U R                  UR                  :H  =(       a    U R                  b&  UR                  b  U R                  UR                  :H  OU R                  UR                  L =(       aK    U R                  b&  UR                  b  U R                  UR                  :H  $ U R                  UR                  L $ )NFgư>)atol)r  r   rN   allcloser  rm   r   rk   r
  r	  r   r  )r   others     r   __eq__QuantState.__eq__"  s3   %,, NN4;;4@  

ekk) tyy%**4@ 

ekk) %//1	
 5#3#33 ;;*u||/G u||+[[ELL0 ;;*u||/G u||+	
 [[ELL0	
r"   )	r  r
  r   rk   r  r   r	  rm   r  )NNNNNNNr9   )r:   r;   r<   r=   __doc__valid_quant_typesr  r$  r   r  r>   dictr.  r   rN   rU   r+  r6  r&  r>  r?   ).0xs   00r   r   r     s    a&8IJ8I1N1#.8IJM$ )*( 0S#X 0 0 0 0d@;
k Ks   A#r   r   r  r   c           	         Uc=  S[         ;  a*  [        5       R                  U R                  5      [         S'   [         S   n[        R
                  R                  R                  R                  U UR                  U R                  5      U5      u  pgU(       aQ  UR                  5       nXx-  n[        XtSS9u  p[        U	UR                  U R                  SS9UU R                  UU
S9nO,[        XqR                  U R                  SS9X@R                  S9nUb  UR                  U5      OUnUb   UR                  UR                  5      Ul        X;4$ )a  Quantize a tensor in blocks of values.

The input tensor is quantized by dividing it into blocks of `blocksize` values.
The the absolute maximum value within these blocks is calculated for scaling
the non-linear quantization.

Args:
    A (`torch.Tensor`): The input tensor. Supports `float16`, `bfloat16`, or `float32` datatypes.
    code (`torch.Tensor`, *optional*):
        A mapping describing the low-bit data type. Defaults to a signed 8-bit dynamic type.
        For more details, see  (8-Bit Approximations for Parallelism in Deep Learning)[https://arxiv.org/abs/1511.04561].
    absmax (`torch.Tensor`, *optional*): A tensor to use to store the absmax values.
    out (`torch.Tensor`, *optional*): A tensor to use to store the result.
    blocksize (`int`, *optional*):
        The size of the blocks. Defaults to 4096.
        Valid values are 64, 128, 256, 512, 1024, 2048, and 4096.
    nested (`bool`, *optional*): Whether to additionally quantize the absmax values. Defaults to False.

Raises:
    ValueError: Raised when the input data type is not supported.

Returns:
    `Tuple[torch.Tensor, QuantState]`: A tuple containing the quantization results.
    - `torch.Tensor`: The quantized tensor.
    - [`QuantState`]: The state object used to undo the quantization.
dynamicF)r
  r  T)copy)r  r   r
  rk   r   r  r  r   r
  rk   )	name2qmapr   r&  rU   rN   opsbitsandbytesquantize_blockwisedefaultmeanr   rk   copy_r  )r   r   r  r   r
  r  _out_absmaxr   qabsmaxr  r  s               r   rL  rL  :  s+   F |I%#5#7#:#:188#DIi #II**==EE	MD ,WRWX -''
 !ggahhTg6R^govovw !_#))D/$C #\\+*<*<=r"   r  r
  c           	         Uc  Uc   eUc@  Uc=  S[         ;  a*  [        5       R                  U R                  5      [         S'   [         S   nUc  [	        X#U[
        R                  S9nUR                  nUR                  (       a\  [        UR                  UR                  5      nX!R                  -  nUR                  [
        R                  :w  a  UR                  5       nUbo  [
        R                  R                  R                  R!                  U UUR"                  R                  U R                  5      UR$                  UR                  US9  U$ [
        R                  R                  R                  R'                  U UUR"                  R                  U R                  5      UR$                  UR                  5      $ )a|  Dequantize a tensor in blocks of values.

The input tensor is dequantized by dividing it into blocks of `blocksize` values.
The the absolute maximum value within these blocks is used for scaling
the non-linear dequantization.

Args:
    A (`torch.Tensor`): The quantized input tensor.
    quant_state ([`QuantState`], *optional*):
        The quantization state as returned by [`quantize_blockwise`].
        Required if `absmax` is not provided.
    absmax (`torch.Tensor`, *optional*):
        A tensor containing the scaling values.
        Required if `quant_state` is not provided and ignored otherwise.
    code (`torch.Tensor`, *optional*):
        A mapping describing the low-bit data type. Defaults to a signed 8-bit dynamic type.
        For more details, see  (8-Bit Approximations for Parallelism in Deep Learning)[https://arxiv.org/abs/1511.04561].
        Ignored when `quant_state` is provided.
    out (`torch.Tensor`, *optional*): A tensor to use to store the result.
    blocksize (`int`, *optional*):
        The size of the blocks. Defaults to 4096.
        Valid values are 64, 128, 256, 512, 1024, 2048, and 4096.
        Ignored when `quant_state` is provided.

Raises:
    ValueError: Raised when the input data type is not supported.

Returns:
    `torch.Tensor`:
        The dequantized tensor. The datatype is indicated by `quant_state.dtype` and defaults to `torch.float32`.
rF  rH  r   )rI  r   r&  rU   r   rN   r   r  r  dequantize_blockwiser  r   rk   r%  rJ  rK  r   r   r
  rM  )r   r  r  r   r   r
  r  s          r   rU  rU    ss   R "f&888|+I%#5#7#:#:188#DIi # YV[VcVcdF%k&8&8+:L:LM$$$<<5==(\\^F
		3377)!! 	8 	
 
99!!66>>	AHH% r"   c                 n   Uc  SnS nU S:X  a   / SQnO8U S:X  a  / SQnO-U S:X  a  / SQnO"U S:X  a  US	:X  a  / S
QS S S2   nO[        S5      eUc  [        SU  S35      e[        R                  " X1S9nUR                  UR	                  5       R                  5       5        UR                  5       S:X  d   eU$ )NrO   r  )r   g    6Gg    fg    TFٿg   I4ҿg   ০ǿg    Or   g   __?g   `\?g   ?g   @g?g    4?g   ` ?g   `v"?r   r  )r   g      ?g       @g      (@g      @g      @r   g      @r   g      g       g      (g      g      g       g      int4)               r   r   r   r   r3   iiaf4@   )r   g|8geg:Kڞ׿gH2퓊cпg}Yu-ÿgQ	#(Dr   gF?g`_?g
0E?gL_߹E?gƶ=?ga@?gкv-?r   r3   z94-bit AbnormalFloats currently only support blocksize 64.z	Typename z not supportedrU      )r   rN   r   div_absr   r   )typenamerU   r
  r   s       r   get_4bit_typerh    s    ~D5		
$ 
U	 l	V	G	U	 ?" d#D& &&abb|!IhZ~"FGG<<,DIIdhhjnn::<2Kr"   c           	      D    Uc  [         (       d  SOSn[        XX#USU5      $ Nrb     r  r   quantize_4bitr   r  r   r
  compress_statisticsquant_storages         r   quantize_fp4rq     *     -oB3	C4GP]^^r"   c           	      D    Uc  [         (       d  SOSn[        XX#USU5      $ Nrb  rk  r  rl  rn  s         r   quantize_nf4ru  -  rr  r"   r  c                    Uc  [         (       d  SOSnU R                  n[        R                  R                  R
                  R                  U UUU5      u  p[        XPR                  S9n
U(       a:  U	R                  5       n[        X-
  SS9u  pA	[        UUU R                  UU
UUUS9nO[        U	UU R                  UU
US9nUb  UR                  U5      OUnUb   UR                  UR                  5      Ul        X.4$ )a  Quantize tensor A in blocks of 4-bit values.

Quantizes tensor A by dividing it into blocks which are independently quantized.

Args:
    A (`torch.Tensor`): The input tensor. Supports `float16`, `bfloat16`, or `float32` datatypes.
    absmax (`torch.Tensor`, *optional*): A tensor to use to store the absmax values.
    out (`torch.Tensor`, *optional*): A tensor to use to store the result.
    blocksize (`int`, *optional*):
        The size of the blocks. Defaults to 128 on ROCm and 64 otherwise.
        Valid values are 64, 128, 256, 512, 1024, 2048, and 4096.
    compress_statistics (`bool`, *optional*): Whether to additionally quantize the absmax values. Defaults to False.
    quant_type (`str`, *optional*): The data type to use: `nf4` or `fp4`. Defaults to `fp4`.
    quant_storage (`torch.dtype`, *optional*): The dtype of the tensor used to store the result. Defaults to `torch.uint8`.

Raises:
    ValueError: Raised when the input data type is not supported.

Returns:
    Tuple[`torch.Tensor`, `QuantState`]: A tuple containing the quantization results.
    - `torch.Tensor`: The quantized tensor with packed 4-bit values.
    - [`QuantState`]: The state object used to undo the quantization.
rb  rk  rc  r   )r
  )r  rm   rk   r
  r   r	  r   r  )r  rm   rk   r
  r   r	  )r   rm   rN   rJ  rK  rm  rM  rh  rU   rN  rL  r   rk   rO  r  )r   r  r   r
  ro  r	  rp  input_shaperP  rQ  r   r   rR  r  states                  r   rm  rm  :  s    B -oB3	''KII**88@@		MD HH5D,W-=M''!	
 ''!
 !_#))D/$C ||ELL1:r"   c                 B    Uc  [         (       d  SOSn[        XX#US5      $ rj  r   dequantize_4bitr   r  r  r   r
  s        r   dequantize_fp4r}    &     -oB3	16	5IIr"   c                 B    Uc  [         (       d  SOSn[        XX#US5      $ rt  rz  r|  s        r   dequantize_nf4r    r~  r"   c           
      @   Uc  [         (       d  SOSnUc*  Ub  Uc   e[        UUR                  UR                  UUS9nOUR                  nUR
                  (       a\  [        UR                  UR                  5      nX!R                  -  nUR                  [        R                  :w  a  UR                  5       nUb_  [        R                  R                  R                  R                  XUR                   UR"                  UR                  UR                  US9  O`[        R                  R                  R                  R%                  U UUR                   UR"                  UR                  UR                  5      nU R                  S   S:X  a  UR'                  5       $ U$ )aE  Dequantizes a packed 4-bit quantized tensor.

The input tensor is dequantized by dividing it into blocks of `blocksize` values.
The the absolute maximum value within these blocks is used for scaling
the non-linear dequantization.

Args:
    A (`torch.Tensor`): The quantized input tensor.
    quant_state ([`QuantState`], *optional*):
        The quantization state as returned by [`quantize_4bit`].
        Required if `absmax` is not provided.
    absmax (`torch.Tensor`, *optional*):
        A tensor containing the scaling values.
        Required if `quant_state` is not provided and ignored otherwise.
    out (`torch.Tensor`, *optional*): A tensor to use to store the result.
    blocksize (`int`, *optional*):
        The size of the blocks. Defaults to 128 on ROCm and 64 otherwise.
        Valid values are 64, 128, 256, 512, 1024, 2048, and 4096.
    quant_type (`str`, *optional*): The data type to use: `nf4` or `fp4`. Defaults to `fp4`.

Raises:
    ValueError: Raised when the input data type or blocksize is not supported.

Returns:
    `torch.Tensor`: The dequantized tensor.
rb  rk  )r  rm   rk   r
  r	  rT  r   r   )r   r   rm   rk   r  r  rU  r  r   rN   r   r%  rJ  rK  r{  r   r
  r	  rM  r6   )r   r  r  r   r
  r	  s         r   r{  r{    s`   F -oB3	!co55 ))))!
 ##%k&8&8+:L:LM$$$<<5==(\\^F
		..22{,,k.D.DkFWFWYdYjYjps 	3 	
 ii$$44<<!!""
 	wwqzQuuwJr"   zDThis function is deprecated and will be removed in a future release.)categoryc                    UcX  S[         ;  a*  [        5       R                  U R                  5      [         S'   [         S   nUR                  U R                  5      n[        R
                  " U 5      R                  5       nUR                  [        R                  :w  a  UR                  5       nX-  n[        XAU5      nX#U44$ )NrF  )rI  r   r&  rU   rN   rf  r   rk   r   r%  quantize_no_absmax)r   r   r   r  inps        r   quantizer    s     |I%#5#7#:#:188#DIi #wwqxx YYq\F||u}}$
*C
S
,Cr"   rx  c                    Uc  Uc   eUc[  UcX  S[         ;  a*  [        5       R                  U R                  5      [         S'   [         S   nUR                  U R                  5      nUc  X#4n[	        XS   U5      nXAS   -  $ )NrF  r   r   )rI  r   r&  rU   dequantize_no_absmax)r   rx  r  r   r   s        r   
dequantizer    s      222|I%#5#7#:#:188#DIi #wwqxx }
q(C
0Cq>r"   c           
      b   [        U 5         Uc#  [        R                  " U [        R                  S9n[	        X/5        [
        R                  " [        U5      [        U 5      [        U5      [        R                  " U R                  5       5      5        SSS5        U$ ! , (       d  f       U$ = f)a  
Quantizes input tensor to 8-bit.

Quantizes the 32-bit input tensor `A` to the 8-bit output tensor
`out` using the quantization map `code`.

Parameters
----------
A : torch.Tensor
    The input tensor.
code : torch.Tensor
    The quantization map.
out : torch.Tensor, optional
    The output tensor. Needs to be of type byte.

Returns
-------
torch.Tensor:
    Quantized 8-bit tensor.
Nr   )rf   rN   
zeros_liker   r   r   	cquantizer   rR   rt   r   )r   r   r   s      r   r  r    sz    , 
	;""1EKK8C1(gdmWQZrxx	?RS	 
 J 
	 Js   B	B
B.c           
      |   [        U 5         Uc#  [        R                  " U [        R                  S9n[	        XU/5        [        U 5      n[        R                  " [        U5      [        U 5      [        U5      [        R                  " U R                  5       5      U5        SSS5        U$ ! , (       d  f       U$ = f)ac  
Dequantizes the 8-bit tensor to 32-bit.

Dequantizes the 8-bit tensor `A` to the 32-bit tensor `out` via
the quantization map `code`.

Parameters
----------
A : torch.Tensor
    The 8-bit input tensor.
code : torch.Tensor
    The quantization map.
out : torch.Tensor
    The 32-bit output tensor.

Returns
-------
torch.Tensor:
    32-bit output tensor.
Nr   )rf   rN   r  r   r   r   r   cdequantizer   rR   rt   r   )r   r   r   streams       r   r  r  8  s    , 
	;""1EMM:C4C.!#A&wqz73<!'')ATV\] 
 J 
	 Js   BB,,
B;optimizer_namegr   state1beta1epssteplrr  beta2beta3alphaweight_decaygnorm_scale	unorm_vec	max_unormc                     SnUS:  a.  [         R                  " UR                  R                  5       5      n[	        XX8U/5        [         R
                  R                  R                  U UUUUUUUUU	U
UUUUUUU5        g)a  
Performs an inplace optimizer update with one or two optimizer states.

Universal optimizer update for 32-bit state and 32/16-bit gradients/weights.

Parameters
----------
optimizer_name : str
    The name of the optimizer: {adam}.
g : torch.Tensor
    Gradient tensor.
p : torch.Tensor
    Parameter tensor.
state1 : torch.Tensor
    Optimizer state 1.
beta1 : float
    Optimizer beta1.
eps : float
    Optimizer epsilon.
weight_decay : float
    Weight decay.
step : int
    Current optimizer step.
lr : float
    The learning rate.
state2 : torch.Tensor
    Optimizer state 2.
beta2 : float
    Optimizer beta2.
beta3 : float
    Optimizer beta3.
alpha : float
    Optimizer alpha.
gnorm_scale : float
    The factor to rescale the gradient to the max clip value.
unorm_vec : torch.Tensor
    The tensor for the update norm.
max_unorm : float
    The maximum update norm relative to the weight norm.
skip_zeros : bool
    Whether to skip zero-valued gradients or not (default: False).
r   N)rN   r   r   r%  r   rJ  rK  optimizer_update_32bit)r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  
skip_zeros
param_norms                     r   r  r  X  s    | J3ZZ/
qVY/0	II11		
%r"   zyThis function is deprecated and will be removed in a future release. Please use optimizer_update_8bit_blockwise instead. qmap1qmap2max1max2new_max1new_max2c                    SnUS:  a.  [         R                  " UR                  R                  5       5      n[	        U5         [        XX4UXXX/5        UR                  [         R                  :X  Ga  UR                  [         R                  :X  Gaa  [        U    S   " [        U5      [        U5      [        U5      [        U5      [        U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U	5      [        U
5      [        U5      [        U5      [        U5      [        U5      [        U5      [        R                  " U5      [        R                  " U5      [        R                  " UR                  5       5      5        GOUR                  [         R                  :X  Ga  UR                  [         R                  :X  Ga`  [        U    S   " [        U5      [        U5      [        U5      [        U5      [        U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U	5      [        U
5      [        U5      [        U5      [        U5      [        U5      [        U5      [        R                  " U5      [        R                  " U5      [        R                  " UR                  5       5      5        O%[!        SUR                   SUR                   35      eSSS5        g! , (       d  f       g= f)a  
Performs an inplace Adam update.

Universal Adam update for 32/8-bit state and 32/16-bit gradients/weights.
Uses AdamW formulation if weight decay > 0.0.

Parameters
----------
optimizer_name : str
    The name of the optimizer. Choices {adam, momentum}
g : torch.Tensor
    Gradient tensor.
p : torch.Tensor
    Parameter tensor.
state1 : torch.Tensor
    Adam state 1.
state2 : torch.Tensor
    Adam state 2.
beta1 : float
    Adam beta1.
beta2 : float
    Adam beta2.
eps : float
    Adam epsilon.
weight_decay : float
    Weight decay.
step : int
    Current optimizer step.
lr : float
    The learning rate.
qmap1 : torch.Tensor
    Quantization map for first Adam state.
qmap2 : torch.Tensor
    Quantization map for second Adam state.
max1 : torch.Tensor
    Max value for first Adam state update.
max2 : torch.Tensor
    Max value for second Adam state update.
new_max1 : torch.Tensor
    Max value for the next Adam update of the first state.
new_max2 : torch.Tensor
    Max value for the next Adam update of the second state.
gnorm_scale : float
    The factor to rescale the gradient to the max clip value.
unorm_vec : torch.Tensor
    The tensor for the update norm.
max_unorm : float
    The maximum update norm relative to the weight norm.
r   r   r   zAGradient+optimizer bit data type combination not supported: grad z, optimizer N)rN   r   r   r%  rf   r   rk   r   r   str2optimizer8bitr   rR   r   r   r   float16r  )r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  s                        r   optimizer_update_8bitr    s   Z J3ZZ/
		1E$hab77emm#(Cn-a0

	"

9%

:&

5!

5!

3

4 

2!!

<(

;'

1779%+. WW%&,,%++*En-a0

	"

9%

:&

5!

5!

3

4 

2!!

<(

;'

1779%+0 STUT[T[S\\hioiuiuhvw g 
		s   M4N??
Oabsmax1absmax2c                     [        X!X4XX/5        [        R                  R                  R	                  U UUUUUUUUU	U
UUUUUUUU5        g r$   )r   rN   rJ  rK  optimizer_update_8bit_blockwise)r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  s                      r   r  r  :  sf    * qVU7DE	II::		
'r"   grad	gnorm_vec
percentilec           
      J   [        U 5         [        X/5        U R                  [        R                  :X  ab  [
        R                  " [        U 5      [        U5      [        R                  " U5      [        R                  " U R                  5       5      5        OU R                  [        R                  :X  ab  [
        R                  " [        U 5      [        U5      [        R                  " U5      [        R                  " U R                  5       5      5        O[        SU R                   S35      eSSS5        [        R                  " XS-     5      n[        R                  " U5      u  pV[        R                  " XS   5      nSnXG:  a  Xt-  nXGU4$ ! , (       d  f       Nh= f)zApplies percentile clipping

grad: torch.Tensor
    The gradient tensor.
gnorm_vec: torch.Tensor
    Vector of gradient norms. 100 elements expected.
step: int
    The current optimization steps (number of past gradient norms).

zGradient type z not supported!NrZ   r   )rf   r   rk   rN   r   r   cpercentile_clipping_g32r   rR   r   r   r  cpercentile_clipping_g16r  sqrtr   )	r  r  r  r  current_gnormvalsr  
clip_valuer  s	            r   percentile_clippingr  h  s-    
	4#$::&((	"

4 

4::<(	 ZZ5==(((	"

4 

4::<(	 ~djj\IJJ# 
& JJy45M

9%IDD,-JK! 0k117 
	s   D&F
F"c                    [         R                  R                  5       (       d  [         R                  R                  5         U R                  U:w  d  UR                  U:w  a%  [        SU R                   SUR                   35      eU R                  nUR                  nUnUn	Sn
[        U5      S:X  a  [        U5      S:X  a  U(       d+  U	(       d$  U R                  S   UR                  S   :w  a  Sn
GO\U(       a+  U	(       d$  U R                  S   UR                  S   :w  a  Sn
GO*U(       a+  U	(       a$  U R                  S   UR                  S   :w  a  Sn
GOU(       d)  U	(       a"  U R                  S   UR                  S   :w  a  Sn
GO[        U5      S:X  a  [        U5      S:X  a  U(       d+  U	(       d$  U R                  S   UR                  S   :w  a  Sn
GOvU(       a+  U	(       d$  U R                  S   UR                  S   :w  a  Sn
GODU(       a+  U	(       a$  U R                  S   UR                  S   :w  a  Sn
GOU(       d)  U	(       a"  U R                  S   UR                  S   :w  a  Sn
O[        U5      S:X  a  [        U5      S:X  a  U(       d*  U	(       d#  U R                  S   UR                  S   :w  a  Sn
OU(       a*  U	(       d#  U R                  S   UR                  S   :w  a  Sn
OaU(       a*  U	(       a#  U R                  S   UR                  S   :w  a  Sn
O0U(       d)  U	(       a"  U R                  S   UR                  S   :w  a  Sn
Ube  UR                  nU
(       dP  [        U5      S:X  aA  [        U5      S:X  a2  US   US   :X  a&  US   US   :X  a  US   US   :X  a  US   US   :X  a  Sn
GO[        U5      S:X  aw  [        U5      S:X  ah  U(       d  U	(       d  US   US   4nGOqU(       a  U	(       a  US   US   4nGOWU(       a  U	(       d  US   US   4nGO=U(       d  U	(       a
  US   US   4nGO#[        U5      S:X  a  [        U5      S:X  at  U(       d  U	(       d  US   US   US   4nOU(       a  U	(       a  US   US   US   4nOU(       a  U	(       d  US   US   US   4nOU(       d  U	(       a  US   US   US   4nO[        U5      S:X  a  [        U5      S:X  as  U(       d  U	(       d  US   US   US   4nOVU(       a  U	(       a  US   US   US   4nO9U(       a  U	(       d  US   US   US   4nOU(       d  U	(       a  US   US   US   4nU
(       d  [        S	U S
U SU S
U	 S3	5      eW$ )Nz3Expected torch.int8 input tensors A and B, but got  and Tr   r   r   Fr\  z?Tensor dimensions incorrect for matrix mulitiplication: A x B:  x z with transpose for A x B: r  )	rN   rO   is_initializedinitrk   	TypeErrorrm   r   r  )r   r   r   transposed_Atransposed_Bexpected_typesAsBtAtBcorrectsouts               r   check_matmulr    sz   ::$$&&

ww-177m#;MaggYV[\]\c\c[deff	
B	
B	B	BG
2w!|B1"qwwqz!9GqwwqzQWWQZ7GB1771:3GqwwqzQWWQZ7G	RA#b'Q,"qwwqz!9GqwwqzQWWQZ7GB1771:3GqwwqzQWWQZ7G	RA#b'Q,"qwwqz!9GqwwqzQWWQZ7GB1771:3GqwwqzQWWQZ7G
yy3r7a<CGqLAw"Q%DGr!u$4A"Q%BqEUWXYUZNr7a<CGqLb1r!u~1r!u~B1r!u~B1r!u~W\c"glb1r!ube,1r!ube,B1r!ube,B1r!ube,W\c"glb1r!ube,1r!ube,B1r!ube,B1r!ube,MbTQTUWTXXstvswwz{}z~~  A
 	
 Kr"   r   c           
         Uc  [        S5      eUR                  nUR                  (       a"  [        XeR                  5      UR
                  -   nUbW  [        R                  R                  R                  R                  U UUR                  UUR                  UR                  US9  U$ [        R                  R                  R                  R                  U UUR                  UUR                  UR                  5      $ )NzIstate cannot be None. gemv_4bit() requires the state from quantize_4bit()rT  )r  r  r  rU  r  r   rN   rJ  rK  	gemv_4bitr   rm   r   r
  rM  )r   r   r   r  r  rx  r  s          r   r  r    s     }dee\\F||%fll;ellJ
		((,,KKJJOO 	- 	
 
99!!++33		

 r"   c                    [        XX#U5      nUc.  [        R                  " U[        R                  U R                  S9n[        U R                  5      S:X  ae  [        UR                  5      S:X  aL  U R                  S   UR                  S   :X  a,  U R                  S   UR                  S   :X  a  [        XU5      $ U R                  nUR                  nU(       a  [        U5      S:X  a  US   US   4nO$U(       a  [        U5      S:X  a  US   US   US   4nU(       a  [        U5      S:X  a  US   US   4nO$U(       a  [        U5      S:X  a  US   US   US   4n[        U5      S:X  Ga  UR                  5       S   UR                  S   :X  a  SnO&UR                  5       S   UR                  S   :X  a  Sn[        U R                  5      S:X  aN  U R                  5       S   U R                  S   :X  a  SnOtU R                  5       S   U R                  S   :X  a  SnOMU R                  5       S   U R                  S   :X  a  SnO&U R                  5       S   U R                  S   :X  a  Sn[        U5      S:X  a"  US   nU R                  5       U(       a  SOS   n	O.[        U5      S:X  a  [        U5      S:X  a  US   US   -  nUS   n	US   n
US   nUR                  5       U(       a  SOS   nUS   nOk[        U5      S:X  a\  [        U5      S:X  d   eUS   US   :X  a  US   US   :X  d  [        SU S	U 35      eSnSnUS   n
US   nUS   US   -  nU
nUS   n	U
n[        R                  5       R                  U R                  5      n[        XU/5        [        R                  " U[         R"                  " U5      [         R"                  " U5      [         R$                  " W
5      [         R$                  " W5      [         R$                  " W5      ['        U5      ['        U 5      ['        U5      [         R$                  " W5      [         R$                  " W	5      [         R$                  " W5      5        U$ )
Nsizerk   rU   r\  r   r   r   FTzMOnly bsi,bso->io supported for tensor contractions, but dims for A x B were: r  )r  rN   zerosint32rU   r   rm   batched_igemmstrider  rB   r/   rT   r   r   cigemmrR   c_boolr   r   )r   r   r   r  r  r  r  r  nldbmr(  ldaldcptrs                  r   igemmr    s    c>D
{kkt5;;qxxH
177|qS\Q.771:#
aggaj(@ s++	
B	
BB1eRU^	#b'Q,eRUBqE"B1eRU^	#b'Q,eRUBqE" 2w!|88:a=AGGAJ& LXXZ]aggaj(Lqww<1xxz!}
*$A!''!*,#xxz!}
*$A!''!*,#r7a<1A((*,QA6CW\c"gl11AQ%CqEqEhhj|!4e	RA2w!||1A2a5BqE>_`b_ccfgifjk  qEqEqEBqEMe

%
%
'
3
3AHH
=C qSkJJ
		,
		,


1


1


1




3


3


3 Jr"   c                 	   [        U R                  5      S:X  a  [        UR                  5      S:X  d%  [        SU R                   SUR                   35      e[        XX#U5      nUc.  [        R
                  " U[        R                  U R                  S9nUR                  5       (       a  UR                  5       S   nSnOUR                  5       nUS   UR                  S   :w  a$  UR                  5       nUR                  5       S   nOUS   UR                  S   :X  a  S	nUR                  5       S   nO}US   S:X  a$  UR                  5       nUR                  5       S   nOPUS   S:X  a$  UR                  5       nUR                  5       S   nO#UR                  5       nUR                  5       S   nU R                  5       (       a  U R                  5       S   nSnOU R                  5       nUS   U R                  S   :w  a&  U R                  5       n U R                  5       S   nSnOQUS   U R                  S   :X  a  U R                  5       S   nS	nO%U R                  5       n U R                  5       S   nSnU R                  S   n	U R                  S   n
UR                  S   nUR                  S   nUnUR                  S   UR                  S   -  nU R                  S   U R                  S   -  nU R                  S   UR                  S   -  n[        R                  5       R                  U R                  5      n[        XU/5        [        R                   " U["        R$                  " U5      ["        R$                  " U5      ["        R&                  " U5      ["        R&                  " U
5      ["        R&                  " U5      [)        U5      [)        U 5      [)        U5      ["        R&                  " U5      ["        R&                  " U5      ["        R&                  " U5      ["        R*                  " U5      ["        R*                  " U5      ["        R*                  " U5      ["        R,                  " U	5      5        U$ )
Nr\  z@Expected 3-dimensional tensors for bmm, but got shapes A and B: r  r  r   Fr   r   T)r   rm   r  r  rN   r  r  rU   is_contiguousr  
contiguousrB   r/   rT   r   r   cbatched_igemmrR   r  r   r   c_longc_uint32)r   r   r   r  r  r  r  sr  	num_batchr  r  r(  r  strideAstrideBstrideCr  s                     r   r  r  p  so    qww<1CLA$5[\]\c\c[ddijkjqjqirsttc>D
{kkt5;;qxxHhhjmHHJQ41771:A((*Q-CqTQWWQZL((*Q-CtqyLLNhhjm1LLNhhjmLLNhhjmhhjmHHJQ41771:A((*Q-C LqTQWWQZ((*Q-CLA((*Q-C L 
I	
A	
A	
A
Cggaj1771:%Gggaj1771:%Gggaj1771:%G

%
%
'
3
3AHH
=CqSk
		,
		,


1


1


1




3


3


3
		'
		'
		'
I!$ Jr"   c                     Ub6  [         R                  R                  R                  R	                  XU5        U$ [         R                  R                  R                  R                  X5      $ )a  Performs an 8-bit integer matrix multiplication.

A linear transformation is applied such that `out = A @ B.T`. When possible, integer tensor core hardware is
utilized to accelerate the operation.

Args:
    A (`torch.Tensor`): The first matrix operand with the data type `torch.int8`.
    B (`torch.Tensor`): The second matrix operand with the data type `torch.int8`.
    out (`torch.Tensor`, *optional*): A pre-allocated tensor used to store the result.
    dtype (`torch.dtype`, *optional*): The expected data type of the output. Defaults to `torch.int32`.

Raises:
    `NotImplementedError`: The operation is not supported in the current environment.
    `RuntimeError`: Raised when the cannot be completed for any other reason.

Returns:
    `torch.Tensor`: The result of the operation.
)rN   rJ  rK  int8_linear_matmulr   rM  )r   r   r   rk   s       r   r  r    sO    & 		1155aC@
99!!44<<QBBr"   	row_stats	col_statsr   c                     [         R                  R                  R                  R	                  XU[         R
                  US9nUb  UR                  U5      $ U$ )a  Performs dequantization on the result of a quantized int8 matrix multiplication.

Args:
    A (`torch.Tensor` with dtype `torch.int32`): The result of a quantized int8 matrix multiplication.
    row_stats (`torch.Tensor`): The row-wise quantization statistics for the lhs operand of the matrix multiplication.
    col_stats (`torch.Tensor`): The column-wise quantization statistics for the rhs operand of the matrix multiplication.
    out (`torch.Tensor`, *optional*): A pre-allocated tensor to store the output of the operation.
    bias (`torch.Tensor`, *optional*): An optional bias vector to add to the result.

Returns:
    `torch.Tensor`: The dequantized result with an optional bias, with dtype `torch.float16`.
)rk   r   )rN   rJ  rK  int8_mm_dequantrM  r  rO  )r   r  r  r   r   results         r   r  r    sP    & YY##33;;A)[`[h[hos;tF yy  Mr"   nnz_block_ptrc                 8   U R                  5       (       d   eSnUb  Ucx  U R                  5       R                  SU R                  S   5      nUS:  a  Xd:  nUR	                  US5        Uc  [        X5      nUc  UR                  SSS9R                  5       nXU4$ )ab  "Determine the quantization statistics for input matrix `A` in accordance to the `LLM.int8()` algorithm.

The row-wise and column-wise absmax values are determined.

For more information, see the [LLM.int8() paper](https://arxiv.org/abs/2208.07339).

<Tip>
This function is useful for training, but for inference it is advised to use [`get_row_absmax`] instead.
The column-wise quantization scales are not typically needed in inference scenarios.
</Tip>

Args:
    A (`torch.Tensor` with dtype `torch.float16`): Input tensor.
    row_stats (`torch.Tensor`, *optional*): If provided, calculation of row statistics is skipped.
    col_stats (`torch.Tensor`, *optional*): If provided, calculation of column statistics is skipped.
    nnz_block_ptr (`torch.Tensor`, *optional*): Not used.
    threshold (`float`, *optional*):
        An optional threshold for sparse decomposition of outlier features.
        No outliers are held back when 0.0. Defaults to 0.0.

Returns:
    `Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]`: A tuple containing quantization statistics.
    - `torch.Tensor` with dtype `torch.float32`: The row-wise quantization statistics.
    - `torch.Tensor` with dtype `torch.float32`: The column-wise quantization statistics.
    - `torch.Tensor` with dtype `torch.bool`, *optional*: A mask indicating the locations of outliers in the input tensor.
Nr3   r   r   F)dimkeepdim)is_floating_pointrf  ry   rm   masked_fill_get_row_absmaxamaxr%  )r   r  r  r  	thresholdoutlier_maskabsAs          r   get_colrow_absmaxr    s    D     LI-uuw||B,s?,LlC0 'q4I		a	7==?I--r"   c                 :   U R                   [        R                  :X  d   e[        U R                  SS 5      nU R                  S   n[        R
                  " U4[        R                  U R                  S9n[        U /5        [        U 5         [        R                  " [        U 5      [        U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [!        U 5      5        SSS5        U$ ! , (       d  f       U$ = f)a0  Determine the quantization statistics for input matrix `A` in accordance to the `LLM.int8()` algorithm.

For more information, see the [LLM.int8() paper](https://arxiv.org/abs/2208.07339).

Args:
    A (`torch.Tensor` with dtype `torch.float16`): The input matrix.
    threshold (`float`, *optional*):
        An optional threshold for sparse decomposition of outlier features.
        No outliers are held back when 0.0. Defaults to 0.0.

Returns:
    `torch.Tensor` with dtype `torch.float32`: The absolute maximum value for each row, with outliers ignored.
Nr3   rj   )rk   rN   r  r   rm   emptyr   rU   r   rf   r   cget_row_statsr   rR   r   r   r   )r   r  rowscolsr  s        r   r  r  @  s      77emm###D772;DTG5==JIqcN		AJIJJy!JJtJJtq!	
 
  
	 s   A3D
Dc                   r    \ rS rSrS\S\S\S\R                  S\R                  S\R                  4S jrS	rg
)COOSparseTensorif  r  r  nnzrowidxcolidxr   c                    UR                   [        R                  :X  d   eUR                   [        R                  :X  d   eUR                   [        R                  :X  d   eUR	                  5       U:X  d   eUR	                  5       U:X  d   eUR	                  5       U:X  d   eXl        X l        X0l        X@l        XPl	        X`l
        g r$   )rk   rN   r  r  r   r  r  r  r  r  r   )r   r  r  r  r  r  r   s          r   r   COOSparseTensor.__init__g  s     ||u{{***||u{{***||u}},,,||~$$$||~$$$||~$$$		r"   )r  r  r  r  r  r   N)	r:   r;   r<   r=   r   rN   r   r   r?   r@   r"   r   r  r  f  sE    "),6;llLQLLbgbnbnr"   r  c                       \ rS rSrS rSrg)CSRSparseTensoriy  c                    UR                   [        R                  :X  d   eUR                   [        R                  :X  d   eUR                   [        R                  :X  d   eUR	                  5       U:X  d   eUR	                  5       U:X  d   eUR	                  5       US-   :X  d   eXl        X l        X0l        X@l        XPl	        X`l
        g Nr   )rk   rN   r  r  r   r  r  r  rowptrr  r   )r   r  r  r  r  r  r   s          r   r   CSRSparseTensor.__init__z      ||u{{***||u{{***||u}},,,||~$$$||~$$$||~)))		r"   )r  r  r  r  r  r   Nr:   r;   r<   r=   r   r?   r@   r"   r   r  r  y      r"   r  c                       \ rS rSrS rSrg)CSCSparseTensori  c                    UR                   [        R                  :X  d   eUR                   [        R                  :X  d   eUR                   [        R                  :X  d   eUR	                  5       U:X  d   eUR	                  5       U:X  d   eUR	                  5       US-   :X  d   eXl        X l        X0l        X@l        XPl	        X`l
        g r
  )rk   rN   r  r  r   r  r  r  colptrr  r   )r   r  r  r  r  r  r   s          r   r   CSCSparseTensor.__init__  r  r"   )r  r  r  r  r  r   Nr  r@   r"   r   r  r    r  r"   r  c                    [         R                  " U R                  SS9u  pUR                  S5        [         R                  " U R
                  S-   4[         R                  U R                  R                  S9nUR                  UR                  5       UR                  5       SS9  UR                  S5        [        U R
                  U R                  U R                  X0R                  U R                   5      $ NTreturn_countsr   rj   r   )rM   srcr  )rN   uniquer  add_r  r  r  rU   scatter_longr   cumsum_r  r  r  r  r   )cooAr   countsr  s       r   coo2csrr!    s    \\$++TBNF
KKN[[$))a-)T[[EWEWXF
OO&++-VZZ\qOA
NN1499dii6;;PTP[P[\\r"   c                 4   [         R                  " U R                  5      u  pU R                  U   nU R                  U   n[         R
                  " USS9u  pVUR                  S5        [         R                  " U R                  S-   4[         R                  U R                  R                  S9nUR                  UR                  5       UR                  5       SS9  UR                  S5        [        U R                   U R                  U R"                  XsU5      $ r  )rN   r   r  r  r   r  r  r  r  r  rU   r  r  r   r  r  r  r  )r  r   
col2rowidxr  r   	colvaluesr   r  s           r   coo2cscr%    s    jj-OC[[$F[[$FS=INN1[[$))a-)T[[EWEWXF
OO)..*

!OD
NN1499dii66RRr"   c                     [         R                  " U4[         R                  US9n[         R                  " U4[         R                  US9n[         R                  " U4XCS9n[        XX%Xg5      $ )Nrj   )rN   r  r  r  )r  r  r  rU   rk   r  r  r   s           r   	coo_zerosr'    sS    [[#u{{6BF[[#u{{6BF[[#u<F4sFCCr"   out_colout_rowc                     Ub  [        S5      eUb  [        S5      eUb  [        S5      eUb  [        S5      e[        R                  R                  R                  R                  XS9$ )a  Determine the quantization statistics for input matrix `A` in accordance to the `LLM.int8()` algorithm.

The statistics are determined both row-wise and column-wise (transposed).

For more information, see the [LLM.int8() paper](https://arxiv.org/abs/2208.07339).

<Tip>
This function is useful for training, but for inference it is advised to use [`int8_vectorwise_quant`] instead.
This implementation performs additional column-wise transposed calculations which are not optimized.
</Tip>

Args:
    A (`torch.Tensor` with dtype `torch.float16`): The input matrix.
    col_stats (`torch.Tensor`, *optional*): A pre-allocated tensor to hold the column-wise quantization scales.
    row_stats (`torch.Tensor`, *optional*): A pre-allocated tensor to hold the row-wise quantization scales.
    out_col (`torch.Tensor`, *optional*): A pre-allocated tensor to hold the column-wise quantized data.
    out_row (`torch.Tensor`, *optional*): A pre-allocated tensor to hold the row-wise quantized data.
    threshold (`float`, *optional*):
        An optional threshold for sparse decomposition of outlier features.

        No outliers are held back when 0.0. Defaults to 0.0.

Returns:
    `Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]`: A tuple containing the quantized tensor and relevant statistics.
    - `torch.Tensor` with dtype `torch.int8`: The row-wise quantized data.
    - `torch.Tensor` with dtype `torch.int8`: The column-wise quantized data.
    - `torch.Tensor` with dtype `torch.float32`: The row-wise quantization scales.
    - `torch.Tensor` with dtype `torch.float32`: The column-wise quantization scales.
    - `torch.Tensor` with dtype `torch.int32`, *optional*: A list of column indices which contain outlier features.
zUrow_stats must be None. int8_double_quant() does not support pre-allocated row_stats.zUcol_stats must be None. int8_double_quant() does not support pre-allocated col_stats.zQout_col must be None. int8_double_quant() does not support pre-allocated out_col.zQout_row must be None. int8_double_quant() does not support pre-allocated out_row.)r  )r  rN   rJ  rK  int8_double_quantrM  )r   r  r  r(  r)  r  s         r   r+  r+    sq    N pqqpqqlmmlmm99!!33;;A;SSr"   statsc                 h    [         R                  R                  R                  R	                  X5      $ )aA  Dequantizes a tensor with dtype `torch.int8` to `torch.float32`.

Args:
    A (`torch.Tensor` with dtype `torch.int8`): The quantized int8 tensor.
    stats (`torch.Tensor` with dtype `torch.float32`): The row-wise quantization statistics.

Returns:
    `torch.Tensor` with dtype `torch.float32`: The dequantized tensor.
)rN   rJ  rK  int8_vectorwise_dequantrM  )r   r,  s     r   r.  r.    s%     99!!99AA!KKr"   c                 h    [         R                  R                  R                  R	                  X5      $ )aG  Quantizes a tensor with dtype `torch.float16` to `torch.int8` in accordance to the `LLM.int8()` algorithm.

For more information, see the [LLM.int8() paper](https://arxiv.org/abs/2208.07339).

Args:
    A (`torch.Tensor` with dtype `torch.float16`): The input tensor.
    threshold (`float`, *optional*):
        An optional threshold for sparse decomposition of outlier features.

        No outliers are held back when 0.0. Defaults to 0.0.

Returns:
    `Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]`: A tuple containing the quantized tensor and relevant statistics.
    - `torch.Tensor` with dtype `torch.int8`: The quantized data.
    - `torch.Tensor` with dtype `torch.float32`: The quantization scales.
    - `torch.Tensor` with dtype `torch.int32`, *optional*: A list of column indices which contain outlier features.
)rN   rJ  rK  int8_vectorwise_quantrM  )r   r  s     r   r0  r0    s%    $ 99!!77??MMr"   r  c                    [        U [        5      (       d  U R                  (       a  U R                  [        R
                  :X  d   S5       e[        U R                  S   U R                  S   U R                  5       U R                  5       S   R                  5       U R                  5       S   R                  5       U R                  5       S9n UcC  [        R                  " U R                  UR                  S   4UR                  UR                  S9nU R                  nU R                   R#                  5       U:X  d   eU R$                  R#                  5       U:X  d   eU R                  R#                  5       U:X  d   eU R&                  UR                  S   :X  d   eUR)                  5       (       + nUR+                  5       U(       a  SOS   nUR                  S   n[,        R/                  5       R0                  n[3        U R                   5      n[3        U R$                  5      n	[3        U R                  5      n
[3        U5      n[3        U5      n[4        R6                  " U R                  5      n[4        R6                  " U R                  5      n[4        R6                  " U R&                  5      n[4        R6                  " UR                  S   5      n[4        R6                  " U5      n[4        R6                  " U5      n[9        U R                   U R$                  U R                  X/5        [:        R<                  " UUU	U
UUUUUUUU[4        R>                  " U5      5        U$ )Nz8Tensor must be `COOSparseTensor or a PyTorch COO tensor.r   r   )r  r  r  r  r  r   rU   rk   ) r  r  	is_sparselayoutrN   
sparse_coorm   _nnzindicesr   r   r  r  rU   rk   r  r  r   r  r  r  r  rY   r/   rH   r   rR   r   r   r   	cspmm_coor  )r  r   r   r  r  r  r  r  	ptrRowidx	ptrColidx	ptrValuesptrBptrCcnnzcrowsAccolsAccolsBcldbcldcs                      r   spmm_coorD    s   
 dO,,~~$++1A1A"A 	
F	
A
 AA		<<>!$((*<<>!$((*;;=
 {kk499aggaj1!((!''R
((C;;#%%%;;#%%%;;#%%%99
"""((L
((*<aQ
0C
''!*C

'
'
)
1
1C$I$I$I1:D3<D::dhhDZZ		"FZZ		"FZZ
#F::c?D::c?Dt{{DKKa=>MM
		,  Jr"   c                 x   UcM  [         R                  " U R                  UR                  S   4UR                  U R
                  R                  S9nU R                  nU R                  R                  5       U:X  d   eU R                  R                  5       U:X  d   eU R
                  R                  5       U:X  d   eU R                  UR                  S   :X  d    U R                   SUR                   35       e[         R                  " U R                  SS9u  pVUR                  S5      R                  5       n[         R                  " USS9u  pU	R                  5       n	UR                  5       nUS   S::  d   S	US    S
35       eUR                  [         R                   [         R"                  4;   d   e[%        U5      n
[%        U5      n[%        U	5      n[%        U R                  5      n[%        U R                  5      n[%        U R
                  5      n[%        U5      n[%        U5      n[%        U5      n[&        R(                  " UR                  5       5      n[&        R(                  " U R                  5      n[&        R(                  " U R                  5      n[&        R(                  " UR                  S   5      n[&        R(                  " UR                  S   5      n[+        U5         [-        U R                  U R                  U R
                  XU/5        UR                  [         R                   :X  a$  [.        R0                  " UUU
UUUUUUUUUUU5        OAUR                  [         R"                  :X  a#  [.        R2                  " UUU
UUUUUUUUUUU5        S S S 5        U$ ! , (       d  f       U$ = f)Nr   r2  r   z vs Tr  )
descending    z)Current max count per row is 8 but found r  )rN   r  r  rm   rU   r   rk   r  r  r   r  r  r  cumsumr   r   r  int8r   rR   r   rf   r   r    cspmm_coo_very_sparse_naive_fp16 cspmm_coo_very_sparse_naive_int8)r  r   dequant_statsr   r  _r   r   	max_countmax_idx	ptrOffsetptrMaxCount	ptrMaxIdxr9  r:  r;  r<  r=  ptrDequantStats	cnnz_rowsr>  r?  crowsBrA  s                           r   spmm_coo_very_sparserV  P  s   
{kk499aggaj1!(($++J[J[\
((C;;#%%%;;#%%%;;#%%%99
"?tyykaggY$??"T[[=IA]]1!!#FFt<IkkmGIQ<2Z!J9UV<.XYZZ77u}}ejj1111I)$K I$I$I$I1:D3<Dm,O

6<<>*I::dhhDZZ		"FZZ
#FZZ
#F		4;;T[[!-PQ77emm#00  WW

"00) 
L JM 
	L Js   .B2N**
N9g     _@r9   )T)NTr$   )Tr   T)g+ew?T)TrZ  r   r   )TrX  r   )NNN   F)NNNNrW  F)Nrb  )NNNN)NNNNr  r  )	Nr   r   r   r   r   Nr   F)r   r   Nr   )r   r   F)rZ  )NFFN)NFF)NNNr   )r   )NNNNr   )gcollections.abcr   ctypesrR   r   mathr   typingr   r   r   numpyru   rN   r   typing_extensionsr	   bitsandbytes.utilsr
   r   
cextensionr   r   rI  cadam_static_8bit_grad_32cadam_static_8bit_grad_16cmomentum_static_8bit_grad_32cmomentum_static_8bit_grad_16crmsprop_static_8bit_grad_32crmsprop_static_8bit_grad_16clion_static_8bit_grad_32clion_static_8bit_grad_16r  r   rB   rY   rU   FIRST_CUDA_DEVICErO   device_countrf   rh   r   r   r4   r   r   r   r   r   r   r   r   rS   r   r   r   r0  rL  r   rU  rh  r   rq  ru  rm  r}  r  r{  FutureWarningr  r  r  r  r.  r%  r  r  r  r  rI  r  r  r  r  r  r  r  r  r  r  r  r  r!  r%  halfr'  r+  r.  r0  rD  rV  Cr@   r"   r   <module>rm     su  
 %    ' '    ( I ,	   	%%%%
 	))))
 	((((
 	%%%%
 	%%%%
 	))))+ 8' '0* *2 " LLq1  	::q '5<< ' (5<< ( "MM2C Ku|| K!6-&S&:'T43n!x 67 !HPv P"++ P%x %HR[[$9 %n
 n
f $(%)"&D||D
5<<
 D U\\"D 
%,,		D 5<<#$DR )-%)#'"&J||J*%J U\\"J 5<<
 	J
 
%,,	J J \\JZOh &*"&++
_||
_U\\"
_ 
%,,	
_ &*"&++
_||
_U\\"
_ 
%,,	
_ &*"&++N||NU\\"N 
%,,	N 5<<#$Nf )-%)"&#	J||	J*%	J U\\"	J 
%,,			J
 }	J \\	J )-%)"&#	J||	J*%	J U\\"	J 
%,,			J
 }	J \\	J )-%)"&#J||J*%J U\\"J 
%,,		J
 }J \\JZ R]jk $("&
5<<
  
%,,	 65(()	 l& R]jk .2%)#'"&E&&.)* U\\" 5<<
 	
 
%,,	  l( R]jk&  Xell5K W]  l< R]jkF & x7M Y_  lP &*(,#VVV V 	V
 V 
V V 	V U\\"V V V V V V %V  !V$ 
%Vr ;, (,)AAA A 	A
 U\\"A A A 
A A 	A A ELL!A A 5<<
 A A  u||$!A" #A$ %A& %'A( )A* 
+A
Aj '+++ + 	+
 U\\"+ + + + + 
+ + 	+ + ELL!+ +  ell#!+" #+$ %+( 
)+\ R]jk&2f &2 &2s &2PS &2 l&2R GLjj Qn #'
""" 
%,,	"P #'bbb 
%,,	bP #']]] 
%,,	]@ X\chcncn C%,, C5<< Chu||>T C< #'#'|||| || 
%,,		
 5<<
 8 R]jk )-(,,06.||6.%6. %6. ELL)	6. 5<<x'==>6. l6.r R]jk"ell " l"J & " "]	S .3ZZ D )-(,&*&*0T||0T%0T %0T ell#	0T
 ell#0TfLu|| LELL LNU\\ N0 #'@
-
.@||@ 
%,,	@FGT 
r"   