
    ;ia              '       b   S SK Jr  S SKrS SKJr  S SKJr  S SKrS SK	J
r
JrJrJr  SSKJr  SSKJrJr  \" S	S
5      S\R&                  S\R&                  4S j5       r\" SS
5      S\R&                  S\R&                  S\R&                  4S j5       rS\R&                  S\R&                  S\R&                  4S jr\" SS
5        SSS\R&                  S\R&                  S\R&                  S\\R,                     S\\R&                     S\R&                  4S jj5       r\" SS
5      STS\R&                  4S jj5       r\" SS
5       STS\R&                  S\\R&                  \R&                  \R&                  \R&                  \\R&                     4   4S jj5       r STS\R&                  S\\R&                  \\R&                     4   4S jjr\" SS
5      S\R&                  S\R&                  S \S\\R&                  \R&                  4   4S! j5       r\" S"S
5      S\R&                  S#\R&                  S\R&                  S \S\R,                  S\R&                  4S$ j5       r\" S%S
5      S\R&                  S#\R&                  S\R&                  S \S\R,                  S\R&                  SS4S& j5       rS\R&                  S#\R&                  S\R&                  S \S\R,                  S\R&                  SS4S' jr\" S(S
5      S\R&                  S \S)\S*\R,                  S\\R&                  \R&                  4   4
S+ j5       r\" S,S
5      S\R&                  S#\R&                  S \S)\S-\\   S\R,                  S\R&                  4S. j5       r\" S/S
5      S\R&                  S#\R&                  S \S)\S-\\   S\R,                  S\R&                  SS4S0 j5       rS\R&                  S#\R&                  S \S)\S\R,                  S\R&                  SS4S1 jr\" S2S
5      S\R&                  S\R&                  S3\\   S#\R&                  S\R&                  S \S\R&                  4S4 j5       r\" S5S
5      S\R&                  S\R&                  S3\\   S#\R&                  S\R&                  S \S\R&                  SS4S6 j5       rS\R&                  S\R&                  S3\\   S#\R&                  S\R&                  S \S\R&                  SS4S7 jr \R<                  \R>                  \R@                  4\RB                  \RD                  4\RF                  \RH                  4\RJ                  \RL                  \RN                  4\RP                  \RR                  4\R<                  \R>                  \R@                  4\RT                  \RV                  \RX                  4S8.r-\R\                  \R^                  \R`                  4\Rb                  \Rd                  \Rf                  4\Rh                  \Rj                  \Rl                  4\Rn                  \Rp                  \Rr                  4\Rt                  \Rv                  \Rx                  4\Rz                  \R|                  \R~                  4S9.r@ SUS:\S;\R&                  S<\R&                  S=\R&                  S>\\R&                     S?\\R&                     S@\ASA\ASB\ASC\ASD\ASE\ASF\ASG\ASH\SI\ASJ\ASS4$SK jjrB SUS:\S;\R&                  S<\R&                  S=\R&                  S>\\R&                     SB\ASC\ASD\ASE\ASF\ASH\SI\ASL\R&                  SM\\R&                     SN\R&                  SO\\R&                     SG\ASJ\ASS4&SP jjrC\" SQS
5      " \C5        \" SRS
5      " \B5        g)V    )SequenceN)prod)Optional)CUBLAS_Context_cuda_device_of_get_tensor_streamget_ptr   )register_kernel)HIP_ENVIRONMENTlibz bitsandbytes::int8_linear_matmulcudaABc                     [         R                  " / U R                  S S QUR                  S   P7U R                  [         R                  S9n[        XU5      $ )Nr   devicedtype)torchemptyshaper   int32_int8_linear_matmul_implr   r   outs      n/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/backends/cuda/ops.py_r      sF    
++11aggaj1!((%++
VC#A#..    z$bitsandbytes::int8_linear_matmul.outr   c                     [        XU5        g )N)r   r   s      r   r   r      s    Q3'r   c                   ^^^^ XpU R                   mUR                   m[        R                  " U R                  [        R                  :H  S 5        [        R                  " UR                  [        R                  :H  S 5        [        R                  " U R
                  S:H  S 5        [        R                  " UR
                  S;   S 5        [        R                  " [        T5      S:  U4S j5        [        R                  " TR                  [        R                  :H  5        / TS S	 QTS   P7m[        R                  " TR                   T:H  UU4S
 j5        Tu  p4[        TS S	 5      nTS	   nTS	   nTS	   n[        R                  " Xg:H  UU4S j5        US-  S:w  ao  [        R                  " UR                  5       U R                  5       R                  5       5      R                  [        R                  5      n	TR                  U	5      $ [        U 5         [        R                  " 5       R!                  U R"                  5      n
[%        U 5      n[%        U5      n[%        T5      nS n[&        R(                  " U5      n[&        R(                  " U5      n[&        R(                  " U5      n[&        R(                  " U5      n[&        R(                  " U5      n[&        R(                  " U5      n[+        U 5      n[,        R.                  " XXSXXXgX5      nS S S 5        W(       a4  US:X  a  [1        S5      e[3        ST< ST< ST< SXgU4< SXEU4< 3
5      eT$ ! , (       d  f       NK= f)Nc                      g)NzB must be int8 r#   r   r   <lambda>*_int8_linear_matmul_impl.<locals>.<lambda>       0@r   c                      g)NzA must be int8r#   r#   r   r   r$   r%       r&   r      c                      g)Nz:Only two dimensional matrices are supported for argument Br#   r#   r   r   r$   r%   !   s    &br   )r(   r
   c                      g)NzCOnly two or three dimensional matrices are supported for argument Ar#   r#   r   r   r$   r%   "   s    +pr   r   c                     > ST  3$ )Nz(Input tensor dimensions need to be > 0: r#   )shapeBs   r   r$   r%   #   s    -UV\U]+^r   r   c                  (   > ST R                    ST 3$ )NzOutput shape z does not match expected shape r   )r   shapeCs   r   r$   r%   '   s    cii[Hghngo.pr   c                     > ST ST  3$ )NzQint8_linear_matmul only supports B^T @ A. Inner dimensions do not match: B @ A = z @ r#   )shapeAr,   s   r   r$   r%   1   s    cdjckknounvwr      d   z#int8_linear_matmul not implemented!z$cublasLt ran into an error!
	shapeA=z	, shapeB=z	, shapeC=z
	(lda, ldb, ldc)=z
	(m, n, k)=)r   r   _checkr   int8ndimr   r   matmulfloatttocopy_r   r   get_instanceget_contextr   r	   ctc_int32r   r   cigemmlt_32NotImplementedErrorRuntimeError)r   r   r   kmnldaldbldcresultctxptrAptrBptrCptrRowScalestream	has_errorr1   r,   r/   s     `              @@@r   r   r      s   qWWFWWF	LLEJJ&(@A	LLEJJ&(@A	LL1bc	LL6!#pq	LLf!#^_	LLekk)*&vcr{&F1I&F	LLf$&pqDAVCR[A
*C
*C
*C	LL
w Qw!|aggi7::5;;Gyy  		))+77Aqzqzs|JJqMJJqMJJqMjjojjojjo#A&OOCA$dQT[^g	 
   &&KLL9&*VIZyPeVY`cUdTffulmrsktjvw  J5 
	s   C<M  
M.zbitsandbytes::int8_mm_dequant	row_stats	col_statsr   biasreturnc                 (  ^ ^^ [         R                  " T R                  [         R                  :H  U 4S j5        [         R                  " TR                  [         R                  :H  U4S j5        [         R                  " TR                  [         R                  :H  U4S j5        [         R
                  " T [         R                  S9n[        T 5      n[        U5      n[        T5      n[        T5      n	[        R                  " [        T R                  S S 5      5      n
[        R                  " T R                  S   5      nUb)  UR                  [         R                  :X  a  [        U5      OS n[        T 5         [        R                  " XhXXU[        T 5      5        S S S 5        Ub/  UR                  [         R                  :w  a  UR!                  U5        UR#                  U=(       d    [         R                  5      $ ! , (       d  f       Nh= f)Nc                  "   > ST R                    3$ )NzA must be int32, got r   r   s   r   r$   _.<locals>.<lambda>`       3H	1Rr   c                  "   > ST R                    3$ )Nzrow_stats must be float32, got rW   )rQ   s   r   r$   rY   a       =\]f]l]l\m;nr   c                  "   > ST R                    3$ )Nzcol_stats must be float32, got rW   )rR   s   r   r$   rY   b   r\   r   rW   r   )r   r4   r   r   float32
empty_likefloat16r	   r>   r?   r   r   r   r   cdequant_mm_int32_fp16r   add_r:   )r   rQ   rR   r   rS   r   rK   ptrOutptrRowStatsptrColStatsnumRowsnumColsptrBiass   ```          r   r   r   X   s\    
LLEKK')RS	LLEMM13no	LLEMM13no 

1EMM
2C1:DS\F)$K)$Kjjaggcrl+,Gjj%G  $/DJJ%--4OgdmUYG		""{GgOabcOd	
 
 DJJ%--766%(5==)) 
	s   =$H
Hz#bitsandbytes::int8_vectorwise_quantc                 P  ^  [         R                  " T R                  [         R                  :H  U 4S j5        [         R                  " US:  S 5        [	        T R
                  S S 5      nT R
                  S   n[         R                  " UT R                  [         R                  S9n[         R                  " T R
                  T R                  [         R                  S9nS nUS:  a  T R                  5       U:  nUR                  5       (       a3  [         R                  " UR                  SS95      R                  S5      nO.[         R                  " ST R                  [         R                  S9n[        T 5         [         R"                  " [%        T 5      [%        U5      [%        U5      [&        R(                  " U5      [&        R*                  " U5      [&        R*                  " U5      [-        T 5      5        S S S 5        US:  a  Ub	  SUS S 2U4'   XTU4$ ! , (       d  f       N$= f)	Nc                  "   > ST R                    3$ )NzA must be float16, got rW   rX   s   r   r$   rY      s    5LQWWI3Vr           c                      g)Nzthreshold must be non-negativer#   r#   r   r   r$   rY      s    +Kr   r   r   r   )dim   )r   r4   r   r`   r   r   r   r   r^   r5   absanyargwhereviewint64r   r   cint8_vector_quantr	   r>   c_floatr?   r   )r   	thresholdrowscolsrQ   out_rowoutlier_colsoutlierss   `       r   r   r      s   	LLEMM)+VW	LLc!#KLD772;DDGIkk!''!((%**EGL3557i'<<>> >>(,,1,*=>CCBGL !;;qML		AJGIJJy!JJtJJtq!	
 
 axL,#$< |++! 
	s   <A=H
H%zbitsandbytes::int8_double_quantc                    [         R                  R                  R                  R	                  U US9u  p#n[        XS9u  pVUS:  a  Ub  U R                  US5      n [         R                  " U R                  S5      UR                  S5      -  5      R                  [         R                  5      nX'X5R                  5       R                  5       U4$ )N)rv   rk   g     _@r   )r   opsbitsandbytesint8_vectorwise_quantdefault_get_col_absmaxmasked_fillroundmul	unsqueezer:   r5   flattenr8   )r   rv   	quant_rowrQ   rz   rR   outlier_mask	quant_cols           r   r   r      s     */)?)?)U)U)])]	 *^ *&I, .aEI3<3MM,,AEE%L9+>+>q+AABEEejjQI,=,=,?,E,E,GUUr   c                 "   [         R                  " U R                  5       5        S nU R                  5       R	                  SU R
                  S   5      nUS:  a  X1:  nUR                  US5        UR                  SSS9R                  5       nXB4$ )Nr   rk   r   F)rm   keepdim)	r   r4   is_floating_pointro   rr   r   masked_fill_amaxr8   )r   rv   r   absArR   s        r   r   r      s     
LL$$&'L557<<AGGBK(D3(,, 		a	/557I""r   z bitsandbytes::quantize_blockwisecode	blocksizec           
      \  ^ [         R                  " U5        [        (       a  [         R                  " US;   5        O[         R                  " US;   5        [         R                  " TR                  [         R
                  :H  U4S j5        U R                  5       nX2* -  * n[         R                  " U4U R                  [         R
                  S9n[         R                  " U [         R                  S9n[        U 5         [        T5      [        U 5      [        U5      [        U5      [        R                  " U5      [        R                  " U R                  5       5      4nU R                  [         R                   :X  a  ["        R$                  " U6   O|U R                  [         R&                  :X  a  ["        R(                  " U6   OJU R                  [         R
                  :X  a  ["        R*                  " U6   O[-        SU R                   35      eS S S 5        Xe4$ ! , (       d  f       Xe4$ = f)N                  r   r   r   r   r   r   @   c                  "   > ST R                    3$ )Nzcode must be float32, got rW   )r   s   r   r$   rY      s    8RSWS]S]R^6_r   r   rW   z?Blockwise quantization only supports 16/32-bit floats, but got )r   _check_is_sizer   r4   r   r^   numelr   r   r_   uint8r   r	   r>   r?   c_intr`   r   cquantize_blockwise_fp16bfloat16cquantize_blockwise_bf16cquantize_blockwise_fp32
ValueError)r   r   r   rE   blocksabsmaxr   argss    `      r   r   r      s   	#Y"CCDY"GGH	LLu}},._`		AJF[[&1885==IF


1EKK
0C		DMAJFOCLJJy!HHQWWY
 77emm#(($/WW&(($/WW%(($/^_`_f_f^ghii# 
& ;' 
	& ;s   ?DH
H+z"bitsandbytes::dequantize_blockwiser   c           	      D    [         R                  " XS9n[        XX#XES9  U$ )NrW   r   )r   r_   _dequantize_blockwise_implr   r   r   r   r   r   s         r   r   r      s#    


1
*Cq$5JJr   z&bitsandbytes::dequantize_blockwise.outc           	         ^ ^^ [         R                  " TR                  T:H  UU4S j5        [         R                  " TR                  T R                  :H  U U4S j5        [	        T XUTTS9  g )Nc                  (   > ST  STR                    3$ NzExpected out.dtype == , got rW   r   r   s   r   r$   rY         /EeWFSVS\S\R]-^r   c                  <   > ST R                    STR                    3$ NExpected out.shape == r   r.   r   r   s   r   r$   rY   	       1GyPVWZW`W`Va/br   r   )r   r4   r   r   r   r   s   `   ``r   r   r      sI     
LLe#%^_	LLagg%'bcq&	5cJr   c           
        ^ ^ [         (       a  [        R                  " US;   5        O[        R                  " US;   5        [        R                  " T R                  [        R                  :H  U 4S j5        [        R                  " T[        R
                  [        R                  [        R                  4;   U4S j5        [        T 5         [        U5      [        T 5      [        U5      [        U5      [        R                  " U5      [        R                  " T R                  5       5      [        T 5      4nT[        R
                  :X  a  [        R                  " U6   OOT[        R                  :X  a  [        R                   " U6   O'T[        R                  :X  a  [        R"                  " U6   S S S 5        g ! , (       d  f       g = f)Nr   r   c                  "   > ST R                    3$ )NzA must be uint8, got rW   rX   s   r   r$   ,_dequantize_blockwise_impl.<locals>.<lambda>  rZ   r   c                     > ST  3$ )NzGBlockwise dequantization only supports 16bit/32bit floating types, got r#   rW   s   r   r$   r     s    YZ_Y`ar   )r   r   r4   r   r   r`   r   r^   r   r	   r>   r   r   r   r   cdequantize_blockwise_fp16cdequantize_blockwise_bf16cdequantize_blockwise_fp32)r   r   r   r   r   r   r   s   `   `  r   r   r     s#    Y"CCDY"GGH	LLEKK')RS	LL%--??a
 
	DMAJFOCLHHYHHQWWYq!
 EMM!**D1enn$**D1emm#**D1! 
		s   C$F::
Gzbitsandbytes::quantize_4bit
quant_typequant_storagec           	         ^  [         (       a  [        R                  " US;   5        O[        R                  " US;   5        [        R                  " US;   5        [        R                  " T R                  [        R                  [        R
                  [        R                  4;   U 4S j5        T R                  5       nXA* -  * n[        R                  " U4T R                  [        R                  S9n[        R                  " US-   UR                  S-  -  S4T R                  US9n[        T 5         S [        T 5      [        U5      [        U5      [        R                  " U5      [        R                  " U5      4nT R                  [        R                  :X  a.  US:X  a  [        R                   " U6   O[        R"                  " U6   OT R                  [        R
                  :X  a.  US:X  a  [        R$                  " U6   O_[        R&                  " U6   OKT R                  [        R                  :X  a-  US:X  a  [        R(                  " U6   O[        R*                  " U6   S S S 5        Xv4$ ! , (       d  f       Xv4$ = f)	Nr   r   fp4nf4c                  "   > ST R                    3$ )NzDBlockwise 4bit quantization only supports 16/32-bit floats, but got rW   rX   s   r   r$   rY   :  s    VWXW^W^V_`r   r   rn   r(   r   )r   r   r4   r   r   r`   r^   r   r   r   itemsizer   r	   r>   r?   r   cquantize_blockwise_bf16_fp4cquantize_blockwise_bf16_nf4cquantize_blockwise_fp16_fp4cquantize_blockwise_fp16_nf4cquantize_blockwise_fp32_fp4cquantize_blockwise_fp32_nf4)	r   r   r   r   rE   r   r   r   r   s	   `        r   r   r   .  s    Y"CCDY"GGH	LL~-.	LL	ENNEMM5==AA`
 	
	AJF[[&1885==IF
++A=#9#9A#=>B188[h
iC		AJFOCLJJy!JJqM
 77enn$U"00$700$7WW%U"00$700$7WW%U"00$700$71 
4 ;5 
	4 ;s   3D/I--
I=zbitsandbytes::dequantize_4bitr   c           	      Z    [         R                  " XEU R                  S9n[        XX#XVS9  U$ )N)r   r   r   )r   r   r   _dequantize_4bit_implr   r   r   r   r   r   r   s          r   r   r   _  s)     ++e
:C!YEKJr   z!bitsandbytes::dequantize_4bit.outc           	         ^^^ [         R                  " TR                  T:H  UU4S j5        [         R                  " TR                  T:H  UU4S j5        [	        XX#TTS9  g )Nc                  (   > ST ST R                    3$ r   r.   )r   r   s   r   r$   rY   w  r   r   c                  (   > ST  STR                    3$ r   rW   r   s   r   r$   rY   x  r   r   r   )r   r4   r   r   r   r   s       ```r   r   r   m  sC     
LLe#%^_	LLe#%^_!YEsKr   c           
         ^ [         (       a  [        R                  " US;   5        O[        R                  " US;   5        [        R                  " US;   5        [        R                  " T[        R                  [        R                  [        R
                  4;   U4S j5        [        U 5         S [        U 5      [        U5      [        U5      [        R                  " U5      [        R                  " UR                  5       5      [        U 5      4nUR                  [        R                  :X  a.  US:X  a  [        R                  " U6   O[        R                   " U6   OUR                  [        R                  :X  a.  US:X  a  [        R"                  " U6   O_[        R$                  " U6   OKUR                  [        R
                  :X  a-  US:X  a  [        R&                  " U6   O[        R(                  " U6   S S S 5        g ! , (       d  f       g = f)Nr   r   r   c                     > ST  3$ )NzFBlockwise 4bit dequantization only supports 16/32-bit floats, but got r#   rW   s   r   r$   '_dequantize_4bit_impl.<locals>.<lambda>  s    XY^X_`r   r   )r   r   r4   r   r`   r^   r   r	   r>   r   r?   r   r   r   r   cdequantize_blockwise_bf16_fp4cdequantize_blockwise_bf16_nf4cdequantize_blockwise_fp16_fp4cdequantize_blockwise_fp16_nf4cdequantize_blockwise_fp32_fp4cdequantize_blockwise_fp32_nf4)r   r   r   r   r   r   r   s       `  r   r   r   |  sb    Y"CCDY"GGH	LL~-.	LL%..%--??`
 
	AJFOCLHHYJJsyy{#q!
 99&U"22D922D9YY%--'U"22D922D9YY%--'U"22D922D93 
		s   /EG??
Hzbitsandbytes::gemv_4bitr,   c           
          / U R                   S S QUS   P7n[        R                  " X`R                  U R                  S9n[        XX#XEUS9  U$ )Nr   r   r   r   )r   r   r   r   r   _gemv_4bit_impl)r   r   r,   r   r   r   r   r   s           r   r   r     sL     'aggcrl&F1I&E
++eHHAGG
<CA&$sCJr   zbitsandbytes::gemv_4bit.outc           
      
  ^ ^^ [         R                  " TR                  / T R                  S S QTS   P7:H  U UU4S j5        [         R                  " TR                  T R                  :H  U U4S j5        [	        T UTX4UTS9  g )Nr   r   c                  R   > S/ T R                   S S QTS   P7 STR                    3$ )Nr   r   r   r   r.   )r   r   r,   s   r   r$   rY     s1    ()C1773B<)C)C(DF399+Vr   c                  <   > ST R                    STR                    3$ r   rW   r   s   r   r$   rY     r   r   r   )r   r4   r   r   r   )r   r   r,   r   r   r   r   s   ` `   `r   r   r     si     
LL		/qwws|/VAY//V 
LLagg%'bcAq&&	sCr   c                    [         R                  " U5        [        R                  " US   5      n[        R                  " S5      n[        R                  " US   5      n	Un
[        R                  " U R                  S   S-   S-  5      nUn[        U 5      n[        U 5         U R                  [         R                  :X  ae  [        R                  " UUU	[        U 5      [        U5      [        U5      [        U5      [        U5      U
UU[        R                  " U5      U5        GOU R                  [         R                  :X  ad  [        R                  " UUU	[        U 5      [        U5      [        U5      [        U5      [        U5      U
UU[        R                  " U5      U5        OU R                  [         R                  :X  ac  [        R                  " UUU	[        U 5      [        U5      [        U5      [        U5      [        U5      U
UU[        R                  " U5      U5        S S S 5        g ! , (       d  f       g = f)Nr   rn   r   r(   )r   r   r>   r?   r   r   r   r   r`   r   cgemm_4bit_inference_naive_fp16r	   r   cgemm_4bit_inference_naive_bf16r^   cgemm_4bit_inference_naive_fp32)r   r   r,   r   r   r   r   rD   rE   rC   rF   rG   rH   rO   s                 r   r   r     s    
#$ 	

6!9A


1A


6!9A
C
**aggbkAo!+
,C
C"F		77emm#//



9% WW&//



9% WW%//



9%E 
		s   "FH22
I )adammomentumrmsproplionadagradlambademamix)r   r   r   r   r   r   optimizer_namegpstate1state2	unorm_vec	max_unorm
param_normbeta1beta2beta3alphaepsweight_decaysteplrgnorm_scalec                    [         R                  U S 5      nUc,  [        SU  S[        [        R                  5       5       35      eUR                  [        R                  :X  a  US   nO|UR                  [        R                  :X  a  US   nOXUR                  [        R                  :X  a  [        U5      S:X  a  US   nO%[        SUR                   SUR                   35      e[        U5         U" [        U5      [        U5      [        U5      [        U5      [        U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U	5      [        R                  " U
5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                   " U5      [        R                  " U5      [        R                  " U5      [        R"                  " U5      [        R                   " UR%                  5       5      5        S S S 5        g ! , (       d  f       g = f)	NUnsupported optimizer name: . Supported optimizers: r   rn   r
   r(   zAGradient+optimizer bit data type combination not supported: grad z, optimizer )str2optimizer32bitgetr   liststr2optimizer8bit_blockwisekeysr   r   r^   r`   r   lenr   r	   r>   ru   r?   c_boolr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   
skip_zeros	optim_fns
optim_funcs                       r   _optimizer_update_32bit_implr
  e  s   ( #&&~t<I*>*::RSWXsXxXxXzS{R|}
 	
 	ww%--q\
	
EMM	!q\
	
ENN	"s9~':q\
OPQPWPWyXdekeqeqdrs
 	
 
	AJAJFOFOIJJy!JJz"JJuJJuJJuJJuJJsOJJ|$JJtJJrNJJ{#IIj!JJqwwy!%	
 
		s   1EI
I!qmap1qmap2absmax1absmax2c                 x   [         R                  U 5      nUc,  [        SU  S[        [         R	                  5       5       35      eUR
                  [        R                  :X  a  US   nOaUR
                  [        R                  :X  a  US   nO=UR
                  [        R                  :X  a  US   nO[        SUR
                   S35      e[        U5         U" [        U5      [        U5      [        U5      [        U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U	5      [        R                  " U
5      [        R                  " U5      [        U5      [        U5      [        U5      [        U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " UR!                  5       5      5        S S S 5        g ! , (       d  f       g = f)Nr   r   r   rn   r(   zUnsupported gradient dtype: z@. Supported dtypes: torch.float32, torch.float16, torch.bfloat16)r  r  r   r  r  r   r   r^   r`   r   r   r	   r>   ru   r?   r  r   )r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r   r   r  optimizer_fnsoptimizer_fns                        r   %_optimizer_update_8bit_blockwise_implr    s   h 033NCM*>*::RSWXsXxXxXzS{R|}
 	
 	ww%--$Q'	
EMM	!$Q'	
ENN	"$Q'*177)3st
 	
 
	AJAJFOFOJJuJJuJJuJJuJJsOJJtJJrNENENGGJJ|$JJ{#IIj!JJqwwy!'	
 
		s   EH++
H9z-bitsandbytes::optimizer_update_8bit_blockwisez$bitsandbytes::optimizer_update_32bit)NN)rk   )F)Dcollections.abcr   ctypesr>   mathr   typingr   r   bitsandbytes.functionalr   r   r   r	   _opsr   
cextensionr   r   Tensorr   r   r   tupler   intr   strr   r   cadam32bit_grad_fp32cadam32bit_grad_fp16cadam32bit_grad_bf16cmomentum32bit_grad_32cmomentum32bit_grad_16crmsprop32bit_grad_32crmsprop32bit_grad_16clion32bit_grad_fp32clion32bit_grad_fp16clion32bit_grad_bf16cadagrad32bit_grad_32cadagrad32bit_grad_16cademamix32bit_grad_fp32cademamix32bit_grad_fp16cademamix32bit_grad_bf16r   cadam_8bit_blockwise_grad_fp32cadam_8bit_blockwise_grad_fp16cadam_8bit_blockwise_grad_bf16"cmomentum_8bit_blockwise_grad_fp32"cmomentum_8bit_blockwise_grad_fp16"cmomentum_8bit_blockwise_grad_bf16!crmsprop_8bit_blockwise_grad_fp32!crmsprop_8bit_blockwise_grad_fp16!crmsprop_8bit_blockwise_grad_bf16clion_8bit_blockwise_grad_fp32clion_8bit_blockwise_grad_fp16clion_8bit_blockwise_grad_bf16!cadagrad_8bit_blockwise_grad_fp32!cadagrad_8bit_blockwise_grad_fp16!cadagrad_8bit_blockwise_grad_bf16"cademamix_8bit_blockwise_grad_fp32"cademamix_8bit_blockwise_grad_fp16"cademamix_8bit_blockwise_grad_bf16r  r8   r
  r  r#   r   r   <module>r?     sW
   $     ` ` # . 3V</ /%,, / =/
 7@( (%,, (U\\ ( A(< < <ELL <~ 0&9
 $(#'#*||#*||#* ||#* EKK 	#*
 5<<
 #* \\#* :#*L 6?&, &, @&,R 2F; V||V 5<<u||U\\8ELLCYYZV <V* #||# 5<<%,,//0#* 3V<" "U\\ "c "eELLRWR^R^D^>_ " ="J 5v> u|| 5<< C X]XcXc hmhtht  ? 96B
K||
KLL
K ,,
K 	
K
 ;;
K 

K 

K C
K2||2"\\2162JM2V[VaVa2hmhtht2	2B .7-||- #-14-EJ[[-
5<<%&- 8-` 0&9
||
LL
 
 	

 C=
 ;;
 \\
 :
 4f=L||LLLL L 	L
 C=L ;;L 
L 
L >L,:||,:LL,: ,: 	,:
 ;;,: 
,: 
,:^ *F3||.6smEJ\\Y^YeYeru
\\ 4 .7D||D||D SMD LL	D
 ,,D D 
D 
D 8D"U||U||U SMU LL	U
 ,,U U 
U 
Up ! 	       	""""
 	!!!!
 	       	!!!!
 	       	$$$$$$9! J 	****** 	...... 	------ 	****** 	------ 	......5 h %8
8
||8
 ||8
 LL	8

 U\\"8
 %8
 8
 8
 8
 8
 8
 8
 
8
 8
 8
  	!8
" #8
& 
'8
\ 'Z
Z
||Z
 ||Z
 LL	Z

 U\\"Z
 Z
 Z
 Z
 Z
 
Z
 Z
 	Z
 <<Z
 ELL!Z
 \\Z
  ell#!Z
" #Z
$ %Z
( 
)Z
z ? HIn o 6 ?@\ ]r   