
    ;i2                     
   S SK Jr  S SKrS SKJrJr  S SKrS SKJr	  S SK
Jr   " S S\R                  R                  5      r " S S\5      r " S	 S
\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      rg)    )IterableN)LiteralOptional)Optimizer2Statec                      ^  \ rS rSrSr       SS\\R                  R                     S\	S\
\	\	\	4   S\	S\	S\	S	\\   S
\\   4U 4S jjjr\R                  " 5       SS j5       rSrU =r$ )_ReferenceAdEMAMix   z,
Reference: https://hf.co/papers/2409.03137
paramslrbetasalphaepsweight_decayt_beta3t_alphac	           
      <   > [        X#XEXgUS9n	[        T
U ]	  X5        g )N)r   r   r   r   r   r   r   )dictsuper__init__)selfr
   r   r   r   r   r   r   r   defaults	__class__s             k/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/optim/ademamix.pyr   _ReferenceAdEMAMix.__init__   s(     e<jq
 	*    c           
         S nUb%  [         R                  " 5          U" 5       nS S S 5        U R                   GH<  nSU;   a  US==   S-  ss'   OSUS'   US   nUS   nUS   u  pgnUS   n	US   n
US   nUS	   nUS
    GH  nUR                  c  M  UR                  nU R                  U   n[        U5      S:X  a?  UR                  S/UR                  5       Q75      US'   [         R                  " U5      US'   US   S   US   S   US   nnnSXcS   -  -
  nSXsS   -  -
  nU
b  [        US   U	-  U
-  U	5      n	Ubf  [        R                  " U5      n[        R                  " U5      nUS   U-  n[        [        R                  " UU-  SU-
  U-  UU-  -   -  5      U5      nUR                  U5      R                  USU-
  S9  UR                  U5      R                  USU-
  S9  UR                  U5      R                  XSU-
  S9  UR!                  5       US-  -  R#                  U5      nUR%                  U5      U	U-  -   U-  nUR                  XS9  UR                  U* U-  5        GM     GM?     U$ ! , (       d  f       GN^= f)Nstep   r   r   r   r   r   r   r   r
   r      m1_m2nu)r   )valueg      ?)torchenable_gradparam_groupsgradstatelen	new_zerossize
zeros_likeminmathlogexpmul_add_addcmul_sqrtadddiv)r   closurelossgroupr   r   beta1beta2beta3r   r   r   r   pr&   r'   m1m2r!   bias_correction1bias_correction2ln_beta1ln_beta3
step_scaledenomupdates                             r   r   _ReferenceAdEMAMix.step!   s   ""$y % &&Ef" !ftB,C"'.E%'NEI&GI&G 0L8_66>vv

1 u:? &'[[!affh%@E'N"'"2"21"5E$K"7^A.gq0A5;B#$uf'=#= #$uf'=#=  &f 5 ?GE &#xxH#xxH!&v!8J(X"5A
Nh;V[ehp[p:q!rsE ##DE	#:##DE	#:''!e)'D &6&;<AA#F&&!12URZ?5H A2 sV|$_ % '~ E %$s   I::
J	 )MbP?g?g+?gH.?      @:0yE>{Gz?NN)N)__name__
__module____qualname____firstlineno____doc__r   r#   nn	Parameterfloattupler   intr   no_gradr   __static_attributes____classcell__r   s   @r   r   r      s     ,@"!%!%+++,+ + UE5()	+
 + + + #+ #+ +" ]]_F Fr   r   c                   >  ^  \ rS rSr          SS\\R                  R                     S\S\	\\\4   S\S\
\   S\
\   S\S	\S
\S   S\S\4U 4S jjjr\R                  " 5       S 5       r\R                  " 5       U 4S j5       r\R$                  4S jrSrU =r$ )AdEMAMixk   r
   r   r   r   r   r   r   r   
optim_bits       min_8bit_sizeis_pagedc                 <   > [         TU ]  SUUUUUU	S U
SSUUUUS9  g )Nademamixd   Tr
   r   r   r   r   r^   argsrb   percentile_clipping
block_wiserc   r   r   r   r   r   )r   r
   r   r   r   r   r   r   r   r^   rb   rc   r   s               r   r   AdEMAMix.__init__l   sD     	%!' # 	 	
r   c                    U R                  X4U5      nUS   S:X  a  [        R                  nO+US   S:X  a  [        R                  nO[	        SUS    35      eUR                  5       US   :  a  [        R                  nU R                  U   nSUS'   U[        R                  :X  Ga  SU R                  ;  a  U R                  5         U R                  S   R                  UR                  5      =U R                  S'   US	'   U R                  S
   R                  UR                  5      =U R                  S
'   US'   SnUR                  5       n	X-  [        X-  5      -   n
[        R                  " SU
4[        R                  UR                  S9US'   [        R                  " U
4[        R                  UR                  S9US'   U R                  X&S9US'   U R                  X&S9US'   g )Nr^   ra   r`   z(Amount of optimizer bits not supported: rb   r   r   dynamicqmap1udynamicqmap2   r   dtypedeviceabsmax1absmax2)rt   state1state2)
get_configr#   float32uint8NotImplementedErrornumelr'   	name2qmap	fill_qmaptoru   boolzeros_get_state_double_bufferget_state_buffer)r   r8   r<   gindexpindexconfigrt   r'   	blocksizenblockss              r   
init_stateAdEMAMix.init_state   s    7,2%MMEL!Q&KKE%(PQWXdQePf&ghh779vo..MME

1fEKK. 9=	9R9U9UVWV^V^9__DNN9%g:>..:T:W:WXYX`X`:aaDNN:&wI	AnQ](;;F${{Av;emmTUT\T\]E)${{F9EMMRSRZRZ[E)777Gh///?hr   c                 D  > U R                  X4U5      nUS   c  US   c  [        TU ]	  XX45        g UR                  R	                  5       Ul        UR
                  R	                  5       Ul        U R                  U   nUR
                  nUS==   S-  ss'   US   nUS   u  pnUS   nUS   nUS   nUb  [        X-  U-  U5      nOUnUbc  [        R                  " U	5      n[        R                  " U5      nX-  n[        [        R                  " UU-  SU-
  U-  UU-  -   -  5      U5      nOUnUS   R                  [        R                  :X  aR  [        R                  " U R                   UUUS   U	US   UUS	   US
   U
UUUS   SUS   S:  a  US   OS US   US   S9  g US   R                  [        R"                  :X  a[  [        R$                  " U R                   UUUS   US
   US   S   US   S   UUUS   UUS	   US   US   US   US   US   SUS   S9  g g )Nr   r   r   r   r   r   rx   r   r   ry   r   g      ?	max_unormg        	unorm_vec
skip_zeros)gnorm_scaler   r   r   r   ro   rq   rv   rw   )r   r   )rz   r   update_stepdata
contiguousr&   r'   r,   r-   r.   r/   rt   r#   r{   Foptimizer_update_32bitoptimizer_namer|   optimizer_update_8bit_blockwise)r   r8   r<   r   r   r   r'   r&   r   r9   r:   r;   r   r   r   alpha_trA   rB   rC   beta3_tr   s                       r   r   AdEMAMix.update_step   s}   7)$	):)BG&9 ""$""$

1vvfV}$Woew## $,0%8GG xxHxxHJ(X-A
Nh3NS]`hSh2ijkmrG G ?  EMM1$$##huth~&06{0Cc0I%,t -!,/#& 8_""ekk1--##hhw"w"utggi i ~&!,/' 2r   c                    U R                   (       a  UR                  5       S:  a1  [        R                  " S/UR	                  5       Q7X!R
                  S9$ [        R                  " S/UR	                  5       Q7X!R
                  S.6n[        R                  " US5        U R                  R                  R                  U5        U$ )Ng     j@r   rs   r   )rc   r~   r#   r   r*   ru   r   	get_pagedfillpage_mngpaged_tensorsappend)r   r<   rt   buffs       r   r   !AdEMAMix._get_state_double_buffer  s    }}	C;;~AFFH~U88LL;;QVVXeHHMDFF4OMM''..t4Kr   rG   )
rH   rI   rJ   NNrK   rL   ra      F)rM   rN   rO   rP   r   r#   rR   rS   rT   rU   r   rV   r   r   r   rW   r   r   r{   r   rX   rY   rZ   s   @r   r\   r\   k   s    ,@!%!%"%'!
++,
 
 UE5()	

 
 #
 #
 
 
 EN
 
 
 
@ ]]_$@ $@L ]]_R Rh 16  r   r\   c                      ^  \ rS rSr         SS\\R                  R                     S\S\	\\\4   S\S\
\   S\
\   S\S	\S
\S\4U 4S jjjrSrU =r$ )AdEMAMix8biti  r
   r   r   r   r   r   r   r   rb   rc   c                 4   > [         TU ]  UUUUUUUUSU	U
S9  g )Nr`   
r   r   r   r   r   r   r   r^   rb   rc   rk   r   r
   r   r   r   r   r   r   r   rb   rc   r   s              r   r   AdEMAMix8bit.__init__  s8     	%' 	 	
r   rG   	rH   rI   rJ   NNrK   rL   r   FrM   rN   rO   rP   r   r#   rR   rS   rT   rU   r   rV   r   r   rX   rY   rZ   s   @r   r   r     s     ,@!%!%"!
++,
 
 UE5()	

 
 #
 #
 
 
 
 
 
r   r   c                      ^  \ rS rSr        SS\\R                  R                     S\S\	\\\4   S\S\
\   S\
\   S\S	\S
\4U 4S jjjrSrU =r$ )PagedAdEMAMix8biti/  r
   r   r   r   r   r   r   r   rb   c
                 2   > [         T
U ]  UUUUUUUUU	SS9
  g NT)	r   r   r   r   r   r   r   rb   rc   rk   r   r
   r   r   r   r   r   r   r   rb   r   s             r   r   PagedAdEMAMix8bit.__init__0  5     	%' 	 	
r   rG   rH   rI   rJ   NNrK   rL   r   rM   rN   rO   rP   r   r#   rR   rS   rT   rU   r   rV   r   rX   rY   rZ   s   @r   r   r   /       ,@!%!%"!
++,
 
 UE5()	

 
 #
 #
 
 
 
 
r   r   c                      ^  \ rS rSr         SS\\R                  R                     S\S\	\\\4   S\S\
\   S\
\   S\S	\S
\S   S\4U 4S jjjrSrU =r$ )PagedAdEMAMixiJ  r
   r   r   r   r   r   r   r   r^   r_   rb   c                 4   > [         TU ]  UUUUUUUUU	U
SS9  g )NTr   rk   )r   r
   r   r   r   r   r   r   r   r^   rb   r   s              r   r   PagedAdEMAMix.__init__K  s8     	%!' 	 	
r   rG   )	rH   rI   rJ   NNrK   rL   ra   r   )rM   rN   rO   rP   r   r#   rR   rS   rT   rU   r   rV   r   r   rX   rY   rZ   s   @r   r   r   J  s     ,@!%!%"%'!
++,
 
 UE5()	

 
 #
 #
 
 
 EN
 
 
r   r   c                      ^  \ rS rSr         SS\\R                  R                     S\S\	\\\4   S\S\
\   S\
\   S\S	\S
\S\4U 4S jjjrSrU =r$ )AdEMAMix32bitig  r
   r   r   r   r   r   r   r   rb   rc   c                 <   > [         TU ]  SUUUUUSS U	SSU
UUUS9  g )Nre   ra   rf   Trg   rk   r   s              r   r   AdEMAMix32bit.__init__h  sD     	%' # 	 	
r   rG   r   r   rZ   s   @r   r   r   g  s     ,@!%!%"!
++,
 
 UE5()	

 
 #
 #
 
 
 
 
 
r   r   c                      ^  \ rS rSr        SS\\R                  R                     S\S\	\\\4   S\S\
\   S\
\   S\S	\S
\4U 4S jjjrSrU =r$ )PagedAdEMAMix32biti  r
   r   r   r   r   r   r   r   rb   c
                 2   > [         T
U ]  UUUUUUUUU	SS9
  g r   rk   r   s             r   r   PagedAdEMAMix32bit.__init__  r   r   rG   r   r   rZ   s   @r   r   r     r   r   r   )collections.abcr   r-   typingr   r   r#   bitsandbytes.functional
functionalr   bitsandbytes.optim.optimizerr   optim	Optimizerr   r\   r   r   r   r   r   rG   r   r   <module>r      s|    $  $  # 8].. ]@d dN
8 
:
 
6
H 
:
O 
B
 
r   