
    ;i1v                         S SK JrJr  S SKJr  S SKJr  S SKJ	r	  S SK
r
S SKJr  S SKJr   " S S5      r " S	 S
5      r " S S\
R$                  R&                  5      r " S S\5      r " S S\5      rg)    )abcdefaultdict)deepcopy)chain)OptionalN)sync_gpuc                       \ rS rSrS rSrg)MockArgs   c                 2    U H  n[        XX   5        M     g N)setattr)selfinitial_datakeys      l/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/optim/optimizer.py__init__MockArgs.__init__   s    CD|01       N)__name__
__module____qualname____firstlineno__r   __static_attributes__r   r   r   r
   r
      s    2r   r
   c                   N    \ rS rSrSrSrS rS r\S 5       r	S r
SS jrS	 rS
rg)GlobalOptimManager   zC
A global optimizer manager for enabling custom optimizer configs.
Nc                     [        S5      e)NzCall get_instance() instead)RuntimeErrorr   s    r   r   GlobalOptimManager.__init__   s    899r   c                 J    0 U l         0 U l        S U l        SU l        / U l        g )NF)
pid2configindex2config	optimizeruses_config_overridemodule_weight_config_tripler!   s    r   
initializeGlobalOptimManager.initialize    s(    $)!+-(r   c                     U R                   c0  U R                  U 5      U l         U R                   R                  5         U R                   $ r   )	_instance__new__r)   )clss    r   get_instanceGlobalOptimManager.get_instance'   s6    == KK,CMMM$$&}}r   c                 ,   [        U5      n[        US   [        5      (       d  SU0/n[        U5       H]  u  p4[        US   5       HF  u  pV[	        U5      U R
                  ;   d  M   U R
                  [	        U5         U R                  X54'   MH     M_     g )Nr   params)list
isinstancedict	enumerateidr$   r%   )r   r2   param_groupsgroup_indexgroupp_indexps          r   register_parameters&GlobalOptimManager.register_parameters.   s    F|,q/400%|45L"+L"9K'h8
a5DOO+@DPRSTPU@VD%%{&<= 9 #:r   c                    SU l         [        U[        R                  R                  5      (       a  U/n[        U[        R
                  5      (       a  U/nUb  Ub  Ub   eX#0nUbc  U H\  n[        U5      U R                  ;   a)  U R                  [        U5         R                  U5        ME  X@R                  [        U5      '   M^     gg)a  
Override initial optimizer config with specific hyperparameters.

The key-values of the optimizer config for the input parameters are overridden
This can be both, optimizer parameters like `betas` or `lr`, or it can be
8-bit specific parameters like `optim_bits` or `percentile_clipping`.

Arguments:
   parameters (`torch.Tensor` or `list(torch.Tensors)`):
     The input parameters.
   key (`str`):
     The hyperparameter to override.
   value:
     The hyperparameter value.
   key_value_dict (`dict`):
     A dictionary with multiple key-values to override.

Example:

```py
import torch
import bitsandbytes as bnb

mng = bnb.optim.GlobalOptimManager.get_instance()

model = MyModel()
mng.register_parameters(model.parameters()) # 1. register parameters while still on CPU

model = model.cuda()
# use 8-bit optimizer states for all parameters
adam = bnb.optim.Adam(model.parameters(), lr=0.001, optim_bits=8)

# 2. override: the parameter model.fc1.weight now uses 32-bit Adam
mng.override_config(model.fc1.weight, 'optim_bits', 32)
```
TN)	r'   r4   torchnn	ParameterTensorr7   r$   update)r   
parametersr   valuekey_value_dictr<   s         r   override_config"GlobalOptimManager.override_config8   s    J %)!j%(("4"455$Jj%,,//$J?u0!)))!\N%a5DOO+OOBqE*11.A-;OOBqE*	   &r   c                 >    U R                   R                  XU45        g r   )r(   append)r   module
param_nameconfigs       r   register_module_override+GlobalOptimManager.register_module_overridem   s    ((//V0LMr   )r%   r(   r&   r$   r'   )NNN)r   r   r   r   __doc__r,   r   r)   classmethodr/   r=   rH   rO   r   r   r   r   r   r      s>     I:.  W3<jNr   r   c                      ^  \ rS rSrSU 4S jjrS rU 4S jrSS jrS rS r	\
R                  " 5       SS j5       rS	 rS
 rS r\
R                   4S jrS rSrU =r$ )Optimizer8bitq   c                   > [         TU ]  X5        SU l        0 U l        X@l        [
        R                  R                  5       U l        [        R                  5       U l
        1 SkU l        US:X  a  U R                  5         gg)a2  
Base 8-bit optimizer class.

Arguments:
    params (`torch.Tensor`):
        The input parameters to optimize.
    optim_bits (`int`, defaults to 32):
        The number of bits of the optimizer state.
    is_paged (`bool`, defaults to `False`):
        Whether the optimizer is a paged optimizer or not.
F>   max1max2qmap1qmap2state1state2absmax1absmax2new_max1new_max2	gnorm_vec	unorm_vec   N)superr   initialized	name2qmapis_pagedFGlobalPageManagerr/   page_mngr   mngnon_castable_tensor_keys	fill_qmap)r   r2   defaults
optim_bitsrg   	__class__s        r   r   Optimizer8bit.__init__r   sn     	*  ++88:%224)
% ?NN r   c                     [         R                  " SS9U R                  S'   [         R                  " SS9U R                  S'   g )NT)signeddynamicFudynamic)rh   create_dynamic_maprf   r!   s    r   rm   Optimizer8bit.fill_qmap   s4    $%$8$8$Ey!%&%9%9%Gz"r   c                 $   > [         TU ]  U5        g r   )rd   __setstate__)r   staterp   s     r   ry   Optimizer8bit.__setstate__   s    U#r   c                   ^ ^^ [        U5      nT R                  nUS   n[        U5      [        U5      :w  a  [        S5      eS U 5       nS U 5       n[	        S [        XV5       5       5      (       a  [        S5      e[        [        R                  " S U 5       5      [        R                  " S U 5       5      5       VVs0 s H  u  pxXx_M	     n	nnUUU 4S	 jm[        [        5      n
US
   R                  5        H  u  pX;   a  X   nT" X5      X'   M  XU'   M      S n[        X45       VVs/ s H  u  nnU" UU5      PM     nnnT R                  U
US.5        gs  snnf s  snnf )a  Load an optimizer state.

Arguments:
    state_dict (`dict`):
        An optimizer state (should be returned from a call to `state_dict`) to load.
    move_to_device (`bool`, defaults to `True`):
        Whether to move the optimizer's state to the device.
r8   z<loaded state dict has a different number of parameter groupsc              3   >   #    U  H  n[        US    5      v   M     g7fr2   Nlen.0gs     r   	<genexpr>0Optimizer8bit.load_state_dict.<locals>.<genexpr>   s     71c!H+&&   c              3   >   #    U  H  n[        US    5      v   M     g7fr~   r   r   s     r   r   r      s     =1c!H+&&r   c              3   .   #    U  H  u  pX:g  v   M     g 7fr   r   )r   p_lens_lens      r   r   r      s     N2M,%u~2Ms   z]loaded state dict contains a parameter group that doesn't match the size of optimizer's groupc              3   *   #    U  H	  oS    v   M     g7fr~   r   r   s     r   r   r      s     #FAhK   c              3   *   #    U  H	  oS    v   M     g7fr~   r   r   s     r   r   r      s     #@AhKr   c                 D  >^  [        U[        R                  5      (       aP  T R                  5       (       a9  UR                  [        R
                  :w  a  UR                  T R                  5      nU$ [        U[        5      (       a^  UR                  5        HH  u  p#UTR                  ;   a(  T(       a  UR                  T R                  5      X'   M;  M=  T" T U5      X'   MJ     U$ [        U[        R                  5      (       a  [        U5      " UU 4S jU 5       5      $ U$ )zBMake a deep copy of value, casting all tensors to device of param.c              3   6   >#    U  H  nT" TU5      v   M     g 7fr   r   )r   vcastparams     r   r   >Optimizer8bit.load_state_dict.<locals>.cast.<locals>.<genexpr>   s     "A5a4q>>5s   )r4   r@   rC   is_floating_pointdtypeuint8tor5   itemsrl   devicecontainer_abcsIterabletype)r   rF   kr   r   move_to_devicer   s   `   r   r   +Optimizer8bit.load_state_dict.<locals>.cast   s    %.. **,,1K!HHU[[1EE4((!KKMDAD999)'(ttELL'9EH * $(q> * E>#:#:;;E{"A5"AAAr   rz   c                     U S   US'   U$ )Nr2   r   )r:   	new_groups     r   update_group3Optimizer8bit.load_state_dict.<locals>.update_group   s    "'/Ihr   )rz   r8   N)r   r8   r   
ValueErroranyzipr   from_iterabler   r5   r   ry   )r   
state_dictr   groupssaved_groups
param_lens
saved_lensold_idr<   id_maprz   r   r   r   r   r   ngr8   r   s   ` `               @r   load_state_dictOptimizer8bit.load_state_dict   sh    j)
""!.1v;#l++[\\77
==
N#j2MNNNo  !###F#FF###@#@@
	 I 	 
	2 D!w'--/DA{	#E~a 0	 :=V9RS9R2Q+9RSE<HI_
\ Ts   E!3E'c                    [        U R                  5       H  u  p[        US   5       H  u  p4X@R                  ;   d  M  U R                  U   nUR                  5        Hg  u  pg[	        U[
        R                  5      (       d  M&  [        USS5      nU(       a  M<  UR                  UR                  5      U R                  U   U'   Mi     M     M     g )Nr2   rg   F)
r6   r8   rz   r   r4   r@   rC   getattrr   r   )	r   gindexr:   pindexr<   valuesr   r   rg   s	            r   to_gpuOptimizer8bit.to_gpu   s    &t'8'89MF&uX7	

?!ZZ]F &%a66'.q*e'DH#+83444>

1a 0	 !/ 8 :r   c                 l   U R                   R                   GH  u  pn[        X5      nUc   e[        U[        R
                  5      (       d!  [        U[        R                  5      (       d   eSn[        U R                  5       H  u  pgU(       a    M}  [        US   5       H  u  pU(       a    M,  [        U	5      [        U5      :X  d  M)  X0R                   R                  [        U	5      '   U R                   R                  [        U	5         U R                   R                  Xh4'   SnM     M     GM     g )NFr2   T)rk   r(   r   r4   r@   rC   rB   r6   r8   r7   r$   r%   )
r   rL   attrrN   pmodulefoundr   r:   r   r<   s
             r   check_overridesOptimizer8bit.check_overrides   s    $(HH$H$H F&f+G&&&gu||44
7EOO8\8\\\E!*4+<+<!=!*5?!;IF!u7+ 6<++BqE2BF((BUBUVXYZV[B\--v.>? $ "< "> %Ir   c                 t   SnUb%  [         R                  " 5          U" 5       nSSS5        U R                  (       d'  U R                  5         U R	                  5         SU l        Sn[        U R                  5       H  u  pE[        US   5       Hr  u  pcUR                  c  M  U R                  U   n[        U5      S:X  a  U R                  XSXF5        U R                  U5        U R                  XSXF5        [        U5        Mt     M     U R                  (       a  Ub  [        U5        U$ ! , (       d  f       GN= f)zPerform a single optimization step.

Arguments:
    closure (`Callable`, *optional*, defaults to `None`):
        A closure that reevaluates the model and returns the loss.
NTr2   r   )r@   enable_gradre   r   r   r6   r8   gradrz   r   
init_stateprefetch_stateupdate_stepr   rg   )r   closurelossr<   r   r:   r   rz   s           r   stepOptimizer8bit.step  s     ""$y %   "KKM#D &t'8'89MF&uX7	66>

1u:?OOEf=##A&  6: 8 : ==Q] QK5 %$s   D((
D7c                 |   0 nUS   US'   US   US'   US   US'   US   US'   UR                  SS5      US'   UR                  SS5      US'   UR                  S	S5      US	'   U R                  R                  US
'   U R                  R                  US'   U R                  R                  US'   U R                  R
                  US'   U R                  R                  US'   U R                  R                  US'   X4U R                  R                  ;   a)  UR                  U R                  R                  X4   5        U$ )Nbetasepsweight_decaylralpha        t_alphar   t_beta3ro   min_8bit_sizepercentile_clipping
block_wise	max_unorm
skip_zeros)getargsro   r   r   r   r   r   rk   r%   rD   )r   r   r   r:   rN   s        r   
get_configOptimizer8bit.get_config,  s,   .weu!&~!6~T{t))GS1w!IIi3y!IIi3y#yy33|"&))"9"9(,		(E(E$%#yy33|"ii11{#yy33|txx444MM$((//0@ABr   c                     [        S5      e)Nz(init_state method needs to be overriddenNotImplementedErrorr   r:   r<   r   r   s        r   r   Optimizer8bit.init_state@  s    !"LMMr   c                     [        S5      e)Nz-The update_step method needs to be overriddenr   r   s        r   r   Optimizer8bit.update_stepC  s    !"QRRr   c                 Z   U R                   (       a  UR                  5       S:  a  [        R                  " XUR                  S9$ [
        R                  " UR                  X!R                  S.6n[
        R                  " US5        U R                  R                  R                  U5        U$ )Ng     j@r   r   r   )rg   numelr@   
zeros_liker   rh   	get_pagedshapefillrj   paged_tensorsrK   )r   r<   r   buffs       r   get_state_bufferOptimizer8bit.get_state_bufferF  sp    }}	C##A188DD ;;uXXFDFF4OMM''..t4Kr   c                     U R                   (       ac  U R                  U   nUS   n[        USS5      nU(       a:  [        R                  " US   5        SU;   a  [        R                  " US   5        g g g g )Nr[   rg   Fr\   )rg   rz   r   rh   prefetch_tensor)r   r<   rz   s1rg   s        r   r   Optimizer8bit.prefetch_stateP  sk    ==JJqMExBr:u5H!!%/2u$%%eHo6 % 	 r   )re   rg   rk   rf   rl   rj   )    F)Tr   )r   r   r   r   r   rm   ry   r   r   r   r@   no_gradr   r   r   r   float32r   r   r   __classcell__rp   s   @r   rT   rT   q   sk    #JH$HJT	B%& ]]_# #J(NS ). 7 7r   rT   c                      ^  \ rS rSr               SS\\   S\\   4U 4S jjjr\R                  " 5       S 5       r	\R                  " 5       S 5       r
SrU =r$ )	Optimizer2Statei[  r   r   c           
        > SU::  d  [        SU 35      eSU::  d  [        SU 35      e[        U[        5      (       aZ  UR                  SS5      R                  SS5      R	                  5       R                  S5      nU Vs/ s H  n[        U5      PM     nn[        [        U5      5       H)  nSUU   s=::  a  S:  a  M  O  [        S	U S
UU    35      e   SU::  d  [        SU 35      e[        X4XVUUUS9n[        TU ]-  UUX~5        Uc+  0 nXxS'   XS'   XS'   XS'   XS'   XS'   [        U5      U l        OXl        Xl        gs  snf )a@  
Base 2-state update optimizer class.

Arguments:
    optimizer_name (`str`):
        The name of the optimizer.
    params (`torch.Tensor`):
        The input parameters to optimize.
    lr (`float`, defaults to 1e-3):
        The learning rate.
    betas (`tuple`, defaults to (0.9, 0.999)):
        The beta values for the optimizer.
    eps (`float`, defaults to 1e-8):
        The epsilon value for the optimizer.
    weight_decay (`float`, defaults to 0.0):
        The weight decay value for the optimizer.
    optim_bits (`int`, defaults to 32):
        The number of bits of the optimizer state.
    args (`object`, defaults to `None`):
        An object with additional arguments.
    min_8bit_size (`int`, defaults to 4096):
        The minimum number of elements of the parameter tensors for 8-bit optimization.
    percentile_clipping (`int`, defaults to 100):
        Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
    block_wise (`bool`, defaults to `True`):
        Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
    max_unorm (`float`, defaults to 0.0):
        The maximum value to normalize each block with.
    skip_zeros (`bool`, defaults to `False`):
        Whether to skip zero values for sparse gradients and models to ensure correct updates.
    is_paged (`bool`, defaults to `False`):
        Whether the optimizer is a paged optimizer or not.
    alpha (`float`, defaults to 0.0):
        The alpha value for the AdEMAMix optimizer.
    t_alpha (`Optional[int]`, defaults to `None`):
        Number of iterations for alpha scheduling with AdEMAMix.
    t_beta3 (`Optional[int]`, defaults to `None`):
        Number of iterations for beta scheduling with AdEMAMix.

r   Invalid learning rate: Invalid epsilon value: ( ),      ? Invalid beta parameter at index : Invalid weight_decay value: )r   r   r   r   r   r   r   Nro   r   r   r   r   r   )r   r4   strreplacestripsplitfloatranger   r5   rd   r   r
   r   optimizer_name)r   r	  r2   r   r   r   r   ro   r   r   r   r   r   r   rg   r   r   r   birn   rp   s                        r   r   Optimizer2State.__init__\  sn   x by6rd;<<cz6se<==eS!!MM#r*223;AACII#NE',-u!U1XuE-s5z"A%((S( #CA3bq
!STT # l";L>JKKC%Y`jq
 	:@<D!+$1!*=&'!+ )!+ DII,5 .s   Ec                 P   U R                  X4U5      nUS   S:X  a  [        R                  nO+US   S:X  a  [        R                  nO[	        SUS    35      eUR                  5       US   :  a  [        R                  nU R                  U   nSUS'   U[        R                  :X  aD  U R                  U[        R                  S9US	'   U R                  U[        R                  S9US
'   GOeU[        R                  :X  GaP  US   S:X  a  SU R                  ;  a  U R                  5         U R                  S   R                  UR                  5      U R                  S'   U R                  S   R                  UR                  5      U R                  S'   U R                  U[        R                  S9US	'   U R                  S   US'   U R                  U[        R                  S9US
'   U R                  S   US'   US   (       a  SnUR                  5       n	X-  [        X-  5      -   n
[        R                  " U
4[        R                  UR                  S9US'   [        R                  " U
4[        R                  UR                  S9US'   O[        R                  " S[        R                  UR                  S9US'   [        R                  " S[        R                  UR                  S9US'   [        R                  " S[        R                  UR                  S9US'   [        R                  " S[        R                  UR                  S9US'   US   S:  a"  [        R                  " SUR                  S9US'   US   S:  a#  [        R                  " SUR                  S9US '   g g )!Nro   r   rc   (Amount of optimizer bits not supported: r   r   r   r   r[   r\   rt   ru   rY   rZ   r      r   r]   r^      rW   r_   rX   r`   r   d   r  r   ra   r   r   rb   r   r@   r   r   r   r   rz   r   rf   rm   r   r   boolzerosr   r:   r<   r   r   rN   r   rz   	blocksizenblockss              r   r   Optimizer2State.init_state  s   7,2%MMEL!Q&KKE%(PQWXdQePf&ghh779vo..MME

1fEMM!"33AU]]3KE(O"33AU]]3KE(Oekk!V}!DNN2NN$,0NN9,E,H,H,Ry)-1^^J-G-J-J188-Tz*"33AU[[3IE(O!^^I6E'N"33AU[[3IE(O!^^J7E'Nl#	GGI.D,??#(;;yVWV^V^#_i #(;;yVWV^V^#_i  %Dahh Wf$)KKEMMRSRZRZ$[j! %Dahh Wf$)KKEMMRSRZRZ$[j!'(3.!&VAHH!EE++$!&T!((!CE+ %r   c                    UR                   R                  5       Ul         UR                  R                  5       Ul        U R                  U   nUR                  nU R	                  X4U5      nUS==   S-  ss'   US   nUS   S:  a#  [
        R                  " UUS   UUS   5      u  pnOSnUS   R                  [        R                  :X  a  [
        R                  " U R                  UUUS   US   S	   US
   UUS   US   US   S   [        US   5      S:  a  US   S   OSUR                  SS5      US   UUS   S:  a  US   OS US   US   S9  g US   R                  [        R                  :X  a  US   (       d  [
        R                  " U R                  UUUS   US   US   S	   US   S   US
   UUS   US   US   US   US   US   US   US   UUS   S:  a  US   OS US   S9  US   US   sUS'   US'   US   US   sUS'   US'   g US   R                  [        R                  :X  a  US   (       a  [
        R                   " U R                  UUUS   US   US   S	   US   S   [        US   5      S:  a  US   S   OSUR                  SS5      US
   UUS   US   US   US   US   US   UUS   S 9  g g g )!Nr   r  r   r  ra   r   r[   r   r   r   r   r\         r   r   r   r   rb   r   r   r   r   rY   rZ   rW   rX   r_   r`   )gnorm_scalerb   r   r]   r^   r"  r   )data
contiguousr   rz   r   rh   r   r   r@   r  optimizer_update_32bitr	  r   r   r   optimizer_update_8bitoptimizer_update_8bit_blockwiser   r:   r<   r   r   rz   r   rN   r   current_gnorm
clip_valuer"  s               r   r   Optimizer2State.update_step  sN    ""$""$

1vv7fV}'(3.565J5Jk",-	62M{ K?  EKK/$$##hw"uthw"&)&/&:a&?w"S

7C(~&&,[&9C&?k"T -!,/#( 8_""ekk1&:N####hhw"w"utggffj!j!~&'06{0Cc0I%,t -)0 05Z/@%-,E&M5,/4Z/@%-,E&M5,8_""ekk1f\6J--##hhw"w"&)&/&:a&?w"S

7C(utggi i ~&'!,/' 7K1r   r   r	  )MbP?)?g+?:0yE>r   r   N   r  Tr   FFr   NN)r   r   r   r   r   intr   r@   r   r   r   r   r   r   s   @r   r   r   [  s    
 !%!%%]-" ##]-$ #%]- ]-~ ]]_1D 1Df ]]_\ \r   r   c                      ^  \ rS rSr            SU 4S jjr\R                  " 5       S 5       r\R                  " 5       S 5       rSr	U =r
$ )Optimizer1StateiO  c                   > SU::  d  [        SU 35      eSU::  d  [        SU 35      e[        [        U5      5       H'  nSXO   s=::  a  S:  a  M  O  [        SU SXO    35      e   SU::  d  [        SU 35      e[        X4XVS9n[        TU ]  UUX~5        Uc+  0 nXxS
'   XS'   XS'   XS'   XS'   XS'   [        U5      U l        OXl        Xl        g	)a  
Base 1-state update optimizer class.

Arguments:
    optimizer_name (`str`):
        The name of the optimizer.
    params (`torch.Tensor`):
        The input parameters to optimize.
    lr (`float`, defaults to 1e-3):
        The learning rate.
    betas (`tuple`, defaults to (0.9, 0.0)):
        The beta values for the optimizer.
    eps (`float`, defaults to 1e-8):
        The epsilon value for the optimizer.
    weight_decay (`float`, defaults to 0.0):
        The weight decay value for the optimizer.
    optim_bits (`int`, defaults to 32):
        The number of bits of the optimizer state.
    args (`object`, defaults to `None`):
        An object with additional arguments.
    min_8bit_size (`int`, defaults to 4096):
        The minimum number of elements of the parameter tensors for 8-bit optimization.
    percentile_clipping (`int`, defaults to 100):
        Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
    block_wise (`bool`, defaults to `True`):
        Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
    max_unorm (`float`, defaults to 0.0):
        The maximum value to normalize each block with.
    skip_zeros (`bool`, defaults to `False`):
        Whether to skip zero values for sparse gradients and models to ensure correct updates.
    is_paged (`bool`, defaults to `False`):
        Whether the optimizer is a paged optimizer or not.
r   r   r   r   r   r  r  )r   r   r   r   Nro   r   r   r   r   r   )	r   r  r   r5   rd   r   r
   r   r	  )r   r	  r2   r   r   r   r   ro   r   r   r   r   r   r   rg   r  rn   rp   s                    r   r   Optimizer1State.__init__P  s   d by6rd;<<cz6se<==s5z"A%((S( #CA3b
!STT # l";L>JKK2O:@<D!+$1!*=&'!+ )!+ DII,r   c                    U R                  X4U5      nUS   S:X  a  [        R                  nO+US   S:X  a  [        R                  nO[	        SUS    35      eUR                  5       US   :  a  [        R                  nU R                  U   nSUS'   U[        R                  :X  a#  U R                  U[        R                  S9US	'   GOiU[        R                  :X  GaT  US   S:X  aU  S
U R                  ;  a  U R                  5         U R                  S
   R                  UR                  5      U R                  S
'   U R                  U[        R                  S9US	'   U R                  S
   US'   US   (       aW  SnUR                  5       n	X-  [        X-  5      -   n
[        R                  " U
4[        R                  UR                  S9US'   Ob[        R                  " S[        R                  UR                  S9US'   [        R                  " S[        R                  UR                  S9US'   US   S:  a"  [        R                  " SUR                  S9US'   US   S:  a#  [        R                  " SUR                  S9US'   g g )Nro   r   rc   r  r   r   r   r  r[   rt   rY   r   r  r   r]   r  rW   r_   r   r  r  r  ra   r   r   rb   r  r  s              r   r   Optimizer1State.init_state  s   7,2%MMEL!Q&KKE%(PQWXdQePf&ghh779vo..MME

1fEMM!"33AU]]3KE(Oekk!V}!DNN2NN$,0NN9,E,H,H,Ry)"33AU[[3IE(O!^^I6E'Nl#	GGI.D,??#(;;yVWV^V^#_i  %Dahh Wf$)KKEMMRSRZRZ$[j!'(3.!&VAHH!EE++$!&T!((!CE+ %r   c                    UR                   R                  5       Ul         UR                  R                  5       Ul        U R                  U   nUR                  nU R	                  X4U5      nUS==   S-  ss'   US   nUS   S:  a#  [
        R                  " UUS   UUS   5      u  pnOSnUS   R                  [        R                  :X  a[  [
        R                  " U R                  UUUS   US   S	   US
   UUS   S US   S   SSUS   UUS   S:  a  US   OS US   US   S9  g US   R                  [        R                  :X  a  US   (       du  [
        R                  " U R                  UUUS   S US   S	   US   S   US
   UUS   US   S US   S US   S US   UUS   S:  a  US   OS US   S9  US   US   sUS'   US'   g US   R                  [        R                  :X  a]  US   (       aR  [
        R                  " U R                  UUUS   S US   S	   US   S   SSUS
   UUS   US   S US   S US   UUS   S9  g g g )Nr   r  r   r  ra   r   r[   r   r   r   r   r   r   r   rb   r   r!  r   rY   rW   r_   )r   r]   r#  )r$  r%  r   rz   r   rh   r   r   r@   r  r&  r	  r   r'  r(  r)  s               r   r   Optimizer1State.update_step  s    ""$""$

1vv7fV}'(3.565J5Jk",-	62M{ K?  EKK/$$##hw"utw"~&&,[&9C&?k"T -!,/#( 8_""ekk1&:N####hw"w"utgfj!~&&,[&9C&?k"T -). 05Z/@%-,E&M5,8_""ekk1f\6J--##hw"w"utgi ~&'!,/' 7K1r   r-  )r.  )r/  r   r0  r   r   Nr1  r  Tr   FF)r   r   r   r   r   r@   r   r   r   r   r   r   s   @r   r4  r4  O  si    
 K-Z ]]_)D )DV ]]_Z Zr   r4  )collectionsr   r   r   copyr   	itertoolsr   typingr   r@   bitsandbytes.functional
functionalrh   bitsandbytes.utilsr   r
   r   optim	OptimizerrT   r   r4  r   r   r   <module>rD     sk   
 ;     # '2 2XN XNvg7EKK)) g7Tqm qhUm Ur   