
    ;iL$                     |    S SK r S SKJr  S SKJr   " S S\5      r " S S\5      r " S S	\5      r " S
 S\5      rg)    N)	Optimizer)Optimizer1Statec                   >   ^  \ rS rSr         SU 4S jjrSrU =r$ )LARS   c                 Z   > US:X  a  [        S5      e[        TU ]	  SUUX44SUUUU	U
USS9  g)a  
Base LARS optimizer.

Arguments:
    params (`torch.tensor`):
        The input parameters to optimize.
    lr (`float`):
        The learning rate.
    momentum (`float`, defaults to 0):
        The momentum value speeds up the optimizer by taking bigger steps.
    dampening (`float`, defaults to 0):
        The dampening value reduces the momentum of the optimizer.
    weight_decay (`float`, defaults to 1e-2):
        The weight decay value for the optimizer.
    nesterov (`bool`, defaults to `False`):
        Whether to use Nesterov momentum.
    optim_bits (`int`, defaults to 32):
        The number of bits of the optimizer state.
    args (`object`, defaults to `None`):
        An object with additional arguments.
    min_8bit_size (`int`, defaults to 4096):
        The minimum number of elements of the parameter tensors for 8-bit optimization.
    percentile_clipping (`int`, defaults to 100):
        Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
    max_unorm (`float`, defaults to 0.02):
        The maximum gradient norm.
r   'LARS without momentum is not supported!lars        F	max_unorm
block_wiseNNotImplementedErrorsuper__init__)selfparamslrmomentum	dampeningweight_decaynesterov
optim_bitsargsmin_8bit_sizepercentile_clippingr   	__class__s               g/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/optim/lars.pyr   LARS.__init__   sR    R q=%&OPP! 	 	
     )	r   r   r   F    N   d   {Gz?__name__
__module____qualname____firstlineno__r   __static_attributes____classcell__r   s   @r   r   r      s*    
 8
 8
r!   r   c                   <   ^  \ rS rSr        SU 4S jjrSrU =r$ )LARS8bitG   c                 Z   > US:X  a  [        S5      e[        TU ]	  SUUX44SUSUUU	U
SS9  g)	a%  
8-bit LARS optimizer.

Arguments:
    params (`torch.tensor`):
        The input parameters to optimize.
    lr (`float`):
        The learning rate.
    momentum (`float`, defaults to 0):
        The momentum value speeds up the optimizer by taking bigger steps.
    dampening (`float`, defaults to 0):
        The dampening value reduces the momentum of the optimizer.
    weight_decay (`float`, defaults to 1e-2):
        The weight decay value for the optimizer.
    nesterov (`bool`, defaults to `False`):
        Whether to use Nesterov momentum.
    args (`object`, defaults to `None`):
        An object with additional arguments.
    min_8bit_size (`int`, defaults to 4096):
        The minimum number of elements of the parameter tensors for 8-bit optimization.
    percentile_clipping (`int`, defaults to 100):
        Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
    max_unorm (`float`, defaults to 0.02):
        The maximum gradient norm.
r   r	   r
   r      Fr   Nr   r   r   r   r   r   r   r   r   r   r   r   r   s              r   r   LARS8bit.__init__H   sR    L q=%&OPP! 	 	
r!   r"   r   r   r   FNr$   r%   r&   r'   r.   s   @r   r0   r0   G   '    
 5
 5
r!   r0   c                   <   ^  \ rS rSr        SU 4S jjrSrU =r$ )	LARS32bit   c                 Z   > US:X  a  [        S5      e[        TU ]	  SUUX44SUSUUU	U
SS9  g)	a&  
32-bit LARS optimizer.

Arguments:
    params (`torch.tensor`):
        The input parameters to optimize.
    lr (`float`):
        The learning rate.
    momentum (`float`, defaults to 0):
        The momentum value speeds up the optimizer by taking bigger steps.
    dampening (`float`, defaults to 0):
        The dampening value reduces the momentum of the optimizer.
    weight_decay (`float`, defaults to 1e-2):
        The weight decay value for the optimizer.
    nesterov (`bool`, defaults to `False`):
        Whether to use Nesterov momentum.
    args (`object`, defaults to `None`):
        An object with additional arguments.
    min_8bit_size (`int`, defaults to 4096):
        The minimum number of elements of the parameter tensors for 8-bit optimization.
    percentile_clipping (`int`, defaults to 100):
        Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
    max_unorm (`float`, defaults to 0.02):
        The maximum gradient norm.
r   r	   r
   r   r#   Fr   Nr   r4   s              r   r   LARS32bit.__init__   sR    L q=%&OPP! 	 	
r!   r"   r6   r'   r.   s   @r   r9   r9      r7   r!   r9   c                   v   ^  \ rS rSr      SU 4S jjrU 4S jr\R                  " 5       SS j5       rSr	U =r
$ )PytorchLARS   c           	         > US:  a  [        SU 35      eUS:  a  [        SU 35      eUS:  a  [        SU 35      e[        UUUUUUS9nU(       a  US::  d  US:w  a  [        S5      e[        T	U ]  X5        g )Nr   zInvalid learning rate: zInvalid momentum value: zInvalid weight_decay value: )r   r   r   r   r   r   r   z8Nesterov momentum requires a momentum and zero dampening)
ValueErrordictr   r   )
r   r   r   r   r   r   r   r   defaultsr   s
            r   r   PytorchLARS.__init__   s     86rd;<<c>7zBCC#;L>JKK%
 Q)q.WXX*r!   c                 n   > [         TU ]  U5        U R                   H  nUR                  SS5        M     g )Nr   F)r   __setstate__param_groups
setdefault)r   stategroupr   s      r   rF   PytorchLARS.__setstate__   s0    U#&&EZ/ 'r!   c                 z   SnUb%  [         R                  " 5          U" 5       nSSS5        U R                   GHm  nUS   nUS   nUS   nUS   nUS   nUS   n	US    GHA  n
U
R                  c  M  U R                  U
   nU
R                  nUS	:w  a  UR                  XS
9nUS	:w  aq  UR                  SS5      nUc)  [         R                  " U5      R                  5       nXS'   O"UR                  U5      R                  USU-
  S
9  U(       a  XU-  -   nOUnSnUS:  ak  U
R                  [         R                  :X  d   e[         R                  " U
R                  5       5      n[         R                  " W5      nUUU-  :  a  UU-  U-  nU
R                  WU	* U-  S
9  GMD     GMp     U$ ! , (       d  f       GN= f)zPerforms a single optimization step.

Args:
    closure (callable, optional): A closure that reevaluates the model
        and returns the loss.
Nr   r   r   r   r   r   r   r   )alphamomentum_buffer   g      ?r   )torchenable_gradrG   gradrI   addgetclonedetachmul_add_dtypefloat32norm)r   closurelossrJ   r   r   r   r   r   r   prI   d_pbufupdateupdate_scalepnormunorms                     r   stepPytorchLARS.step   s    ""$y % &&E 0LZ(Hk*IZ(Hk*ItB8_66>

1ff1$''!'8Cq=))$5t<C{#kk#.55736/0*//1y=/I!$X~!5!$"s?77emm333!JJqxxz2E!JJv.Ey500'05'85'@vbS<%78? % 'R Y %$s   F++
F:r"   )g{Gz?r   r   r   Fr&   )N)r(   r)   r*   r+   r   rF   rP   no_gradre   r,   r-   r.   s   @r   r>   r>      s;     +:0
 ]]_5 5r!   r>   )	rP   torch.optimr   bitsandbytes.optim.optimizerr   r   r0   r9   r>   r"   r!   r   <module>rj      sE   
  ! 89
? 9
x6
 6
r6
 6
rY) Yr!   