
    ;ij                     R    S SK Jr   " S S\5      r " S S\5      r " S S\5      rg)	    )Optimizer1Statec                   B   ^  \ rS rSr           SU 4S jjrSrU =r$ )RMSprop   c                    > US:X  a  [        S5      eU(       a  [        S5      e[        TU ]	  SUUX64UUUU	U
UU5        g)a  
Base RMSprop optimizer.

Arguments:
    params (`torch.tensor`):
        The input parameters to optimize.
    lr (`float`, defaults to 1e-2):
        The learning rate.
    alpha (`float`, defaults to 0.99):
        The alpha value is the decay rate of the squared gradients of the optimizer.
    eps (`float`, defaults to 1e-8):
        The epsilon value prevents division by zero in the optimizer.
    weight_decay (`float`, defaults to 0.0):
        The weight decay value for the optimizer.
    momentum (`float`, defaults to 0):
        The momentum value speeds up the optimizer by taking bigger steps.
    centered (`bool`, defaults to `False`):
        Whether the gradients are normalized by the variance. If `True`, it can help training at the expense of additional compute.
    optim_bits (`int`, defaults to 32):
        The number of bits of the optimizer state.
    args (`object`, defaults to `None`):
        An object with additional arguments.
    min_8bit_size (`int`, defaults to 4096):
        The minimum number of elements of the parameter tensors for 8-bit optimization.
    percentile_clipping (`int`, defaults to 100):
        Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
    block_wise (`bool`, defaults to `True`):
        Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
r   )RMSprop with alpha==0.0 is not supported!"Centered RMSprop is not supported!rmspropNNotImplementedErrorsuper__init__)selfparamslralphaepsweight_decaymomentumcentered
optim_bitsargsmin_8bit_sizepercentile_clipping
block_wise	__class__s                j/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/optim/rmsprop.pyr   RMSprop.__init__	   sY    X A:%&QRR%&JKK	
     ){Gz?Gz?:0yE>r   r   F    N   d   T__name__
__module____qualname____firstlineno__r   __static_attributes____classcell__r   s   @r   r   r      s0     <
 <
r   r   c                   @   ^  \ rS rSr          SU 4S jjrSrU =r$ )RMSprop8bitH   c                    > US:X  a  [        S5      eU(       a  [        S5      e[        TU ]	  SUUX64UUSUU	U
U5        g)a  
8-bit RMSprop optimizer.

Arguments:
    params (`torch.tensor`):
        The input parameters to optimize.
    lr (`float`, defaults to 1e-2):
        The learning rate.
    alpha (`float`, defaults to 0.99):
        The alpha value is the decay rate of the squared gradients of the optimizer.
    eps (`float`, defaults to 1e-8):
        The epsilon value prevents division by zero in the optimizer.
    weight_decay (`float`, defaults to 0.0):
        The weight decay value for the optimizer.
    momentum (`float`, defaults to 0):
        The momentum value speeds up the optimizer by taking bigger steps.
    centered (`bool`, defaults to `False`):
        Whether the gradients are normalized by the variance. If `True`, it can help training at the expense of additional compute.
    optim_bits (`int`, defaults to 32):
        The number of bits of the optimizer state.
    args (`object`, defaults to `None`):
        An object with additional arguments.
    min_8bit_size (`int`, defaults to 4096):
        The minimum number of elements of the parameter tensors for 8-bit optimization.
    percentile_clipping (`int`, defaults to 100):
        Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
    block_wise (`bool`, defaults to `True`):
        Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
r   r   r	   r
   r   Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   s               r   r   RMSprop8bit.__init__I   sY    V A:%&QRR%&JKK	
r   r    
r!   r"   r#   r   r   FNr%   r&   Tr'   r.   s   @r   r0   r0   H   s-     ;
 ;
r   r0   c                   @   ^  \ rS rSr          SU 4S jjrSrU =r$ )RMSprop32bit   c                    > US:X  a  [        S5      eU(       a  [        S5      e[        TU ]	  SUUX64UUSUU	U
U5        g)a  
32-bit RMSprop optimizer.

Arguments:
    params (`torch.tensor`):
        The input parameters to optimize.
    lr (`float`, defaults to 1e-2):
        The learning rate.
    alpha (`float`, defaults to 0.99):
        The alpha value is the decay rate of the squared gradients of the optimizer.
    eps (`float`, defaults to 1e-8):
        The epsilon value prevents division by zero in the optimizer.
    weight_decay (`float`, defaults to 0.0):
        The weight decay value for the optimizer.
    momentum (`float`, defaults to 0):
        The momentum value speeds up the optimizer by taking bigger steps.
    centered (`bool`, defaults to `False`):
        Whether the gradients are normalized by the variance. If `True`, it can help training at the expense of additional compute.
    optim_bits (`int`, defaults to 32):
        The number of bits of the optimizer state.
    args (`object`, defaults to `None`):
        An object with additional arguments.
    min_8bit_size (`int`, defaults to 4096):
        The minimum number of elements of the parameter tensors for 8-bit optimization.
    percentile_clipping (`int`, defaults to 100):
        Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
    block_wise (`bool`, defaults to `True`):
        Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
r   r   r	   r
   r$   Nr   r3   s               r   r   RMSprop32bit.__init__   sY    X A:%&QRR%&JKK	
r   r    r5   r'   r.   s   @r   r7   r7      s-     <
 <
r   r7   N)bitsandbytes.optim.optimizerr   r   r0   r7   r    r   r   <module>r<      s0   
 9=
o =
@<
/ <
~=
? =
r   