
from bitsandbytes.optim.optimizer import Optimizer1State


class SGD(Optimizer1State):
    def __init__(
        self, params, lr, momentum=0, dampening=0, weight_decay=0.0, nesterov=False,
        optim_bits=32, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True,
    ):
        """
        Base SGD optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
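
        Example (a minimal training-step sketch; the toy `torch.nn.Linear` model is
        purely for illustration, and `momentum` must be non-zero since SGD without
        momentum is not supported):

        ```python
        import torch
        import bitsandbytes as bnb

        model = torch.nn.Linear(16, 4)
        optimizer = bnb.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

        loss = model(torch.randn(8, 16)).sum()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        ```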
        """
        if momentum == 0:
            raise NotImplementedError("SGD without momentum is not supported!")
        # (momentum, dampening) fills the `betas` slot of Optimizer1State; `eps` is
        # unused by SGD, so 0.0 is passed in its place.
        super().__init__(
            "momentum", params, lr, (momentum, dampening), 0.0, weight_decay,
            optim_bits, args, min_8bit_size, percentile_clipping, block_wise,
        )


class SGD8bit(Optimizer1State):
    def __init__(
        self, params, lr, momentum=0, dampening=0, weight_decay=0.0, nesterov=False,
        args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True,
    ):
        """
        8-bit SGD optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
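
        Example (a minimal sketch; the layer size is an arbitrary illustration chosen
        so the weight matrix exceeds `min_8bit_size` and actually receives an 8-bit
        optimizer state):

        ```python
        import torch
        import bitsandbytes as bnb

        model = torch.nn.Linear(2048, 2048)
        optimizer = bnb.optim.SGD8bit(model.parameters(), lr=0.01, momentum=0.9)
        ```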
        """
        if momentum == 0:
            raise NotImplementedError("SGD without momentum is not supported!")
        super().__init__(
            "momentum", params, lr, (momentum, dampening), 0.0, weight_decay,
            8,  # optim_bits is fixed to 8 for the 8-bit variant
            args, min_8bit_size, percentile_clipping, block_wise,
        )


class SGD32bit(Optimizer1State):
    def __init__(
        self, params, lr, momentum=0, dampening=0, weight_decay=0.0, nesterov=False,
        args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True,
    ):
        """
        32-bit SGD optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
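
        Example (a minimal sketch, assuming a toy `torch.nn.Linear` model; `SGD32bit`
        matches `SGD` with `optim_bits=32`, keeping the optimizer state in full
        32-bit precision):

        ```python
        import torch
        import bitsandbytes as bnb

        model = torch.nn.Linear(16, 4)
        optimizer = bnb.optim.SGD32bit(model.parameters(), lr=0.01, momentum=0.9)
        ```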
        """
        if momentum == 0:
            raise NotImplementedError("SGD without momentum is not supported!")
        super().__init__(
            "momentum", params, lr, (momentum, dampening), 0.0, weight_decay,
            32,  # optim_bits is fixed to 32 for the 32-bit variant
            args, min_8bit_size, percentile_clipping, block_wise,
        )