
from bitsandbytes.optim.optimizer import Optimizer1State


class Adagrad(Optimizer1State):
    def __init__(
        self,
        params,
        lr=1e-2,
        lr_decay=0,
        weight_decay=0,
        initial_accumulator_value=0,
        eps=1e-10,
        optim_bits=32,
        args=None,
        min_8bit_size=4096,
        percentile_clipping=100,
        block_wise=True,
    ):
        """
        Base Adagrad optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            lr_decay (`int`, defaults to 0):
                The learning rate decay.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            initial_accumulator_value (`int`, defaults to 0):
                The initial momentum values.
            eps (`float`, defaults to 1e-10):
                The epsilon value prevents division by zero in the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        """
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if initial_accumulator_value != 0:
            raise ValueError("Initial accumulator value != 0.0 not supported!")
        if lr_decay != 0:
            raise ValueError("Lr Decay != 0.0 not supported!")
        super().__init__(
            "adagrad",
            params,
            lr,
            (0.0, 0.0),
            eps,
            weight_decay,
            optim_bits,
            args,
            min_8bit_size,
            percentile_clipping,
            block_wise,
        )


class Adagrad8bit(Optimizer1State):
    def __init__(
        self,
        params,
        lr=1e-2,
        lr_decay=0,
        weight_decay=0,
        initial_accumulator_value=0,
        eps=1e-10,
        optim_bits=8,
        args=None,
        min_8bit_size=4096,
        percentile_clipping=100,
        block_wise=True,
    ):
        """
        8-bit Adagrad optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            lr_decay (`int`, defaults to 0):
                The learning rate decay.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            initial_accumulator_value (`int`, defaults to 0):
                The initial momentum values.
            eps (`float`, defaults to 1e-10):
                The epsilon value prevents division by zero in the optimizer.
            optim_bits (`int`, defaults to 8):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        """
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if initial_accumulator_value != 0:
            raise ValueError("Initial accumulator value != 0.0 not supported!")
        if lr_decay != 0:
            raise ValueError("Lr Decay != 0.0 not supported!")
        assert block_wise
        super().__init__(
            "adagrad",
            params,
            lr,
            (0.0, 0.0),
            eps,
            weight_decay,
            8,
            args,
            min_8bit_size,
            percentile_clipping,
            block_wise,
        )


class Adagrad32bit(Optimizer1State):
    def __init__(
        self,
        params,
        lr=1e-2,
        lr_decay=0,
        weight_decay=0,
        initial_accumulator_value=0,
        eps=1e-10,
        optim_bits=32,
        args=None,
        min_8bit_size=4096,
        percentile_clipping=100,
        block_wise=True,
    ):
        """
        32-bit Adagrad optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            lr_decay (`int`, defaults to 0):
                The learning rate decay.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            initial_accumulator_value (`int`, defaults to 0):
                The initial momentum values.
            eps (`float`, defaults to 1e-10):
                The epsilon value prevents division by zero in the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        """
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
        if not 0.0 <= eps:
            raise ValueError(f"Invalid epsilon value: {eps}")
        if initial_accumulator_value != 0:
            raise ValueError("Initial accumulator value != 0.0 not supported!")
        if lr_decay != 0:
            raise ValueError("Lr Decay != 0.0 not supported!")
        super().__init__(
            "adagrad",
            params,
            lr,
            (0.0, 0.0),
            eps,
            weight_decay,
            32,
            args,
            min_8bit_size,
            percentile_clipping,
            block_wise,
        )
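

# For reference: the update rule these classes implement (through
# `Optimizer1State`) is standard Adagrad. A plain-PyTorch sketch of one step,
# assuming parameter `p`, gradient `g`, and per-parameter accumulator `state`:
#
#   state += g * g
#   p -= lr * g / (state.sqrt() + eps)
#
# The variants differ only in how `state` is stored: 32-bit floats
# (`Adagrad32bit`) or block-wise quantized 8-bit values (`Adagrad8bit`).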