
    o9i1                        d dl Z d dlZd dlZd dlmZ d dlmZ d dl	m
Z
  G d de
      Z G d de
      Z G d de
      Z G d	 d
e
      Z G d de
      Z G d de
      Z G d dej"                  j$                        Zy)    N)Optimizer2Statec                   $     e Zd Z	 	 d fd	Z xZS )Adamc                 <    t         |   d||||||||	|
||       y )Nadamis_pagedsuper__init__selfparamslrbetasepsweight_decayamsgrad
optim_bitsargsmin_8bit_sizepercentile_clipping
block_wiser	   	__class__s                a/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/bitsandbytes/optim/adam.pyr   zAdam.__init__   s9    &&"eS,
TXZgi|  I  T\  	]    MbP?g?g+?:0yE>r   F    Ni   d   TF__name__
__module____qualname__r   __classcell__r   s   @r   r   r      s    prin] ]r   r   c                   $     e Zd Z	 	 d fd	Z xZS )Adam8bitc                 <    t         |   d|||||d||	|
||       y )Nr      r   r
   r   s                r   r   zAdam8bit.__init__   s4    &&"eS,4Q^`su  KS  	Tr   r   r#   r(   s   @r   r*   r*      s    prinT Tr   r*   c                   $     e Zd Z	 	 d fd	Z xZS )	Adam32bitc                 <    t         |   d|||||d||	|
||       y )Nr   r!   r   r
   r   s                r   r   zAdam32bit.__init__   s8    &&"eS,DR_at  wA  LT  	Ur   r   r#   r(   s   @r   r.   r.      s    prinU Ur   r.   c                   $     e Zd Z	 	 d fd	Z xZS )	PagedAdamc                 <    t         |   d||||||||	|
|d       y )Nr   Tr   r
   r   s                r   r   zPagedAdam.__init__    s9    &&"eS,
TXZgi|  I  TX  	Yr   r   r#   r(   s   @r   r1   r1      s    prinY Yr   r1   c                   $     e Zd Z	 	 d fd	Z xZS )PagedAdam8bitc                 <    t         |   d|||||d||	|
|d       y )Nr   r,   Tr   r
   r   s                r   r   zPagedAdam8bit.__init__%   s4    &&"eS,4Q^`su  KO  	Pr   r   r#   r(   s   @r   r4   r4   $   s    prinP Pr   r4   c                   $     e Zd Z	 	 d fd	Z xZS )PagedAdam32bitc                 <    t         |   d|||||d||	|
|d       y )Nr   r!   Tr   r
   r   s                r   r   zPagedAdam32bit.__init__*   s8    &&"eS,DR_at  wA  LP  	Qr   r   r#   r(   s   @r   r7   r7   )   s    prinQ Qr   r7   c                   Z     e Zd ZdZ	 	 	 	 	 	 	 d fd	Zed        Zed        ZddZ xZ	S )AnalysisAdama  Adam that performs 8-bit vs 32-bit error analysis.

    This implementation is modified from torch.optim.Adam based on:
    `Fixed Weight Decay Regularization in Adam`
    (see https://arxiv.org/abs/1711.05101)

    It has been proposed in `Adam: A Method for Stochastic Optimization`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_

    .. _Adam: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
    c	                 b    t        |||||      }	t        
| 	  ||	       || _        || _        y )N)r   r   r   r   r   )dictr   r   analysissavedir)r   r   r   r   r   r   r   bnb_analysisr>   defaultsr   s             r   r   zAnalysisAdam.__init__I   s=     %
 	*$r   c                      yNT r   s    r   supports_memory_efficient_fp16z+AnalysisAdam.supports_memory_efficient_fp16_       r   c                      yrB   rC   rD   s    r   supports_flat_paramsz!AnalysisAdam.supports_flat_paramsc   rF   r   c           
      ^   d}| |       }| j                   D 	]  }t        |d         D 	]w  \  }}|j                  |j                  j                  }|j                  t
        j                  t
        j                  hv r|j                         }|j                  rt        d      |j                  dd      }|rJ |j                  }|j                  j                  t
        j                  t
        j                  hv r|j                         }| j                  |   }	t        |	      dk(  rd|	d<   t        j                  |      |	d<   t        j                  |      |	d	<   t        j                  d
|j                         |	d<   t        j                  d
|j                         |	d<   t        j                  d
|j                         |	d<   |r`t        j                  |      |	d<   nG|	d   j#                  |      |	d<   |	d	   j#                  |      |	d	<   |r|	d   j#                  |      |	d<   |	dxx   dz  cc<   |d   \  }
}d|
|	d   z  z
  }d||	d   z  z
  }|d   t%        j&                  |      z  |z  }|	d   }|	d   }|	d   }|d   dk7  r|j)                  ||d    |d   z         |	d   |	d	   }}|r|	d   }|j+                  |
      j)                  |d|
z
         |j+                  |      j-                  ||d|z
         |j'                         j)                  |d         }||z  }|j/                         dk  s|j/                         dkD  r|| |z  z  }n| j0                  dk(  rt3        j4                  d      j#                  |j                         }t3        j4                  d      j#                  |j                         }t3        j6                  ||      \  }}t3        j8                  ||      }t3        j6                  ||      \  }}t3        j8                  ||      }nZ| j0                  dk(  rt3        j4                  d      j#                  |j                         }t3        j4                  d      j#                  |j                         }t3        j:                  ||      \  }}t3        j<                  ||      }t3        j:                  ||      \  }}t3        j<                  ||      }n| j0                  dk(  rt3        j>                  d      j#                  |j                         }t3        j>                  d      j#                  |j                         }t3        j:                  ||      \  }}t3        j<                  ||      }t3        j:                  ||      \  }}t3        j<                  ||      }n| j0                  dk(  rt3        j@                  |      }t3        j@                  |      }t3        jB                  ||      }t3        jD                  ||      }t3        jB                  ||      }t3        jD                  ||      }n)| j0                  d k(  rntG        d!| j0                   d"      j'                         j)                  |d         }|z  }t        jH                  ||z
        } | t        jH                  |d#z         z  }!jK                         jK                         }}t3        jL                  ||jK                         |jK                         |        t3        jL                  ||jK                         |jK                         |!       t3        jL                  ||jK                         |jK                         t        jN                  |              || |z  z  }tQ        jR                         rtQ        jT                         dk(  rh| jV                  d$k7  rX|	d   d%z  dk(  rLtX        jZ                  j]                  | jV                        stY        j^                  | jV                         d&ja                  |jb                  D "cg c]  }"te        |"       c}"      }#tX        jZ                  ja                  | jV                  | d&|# d'      }$tX        jZ                  ja                  | jV                  | d&|# d(      }%tX        jZ                  ja                  | jV                  | d&|# d)      }&t        jf                  ||$       t        jf                  ||%       t        jf                  ||&       |j                  j                  t
        j                  t
        j                  hv s	]|j                  ji                  |       	z 	 |S c c}"w )*zPerforms a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   zJAdam does not support sparse gradients, please consider SparseAdam insteadr   Fr   stepexp_avg
exp_avg_sq)   rM   )device	abserrors	relerrorscountsmax_exp_avg_sq   r   r   r   )alpha)valuer   i    idynamic-blockwiseT)signed)codedynamiclinearquantilezmy-quantization-routinezInvalid analysis value: !gư> r"   _z_abserr.pklz_relerr.pklz_counts.pkl)5param_groups	enumerategraddatadtypetorchfloat16bfloat16float	is_sparseRuntimeErrorgetstatelen
zeros_likezerosrN   tomathsqrtadd_mul_addcmul_numelr=   Fcreate_dynamic_mapquantize_blockwisedequantize_blockwisequantize
dequantizecreate_linear_mapestimate_quantilesquantize_no_absmaxdequantize_no_absmax
ValueErrorabsinthistogram_scatter_add_2d	ones_likedistis_initializedget_rankr>   ospathexistsmakedirsjoinshapestrsavecopy_)'r   closurelossgroupp_idpra   r   p_data_fp32rk   beta1beta2bias_correction1bias_correction2	step_sizeerelerQ   rK   rL   rR   denomupdate_fp32code1code2C1S1state1C2S2state2update_8bitabserrrelerrdimshapestrpathepathrele
pathcountss'                                          r   rJ   zAnalysisAdam.stepg   s0    9D&& ]	.E$U8_5 \.a66>vv{{::%--!@@::<D>>&d   ))Iu5""{ff66<<EMM5>>#BB"-"3"3"5K

1 u:?$%E&M','7'7'DE)$*/*:*:;*GE,').";+=+=*E+& */";+=+=*E+& ',kk";+=+='E(O 272B2B;2O./',Y'7':':;'GE)$*/*=*@*@*ME,'278H2I2L2L'3./ f"$W~u#$uf'=#= #$uf'=#= $K$)),<"==@PP  +&[)x(A-$$#E.,A+AE$K+O %  ',I&6l8K%*+;%<N U#((QY(?&//d!e)/L")..uU|<%o  %%'4/"((*\9  I:#;;K}}(;; ! 4 4D A D DQXX N ! 4 4E B E Eahh O!"!5!5gE!JB!"!7!7B!?!"!5!5ju!MB!"!7!7B!?)3 ! 4 4D A D DQXX N ! 4 4E B E Eahh O!"G%!@B!"b"!5!"JU!CB!"b"!5(2 ! 3 34 @ C CAHH M ! 3 35 A D DQXX N!"G%!@B!"b"!5!"JU!CB!"b"!5*4 ! 4 4W = ! 4 4Z @11'F!"!7!7E!B11*5I!"!7!7E!B*CC )6t}}oQG  #KKM..uU|<E"(5.K"YY{['@AF#eiid0B&CCFVVXrvvxB..q"&&(BFFHfM..tRVVXrvvxP.."&&(EOOF4K  I:#;;K..0DMMOq4H<<2-%-#2E2J#%77>>$,,#? "DLL 9'*xx5@5F5F GcS G(H %'GGLL $az.M%E (*ww|| $az.M(H *, $az.M*J "JJq%0!JJtX6!JJvz:66<<EMM5>>#BBFFLL-y\.]	.~ % !Hs   $f*)r   r   r    r   FrV   N)N)
r$   r%   r&   __doc__r   propertyrE   rH   rJ   r'   r(   s   @r   r:   r:   .   sR    : (,    jr   r:   )rp   r   rd   torch.distributeddistributedr   bitsandbytes.functional
functionalrv   bitsandbytes.optim.optimizerr   r   r*   r.   r1   r4   r7   optim	Optimizerr:   rC   r   r   <module>r      s     	    # 8]? ]
T T
U U
Y Y
PO P
Q_ Q
c5;;(( cr   