
import inspect
import warnings

import torch

from .state import AcceleratorState, GradientState
from .utils import DistributedType, honor_type, is_tpu_available


if is_tpu_available(check_device=False):
    import torch_xla.core.xla_model as xm


def move_to_device(state, device):
    # Recursively move every tensor in an optimizer state object to `device`,
    # preserving container types (including namedtuples, via `honor_type`).
    if isinstance(state, (list, tuple)):
        return honor_type(state, (move_to_device(t, device) for t in state))
    elif isinstance(state, dict):
        return type(state)({k: move_to_device(v, device) for k, v in state.items()})
    elif isinstance(state, torch.Tensor):
        return state.to(device)
    return state


class AcceleratedOptimizer(torch.optim.Optimizer):
    """
    Internal wrapper around a torch optimizer.

    Performs `step` and `zero_grad` only when gradients should be synchronized, so both become no-ops during the
    intermediate iterations of gradient accumulation.

    Args:
        optimizer (`torch.optim.optimizer.Optimizer`):
            The optimizer to wrap.
        device_placement (`bool`, *optional*, defaults to `True`):
            Whether or not the optimizer should handle device placement. If so, it will place the state dictionary of
            `optimizer` on the right device.
        scaler (`torch.cuda.amp.grad_scaler.GradScaler`, *optional*):
            The scaler to use in the step function if training with mixed precision.
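
    Example:

        A minimal sketch of the intended usage: this wrapper is not instantiated directly but returned by
        [`Accelerator.prepare`] in place of the optimizer passed in (`model`, `optimizer`, and `dataloader` below
        are assumed to have been created beforehand, with a transformers-style model that returns a `loss`):

        ```python
        >>> from accelerate import Accelerator

        >>> accelerator = Accelerator(gradient_accumulation_steps=2)
        >>> model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
        >>> for batch in dataloader:
        ...     with accelerator.accumulate(model):
        ...         loss = model(**batch).loss
        ...         accelerator.backward(loss)
        ...         optimizer.step()  # forwarded to the wrapped optimizer only when gradients sync
        ...         optimizer.zero_grad()
        ```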
    """

    def __init__(self, optimizer, device_placement=True, scaler=None):
        self.optimizer = optimizer
        self.scaler = scaler
        self.accelerator_state = AcceleratorState()
        self.gradient_state = GradientState()
        self.device_placement = device_placement
        self._is_overflow = False

        if self.scaler is not None:
            self._accelerate_step_called = False
            self._optimizer_original_step_method = self.optimizer.step
            self._optimizer_patched_step_method = patch_optimizer_step(self, self.optimizer.step)

        # Handle device placement
        if device_placement:
            state_dict = self.optimizer.state_dict()
            if self.accelerator_state.distributed_type == DistributedType.TPU:
                xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device)
            else:
                state_dict = move_to_device(state_dict, self.accelerator_state.device)
            self.optimizer.load_state_dict(state_dict)

    @property
    def state(self):
        return self.optimizer.state

    @state.setter
    def state(self, state):
        self.optimizer.state = state

    @property
    def param_groups(self):
        return self.optimizer.param_groups

    @param_groups.setter
    def param_groups(self, param_groups):
        self.optimizer.param_groups = param_groups

    @property
    def defaults(self):
        return self.optimizer.defaults

    @defaults.setter
    def defaults(self, defaults):
        self.optimizer.defaults = defaults

    def add_param_group(self, param_group):
        self.optimizer.add_param_group(param_group)

    def load_state_dict(self, state_dict):
        if self.accelerator_state.distributed_type == DistributedType.TPU and self.device_placement:
            xm.send_cpu_data_to_device(state_dict, self.accelerator_state.device)
        self.optimizer.load_state_dict(state_dict)

    def state_dict(self):
        return self.optimizer.state_dict()

    def zero_grad(self, set_to_none=None):
        if self.gradient_state.sync_gradients:
            accept_arg = "set_to_none" in inspect.signature(self.optimizer.zero_grad).parameters
            if accept_arg:
                if set_to_none is None:
                    set_to_none = False
                self.optimizer.zero_grad(set_to_none=set_to_none)
            else:
                if set_to_none is not None:
                    raise ValueError("`set_to_none` for `Optimizer.zero_grad` is not supported by this optimizer.")
                self.optimizer.zero_grad()

    def step(self, closure=None):
        if self.gradient_state.sync_gradients:
            if self.accelerator_state.distributed_type == DistributedType.TPU:
                optimizer_args = {"closure": closure} if closure is not None else {}
                xm.optimizer_step(self.optimizer, optimizer_args=optimizer_args)
            elif self.scaler is not None:
                self.optimizer.step = self._optimizer_patched_step_method

                self.scaler.step(self.optimizer, closure)
                self.scaler.update()

                if not self._accelerate_step_called:
                    # If the patched step was never called, the scaler skipped the step:
                    # gradient overflow was detected.
                    self._is_overflow = True
                else:
                    self._is_overflow = False
                # Reset the step method to the original one
                self.optimizer.step = self._optimizer_original_step_method
                # Reset the indicator
                self._accelerate_step_called = False
            else:
                self.optimizer.step(closure)

    def _switch_parameters(self, parameters_map):
        for param_group in self.optimizer.param_groups:
            param_group["params"] = [parameters_map.get(p, p) for p in param_group["params"]]

    @property
    def is_overflow(self):
        """Whether or not the optimizer step was done, or skipped because of gradient overflow."""
        warnings.warn(
            "The `is_overflow` property is deprecated and will be removed in version 1.0 of Accelerate; use "
            "`optimizer.step_was_skipped` instead.",
            FutureWarning,
        )
        return self._is_overflow

    @property
    def step_was_skipped(self) -> bool:
        """Whether or not the optimizer step was skipped."""
        return self._is_overflow

    def __getstate__(self):
        # The patched step methods close over `self`, so drop them when pickling;
        # `__setstate__` rebuilds them.
        _ignored_keys = [
            "_accelerate_step_called",
            "_optimizer_original_step_method",
            "_optimizer_patched_step_method",
        ]
        return {k: v for k, v in self.__dict__.items() if k not in _ignored_keys}

    def __setstate__(self, state):
        self.__dict__.update(state)
        if self.scaler is not None:
            self._accelerate_step_called = False
            self._optimizer_original_step_method = self.optimizer.step
            self._optimizer_patched_step_method = patch_optimizer_step(self, self.optimizer.step)


def patch_optimizer_step(accelerated_optimizer: AcceleratedOptimizer, method):
    def patched_step(*args, **kwargs):
        # Record that the wrapped step actually ran, so `AcceleratedOptimizer.step` can
        # tell a real step apart from one skipped by the `GradScaler`.
        accelerated_optimizer._accelerate_step_called = True
        return method(*args, **kwargs)

    return patched_step