
    ;i                         S SK r S SKrSSKJrJr  SSKJrJrJrJ	r	  \	" 5       (       a  S SK
Js  Jr  S SKJr  S r " S S\R$                  R&                  5      rS\4S	 jrg)
    N   )AcceleratorStateGradientState)DistributedType
honor_typeis_lomo_availableis_torch_xla_availablec                 ~  ^ [        U [        [        45      (       a  [        U U4S jU  5       5      $ [        U [        5      (       a?  [        U 5      " U R                  5        VVs0 s H  u  p#U[        UT5      _M     snn5      $ [        U [        R                  5      (       a  U R                  T5      $ U $ s  snnf )Nc              3   <   >#    U  H  n[        UT5      v   M     g 7fN)move_to_device).0tdevices     d/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/accelerate/optimizer.py	<genexpr>!move_to_device.<locals>.<genexpr>   s     !KU.F";";Us   )
isinstancelisttupler   dicttypeitemsr   torchTensorto)stater   kvs    `  r   r   r      s    %$''%!KU!KLL	E4	 	 E{U[[]S]TQA~a88]STT	E5<<	(	(xxL Ts   &B9
c                      \ rS rSrSrSS jr\S 5       r\R                  S 5       r\S 5       r	\	R                  S 5       r	\S	 5       r
\
R                  S
 5       r
S rS rS rSS jrS rS rSS jrS r\S 5       rS rS rSrg)AcceleratedOptimizer&   a~  
Internal wrapper around a torch optimizer.

Conditionally will perform `step` and `zero_grad` if gradients should be synchronized when performing gradient
accumulation.

Args:
    optimizer (`torch.optim.optimizer.Optimizer`):
        The optimizer to wrap.
    device_placement (`bool`, *optional*, defaults to `True`):
        Whether or not the optimizer should handle device placement. If so, it will place the state dictionary of
        `optimizer` on the right device.
    scaler (`torch.amp.GradScaler` or `torch.cuda.amp.GradScaler`, *optional*):
        The scaler to use in the step function if training with mixed precision.
Nc                 v   Xl         X0l        [        5       U l        [	        5       U l        X l        SU l        U R                  bF  SU l        U R                   R                  U l
        [        X R                   R                  5      U l        U(       a  U R                   R                  5       nU R                  R                  [        R                   :X  a+  ["        R$                  " X@R                  R&                  5        O[)        X@R                  R&                  5      nU R                   R+                  U5        g g NF)	optimizerscalerr   accelerator_stater   gradient_statedevice_placement_is_overflow_accelerate_step_calledstep_optimizer_original_step_methodpatch_optimizer_step_optimizer_patched_step_method
state_dictdistributed_typer   XLAxmsend_cpu_data_to_devicer   r   load_state_dict)selfr%   r)   r&   r0   s        r   __init__AcceleratedOptimizer.__init__7   s    "!1!3+o 0!;;"+0D(37>>3F3FD02Ft^^M`M`2aD/ 224J%%66/:M:MM**:7M7M7T7TU+J8N8N8U8UV
NN**:6     c                 .    U R                   R                  $ r   r%   r   r6   s    r   r   AcceleratedOptimizer.stateM   s    ~~###r9   c                 $    XR                   l        g r   r;   r6   r   s     r   r   r=   Q   s    $r9   c                 .    U R                   R                  $ r   r%   param_groupsr<   s    r   rB   !AcceleratedOptimizer.param_groupsU   s    ~~***r9   c                 $    XR                   l        g r   rA   )r6   rB   s     r   rB   rC   Y   s    &2#r9   c                 .    U R                   R                  $ r   r%   defaultsr<   s    r   rG   AcceleratedOptimizer.defaults]   s    ~~&&&r9   c                 $    XR                   l        g r   rF   )r6   rG   s     r   rG   rH   a   s    "*r9   c                 :    U R                   R                  U5        g r   )r%   add_param_group)r6   param_groups     r   rK   $AcceleratedOptimizer.add_param_groupe   s    &&{3r9   c                     U R                   R                  [        R                  :X  a;  U R                  (       a*  [
        R                  " XR                   R                  5        U R                  R                  U5        g r   )
r'   r1   r   r2   r)   r3   r4   r   r%   r5   )r6   r0   s     r   r5   $AcceleratedOptimizer.load_state_dicth   sP    !!22o6I6IIdNcNc&&z3I3I3P3PQ&&z2r9   c                 6    U R                   R                  5       $ r   )r%   r0   r<   s    r   r0   AcceleratedOptimizer.state_dictm   s    ~~((**r9   c                 F   U R                   R                  (       a  S[        R                  " U R                  R
                  5      R                  ;   nU(       a  Uc  SnU R                  R                  US9  g Ub  [        S5      eU R                  R                  5         g g )Nset_to_noneT)rS   zJ`set_to_none` for Optimizer.zero_grad` is not supported by this optimizer.)r(   sync_gradientsinspect	signaturer%   	zero_grad
parameters
ValueError)r6   rS   
accept_args      r   rW   AcceleratedOptimizer.zero_gradp   s    --&'*;*;DNN<T<T*U*`*``J&"&K(([(A*$%qrr((* .r9   c                    [        U R                  S5      (       a?  [        U R                  R                  5      (       a  U R                  R                  5         g[        U R                  S5      (       az  [        U R                  R                  S5      (       aT  [        U R                  R                  R                  5      (       a%  U R                  R                  R                  5         gggg)zP
Sets the optimizer to "train" mode. Useful for optimizers like `schedule_free`
trainr%   N)hasattrr%   callabler]   r<   s    r   r]   AcceleratedOptimizer.train|   s     4>>7++9M9M0N0NNN  "DNNK0000'::117788 NN$$**, 9 ; 1r9   c                     [        U R                  S5      (       a@  [        U R                  R                  5      (       a  U R                  R                  5         ggg)zO
Sets the optimizer to "eval" mode. Useful for optimizers like `schedule_free`
evalN)r^   r%   r_   rb   r<   s    r   rb   AcceleratedOptimizer.eval   sB     4>>6**x8K8K/L/LNN! 0M*r9   c                    [        5       (       a  SSKJnJn  U R                  R
                  (       d  U R                  R                  [        R                  :X  a]  [        R                  " U R                  5      n[        R                  " SUS[        R                  " 5       -  S9  SU R                  l        [        5       (       a  [!        U R                  WW45      (       a  g U R                  R"                  (       a  U R$                  b  U R&                  U R                  l        U R$                  R)                  U R                  U5        U R$                  R+                  5         U R,                  (       d  SU l        OSU l        U R0                  U R                  l        SU l        OU R                  R)                  U5        U R                  R                  [        R                  :X  a  SU R                  l        g g )Nr   )AdaLomoLomosumg      ?)scaleTF)r   
lomo_optimre   rf   r(   is_xla_gradients_syncedr'   r1   r   r2   r3   _fetch_gradientsr%   
all_reducexr
world_sizer   rT   r&   r/   r,   updater+   r*   r-   )r6   closurere   rf   	gradientss        r   r,   AcceleratedOptimizer.step   s]   0 ##;;&&77?;N;NN++DNN;IMM%#2GH:>D7$..4/::--{{&&*&I&I#  9""$33(,D%(-D%&*&J&J#/4,##G,!!22o6I6II:?D7 Jr9   c                     U R                   R                   H*  nUS    Vs/ s H  o1R                  X35      PM     snUS'   M,     g s  snf )Nparams)r%   rB   get)r6   parameters_maprL   ps       r   _switch_parameters'AcceleratedOptimizer._switch_parameters   sC    >>66KGRS[G\$]G\!%7%7%=G\$]K! 7$]s   Ac                     U R                   $ )z.Whether or not the optimizer step was skipped.)r*   r<   s    r   step_was_skipped%AcceleratedOptimizer.step_was_skipped   s        r9   c                     / SQnU R                   R                  5        VVs0 s H  u  p#X!;  d  M  X#_M     snn$ s  snnf )N)r+   r-   r/   )__dict__r   )r6   _ignored_keysr   r   s       r   __getstate__!AcceleratedOptimizer.__getstate__   s=    

 "&!4!4!6Q!6!:P!6QQQs   ;;c                     U R                   R                  U5        U R                  bG  SU l        U R                  R
                  U l        [        X R                  R
                  5      U l        g g r$   )	r~   ro   r&   r+   r%   r,   r-   r.   r/   r?   s     r   __setstate__!AcceleratedOptimizer.__setstate__   sT    U#;;"+0D(37>>3F3FD02Ft^^M`M`2aD/ #r9   )	r+   r*   r-   r/   r'   r)   r(   r%   r&   )TNr   )__name__
__module____qualname____firstlineno____doc__r7   propertyr   setterrB   rG   rK   r5   r0   rW   r]   rb   r,   rx   r{   r   r   __static_attributes__ r9   r   r!   r!   &   s     7, $ $ \\% % + + 3 3 ' ' __+ +43
+
+-"$@L^ ! !Rbr9   r!   accelerated_optimizerc                    ^ ^ U U4S jnU$ )Nc                  "   > STl         T" U 0 UD6$ )NT)r+   )argskwargsr   methods     r   patched_step*patch_optimizer_step.<locals>.patched_step   s    8<5t&v&&r9   r   )r   r   r   s   `` r   r.   r.      s    ' r9   )rU   r   r   r   r   utilsr   r   r   r	   torch_xla.core.xla_modelcore	xla_modelr3   torch_xla.runtimeruntimerm   r   optim	Optimizerr!   r.   r   r9   r   <module>r      sW      2 Y Y ))"gb5;;00 gbT0D r9   