
    ;i                    L   S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJr  S SK	J
r
  S SKJrJr  S SKrSSKJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)  SSK*J+r+  \$" 5       (       a  S SK,J-s  J.r/  S SK0J1r2  \" S	S
9(       a  S SK3r3\#" S	S
9(       a  S SK4r4\!" S	S
9(       a  S SK5r5\"" S	S
9(       a  S SK6r6\Rn                  " \85      r9SS jr:S r; " S S\Rx                  5      r=\$" 5       (       d  \>O\=r? " S S5      r@ " S S5      rA " S S5      rBg)    )annotationsN)contextmanager)partial)AnyCallable   )DistributedTypeDynamoBackendGradientAccumulationPlugincheck_cuda_fp8_capabilitycheck_cuda_p2p_ib_supportdeepspeed_requiredget_cpu_distributed_informationget_int_from_envis_ccl_availableis_datasets_availableis_deepspeed_availableis_fp8_availableis_habana_gaudi1is_hpu_availableis_ipex_availableis_mlu_availableis_mps_availableis_musa_availableis_npu_availableis_sdaa_availableis_torch_xla_availableis_xccl_availableis_xpu_availableparse_choice_from_envparse_flag_from_envset_numa_affinity)SageMakerDistributedTypeF)check_devicec                 (    [         R                  0 :g  $ )z
Checks if the `AcceleratorState` has been initialized from `Accelerator`. Same as `AcceleratorState.initialized`,
but works as a module method.
)AcceleratorState_shared_state     `/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/accelerate/state.pyis_initializedr+   O   s    
 ))R//r)   c                     g Nr(   )argskwargss     r*   
do_nothingr0   X   s    r)   c                  6    \ rS rSrSrSS	S jjrS
S jrS rSrg)ThreadLocalSharedDict\   a  
Descriptor that holds a dict shared between instances of a class in the same thread.

Note: Descriptors have slightly different semantics than just a dict field on its own.
`PartialState(...)._shared_state` and `PartialState._shared_state` (instance vs class) give the same value: the
underlying _storage dict. Likewise, `PartialState(...)._shared_state = {...}` overrides the _storage dict inside
the descriptor as you would expect. However, `PartialState._shared_state = {}` actually replaces the descriptor
object with a dict instead Thus, you should modify the _storage dict in-place (e.g. `_shared_state.clear()`).

See Python documentation for an explanation of descriptors: https://docs.python.org/3/howto/descriptor.html

This is required for using PyTorch/XLA with PJRT in multithreaded mode (required for TPU v2 and v3).

See https://github.com/pytorch/xla/blob/r2.0/docs/pjrt.md#multithreading-on-tpu-v2v3
c                    0 U l         g r-   _storage)selfthread_locals     r*   __init__ThreadLocalSharedDict.__init__m   s	    r)   Nc                    U R                   $ r-   r5   )r7   objobjtypes      r*   __get__ThreadLocalSharedDict.__get__p   s    }}r)   c                    X l         g r-   r5   )r7   r<   values      r*   __set__ThreadLocalSharedDict.__set__s   s    r)   r5   F)r8   boolr-   )	__name__
__module____qualname____firstlineno____doc__r9   r>   rB   __static_attributes__r(   r)   r*   r2   r2   \   s     r)   r2   c                     \ rS rSrSr\" 5       r/ SQrSSS jjrS S jr	\
S 5       r\S!S j5       r\S 5       r\S!S	 j5       r\S!S
 j5       r\S!S j5       rS rS"S jr\SS#S jj5       r\S 5       r\S 5       rS$S%S jjrS$S%S jjrS&S jrS'S(S jjrS'S)S jjrS r\S*S j5       r S+     S,S jjrS r S$S jr!S-S jr"Sr#g).PartialState|   a  
Singleton class that has information about the current training environment and functions to help with process
control. Designed to be used when only process control and device execution states are needed. Does *not* need to
be initialized from `Accelerator`.

Args:
    cpu (`bool`, *optional*):
        Whether or not to force the script to execute on CPU. Will ignore any accelerators available if set to
        `True` and force the execution on the CPU.
    kwargs (additional keyword arguments, *optional*):
        Additional keyword arguments to pass to the relevant `init_process_group` function. Valid `kwargs` can be
        found in [`utils.InitProcessGroupKwargs`]. See the example section for detailed usage.

**Available attributes:**

    - **device** (`torch.device`) -- The device to use.
    - **distributed_type** ([`~accelerate.state.DistributedType`]) -- The type of distributed environment currently
      in use.
    - **local_process_index** (`int`) -- The index of the current process on the current server.
    - **mixed_precision** (`str`) -- Whether or not the current script will use mixed precision, and if so the type
      of mixed precision being performed. (Choose from 'no','fp16','bf16 or 'fp8').
    - **num_processes** (`int`) -- The number of processes currently launched in parallel.
    - **process_index** (`int`) -- The index of the current process.
    - **is_last_process** (`bool`) -- Whether or not the current process is the last one.
    - **is_main_process** (`bool`) -- Whether or not the current process is the main one.
    - **is_local_main_process** (`bool`) -- Whether or not the current process is the main one on the local node.
    - **debug** (`bool`) -- Whether or not the current script is being run in debug mode.

Example:
```python
from accelerate.utils import InitProcessGroupKwargs

# To include `InitProcessGroupKwargs`, init then call `.to_kwargs()`
kwargs = InitProcessGroupKwargs(...).to_kwargs()
state = PartialState(**kwargs)
```
)_cpu_mixed_precisionr'   backenddebugdevicedistributed_typefork_launchedlocal_process_indexnum_processesprocess_indexc                8   U R                   U l        U R                  (       Gdf  Xl        S U l        [
        R                  R                  SS 5      nUb  [        R                  " U5      OS U l	        [        S5      U l        UR                  SS 5      nS nUch  [
        R                  R                  SS5      R                  5       S:H  =(       a0    [
        R                  R                  S5      [        R                  :g  nUR                  SS 5      nU R!                  XU5      u  pxUb  Xv:w  a  [#        S	U S
U 35      eXpl        Xl        Sn	U(       Gd  U R                  S:w  Gaz  ['        [
        R                  R                  SS5      5      S:w  GaL  [
        R                  R                  SS5      R                  5       S:X  a  [)        5       (       d  [+        S5      eSSKJn
  U
R1                  5       (       dq  U R                  S:X  aB  [
        R                  R                  SS5      n[        R2                  R5                  SU 35        U
R6                  " S4U R                  SS.UD6  Sn	GOpU R$                  [8        R:                  [8        R<                  4;  GaA  [        R>                  R1                  5       (       Gd  U R                  S:X  aB  [
        R                  R                  SS5      n[        R2                  R5                  SU 35        U R                  S:X  a  [
        R                  R                  SS5      R                  5       S:X  a]  [
        R                  R                  SS5      R                  5       S:X  d$  [
        R                  R                  SS5      S:X  a  SU l        [        R>                  R@                  " S4SU R                  0UD6  U R$                  [8        R:                  [8        R<                  4;   Ga1  [C        5       n[E        URF                  5      [
        R                  S'   [E        URH                  5      [
        R                  S'   [E        URJ                  5      [
        R                  S'   [E        URL                  5      [
        R                  S '   [
        R                  R                  S!S 5      (       d  S"[
        R                  S!'   [
        R                  R                  S#S 5      (       d5  URL                  URH                  :w  a  U R                  S$:w  a  [#        S%5      eURF                  US&'   URH                  US''   U R$                  [8        R<                  :X  ar  [O        S(/S5      S:X  aa  SS K(n['        URS                  SS)9URL                  -  5      nUS:X  a  S*n[        RT                  " U5        [V        RX                  " S+U S,35        [        R>                  R1                  5       (       d,  [        R>                  R@                  " S4SU R                  0UD6  U R                  c,  [8        R                  U l        S*U l-        SU l.        SU l/        GOcU R                  S:X  a  U R5                  5         [`        Rb                  " U R                  [`        Rd                  " 5       5        [f        RH                  " 5       U l-        [f        Rh                  " 5       U l.        [k        SS-9(       a  [`        Rl                  " 5       U l/        O['        [
        R                  R                  SS5      5      U l/        O[        R>                  Ro                  5       U l-        [        R>                  Rq                  5       U l.        Uc)  ['        [
        R                  R                  SS5      5      OURJ                  U l/        U R5                  5         U	(       a  [8        Rr                  U l        [        S.S5      (       a  [u        U R^                  5        U R                  Rv                  S/:X  aB  [y        5       (       d3  S0[
        R                  ;  d  S1[
        R                  ;  a  [{        S25      e[        S3S5      U l>        g )5NACCELERATE_TORCH_DEVICEACCELERATE_DEBUG_MODE_use_sagemaker_dpACCELERATE_USE_SAGEMAKERfalsetrue%ACCELERATE_SAGEMAKER_DISTRIBUTED_TYPErQ   zYour assigned backend z is not available, please use Fxla
LOCAL_RANKACCELERATE_USE_DEEPSPEEDz_DeepSpeed is not available => install it using `pip3 install deepspeed` or build it from sourcer   )commtcclzsdaa:)dist_backendauto_mpi_discoveryTncclACCELERATE_USE_FSDPFSDP_OFFLOAD_PARAMSFSDP_STATE_DICT_TYPESHARDED_STATE_DICTFULL_STATE_DICTzcuda:nccl,cpu:glooRANK
WORLD_SIZELOCAL_WORLD_SIZEMASTER_PORT29500MASTER_ADDRmpizTried to launch on distributed with multinode, but `MASTER_ADDR` env was not set, please try exporting rank 0's hostname as `MASTER_ADDR`rank
world_sizeOMP_NUM_THREADS)logicalr   z4OMP_NUM_THREADS/MKL_NUM_THREADS unset, we set it at z to improve oob performance.check_is_tpuACCELERATE_CPU_AFFINITYcudaNCCL_P2P_DISABLENCCL_IB_DISABLEzUsing RTX 4000 series doesn't support faster communication broadband via P2P or IB. Please set `NCCL_P2P_DISABLE="1"` and `NCCL_IB_DISABLE="1" or use `accelerate launch` which will do this automatically.FORK_LAUNCHEDr(   )?r'   __dict__initializedrO   rQ   osenvirongettorchrS   r!   rR   poplowerr#   NO_prepare_backend
ValueErrorrT   intr   ImportError	deepspeedre   r+   sdaa
set_deviceinit_distributedr	   	MULTI_XPU	MULTI_CPUdistributedinit_process_groupr   strrv   rw   
local_ranklocal_world_sizer   psutil	cpu_countset_num_threadswarningswarnrW   rX   rV   xmset_replicationget_xla_supported_devicesxrglobal_ordinalr   get_local_ordinalget_world_sizeget_rank	DEEPSPEEDr"   typer   NotImplementedErrorrU   )r7   cpur/   
env_deviceuse_sagemaker_dpdist_informationoriginal_backendrQ   rT   use_deepspeeddistr   r   num_cpu_threads_per_processs                 r*   r9   PartialState.__init__   s   **IDL(A4HJ6@6L%,,z2RVDK,-DEDJ%zz*=tD#'JJNN#=wGMMOSYY o

'NOSkSnSnn !  &zz)T:(,(=(=cUe(f%G+0K #9:J9KKijqir!stt"L$4!!M4<<50rzz~~lB78B>zz~~&@'JPPRV\\577"- !B#  ;#2244#||v5-/ZZ^^L"-M
 %

 5 5j\6J K 11pt||`epiop(, --o6O6OQ`QjQj5kk % 1 1 @ @ B B<<61)+b)IJ!JJ11E*2FG LLF2 "

/Dg N T T VZ` ` "

/Dg N T T VZ` `#%::>>2HJ^#_ct#t ,@DL))<<\T\\\U[\ $$)B)BOD]D](^^#B#D %()9)>)>%?

6"+./?/J/J+K

<(+./?/J/J+K

<(145E5V5V1W

-.zz~~mT::07BJJ}-

}d;;(99=M=X=XX-$R  "2!6!6v'7'B'B|$ ))_-F-FF(*;)<a@AE!25(((7:J:[:[[3/ 3a7673))*EFMMNOjNk l( (
 ((7799%%88XXQWX ||#(7(:(:%%&"%&"+,(&!""4;;0L0L0NO%']]_"%'%6%6%8")t</1/C/C/ED,/22::>>,PR3S/TD,%*%6%6%E%E%G"%*%6%6%?%?%A"=M=UC

|R89[k[v[v ( OO(7(A(A% ##<eDD!$":":; {{6)2K2M2M%RZZ7;LTVT^T^;^-6  1!Dr)   c                    SU R                    U R                  (       a  SU R                  -   OS SU R                   SU R                   SU R                   SU R
                   S3$ )	NzDistributed environment: z  Backend:  z
Num processes: z
Process index: z
Local process index: z	
Device: 
)rT   rQ   rW   rX   rV   rS   r7   s    r*   __repr__PartialState.__repr__B  s{    '(=(='>aeamamPTP\P\@\su>v w"001 2"001 2$$($<$<#= >{{m2	'	
r)   c                 @    [         R                  R                  5         gzCResets `_shared_state`, is used internally and should not be calledN)rM   r'   clearr(   r)   r*   _reset_statePartialState._reset_stateK  s     	""((*r)   c                     U R                   0 :g  $ )z7Returns whether the `PartialState` has been initialized)r'   r   s    r*   r   PartialState.initializedP  s     !!R''r)   c                h    U R                   [        R                  :g  =(       a    U R                  S:  $ )@
Whether the Accelerator is configured for distributed training
r   )rT   r	   r   rW   r   s    r*   use_distributedPartialState.use_distributedU  s,    
 $$(:(::Ut?Q?QTU?UUr)   c                :    U R                   U R                  S-
  :H  $ )3Returns whether the current process is the last oner   )rX   rW   r   s    r*   is_last_processPartialState.is_last_process\  s      !!T%7%7!%;;;r)   c                t    U R                   [        R                  :w  a  U R                  S:H  $ U R                  $ )7Returns whether the current process is the main processr   )rT   r	   MEGATRON_LMrX   r   r   s    r*   is_main_processPartialState.is_main_processa  s8     (,'<'<@[@['[D!#	
aeauau	
r)   c                t    U R                   [        R                  :w  a  U R                  S:H  $ U R                  $ )IReturns whether the current process is the main process on the local noder   )rT   r	   r   rV   r   r   s    r*   is_local_main_process"PartialState.is_local_main_processh  s=    
 $$(C(CC $$)	
 %%	
r)   c                   U R                   [        R                  [        R                  [        R                  [        R
                  [        R                  [        R                  [        R                  [        R                  [        R                  [        R                  4
;   a)  [        R                  R                  U R                  /S9  gU R                   [        R                   :X  a  ["        R$                  " S5        gg)aT  
Will stop the execution of the current process until every other process has reached that point (so this does
nothing when the script is only run in one process). Useful to do before saving a model.

Example:

```python
>>> # Assuming two GPU processes
>>> import time
>>> from accelerate.state import PartialState

>>> state = PartialState()
>>> if state.is_main_process:
...     time.sleep(2)
>>> else:
...     print("I'm waiting for the main process to finish its sleep...")
>>> state.wait_for_everyone()
>>> # Should print on every process at the same time
>>> print("Everyone is here")
```
)
device_idsz"accelerate.utils.wait_for_everyoneN)rT   r	   	MULTI_GPU	MULTI_MLU
MULTI_SDAA
MULTI_MUSA	MULTI_NPUr   r   	MULTI_HPUr   FSDPr   r   barrierrV   XLAr   
rendezvousr   s    r*   wait_for_everyonePartialState.wait_for_everyoneq  s    ,   %%%%&&&&%%%%%%%%%%  %
 
 %%$2J2J1K%L""o&9&99MM>? :r)   c              #  r   #    U(       d  U R                  5         S v   U(       a  U R                  5         g g 7fr-   )r   )r7   is_mains     r*   _goes_firstPartialState._goes_first  s+     ""$""$ s   57c              #    ^ ^^^^^#    T R                   S:X  a  Uv   g[        U5      m[        U[        5      (       a[  [        U[	        UR                  5       5      S      5      m[        U4S jUR                  5        5       5      (       d  [        S5      e[        TT R                   5      u  mmT R                  T-  [        T R                  T5      -   nUT-   T R                  T:  a  SOS-   nUUUUU 4S jmT" XU5      v   g7f)a  
Splits `input` between `self.num_processes` quickly and can be then used on that process. Useful when doing
distributed inference, such as with different prompts.

Note that when using a `dict`, all keys need to have the same number of elements.

Args:
    inputs (`list`, `tuple`, `torch.Tensor`, `dict` of `list`/`tuple`/`torch.Tensor`, or `datasets.Dataset`):
        The input to split between processes.
    apply_padding (`bool`, `optional`, defaults to `False`):
        Whether to apply padding by repeating the last element of the input so that all processes have the same
        number of elements. Useful when trying to perform actions such as `gather()` on the outputs or passing
        in less inputs than there are processes. If so, just remember to drop the padded elements afterwards.


Example:

```python
# Assume there are two processes
from accelerate import PartialState

state = PartialState()
with state.split_between_processes(["A", "B", "C"]) as inputs:
    print(inputs)
# Process 0
["A", "B"]
# Process 1
["C"]

with state.split_between_processes(["A", "B", "C"], apply_padding=True) as inputs:
    print(inputs)
# Process 0
["A", "B"]
# Process 1
["C", "C"]
```
r   Nr   c              3  @   >#    U  H  n[        U5      T:H  v   M     g 7fr-   )len).0vlengths     r*   	<genexpr>7PartialState.split_between_processes.<locals>.<genexpr>  s     AAs1v's   z6All values in the dictionary must have the same lengthc                8  > [        U [        [        [        R                  45      (       a  U[        U 5      :  a  U SS  nOXU nT(       ag  [        U[        R                  5      (       a&  SSKJnJn  U" UTR                  5      nU" X`S   S9nU$ X3S   /TTS:  a  SOS-   [        U5      -
  -  -  nU$ [        U [        5      (       a&  U R                  5        H  nT
" X   X5      X'   M     U $ [        5       (       a  SSKJn  [        X5      (       a  U[        U 5      :  a  [        U 5      S-
  nU[        U 5      :  a  [        U 5      n[        [        X5      5      n	T(       a"  XS-
  /TTS:  a  SOS-   [        U	5      -
  -  -  n	U R!                  U	5      $ U $ )Nrc   r   )pad_across_processessend_to_device)	pad_indexr   )Dataset)
isinstancelisttupler   Tensorr   accelerate.utilsr   r   rS   dictkeysr   datasetsr   rangeselect)inputsstart_index	end_indexresultr   r   tensorized_resultkeyr   result_idcs_split_valuesapply_padding
num_extrasnum_samples_per_processr7   s             r*   r   ;PartialState.split_between_processes.<locals>._split_values  s   &4"=>>#f+-#BC[F#	:F !&%,,77Y -;64;;,O)!56GZ\S]!^  ":,2IR\_`R`Qfg2hknoukv2v"wwFD))!;;=C"/["TFK )(**0!&22&#f+5*-f+/K$s6{2(+FI&*5+H&I('M? 7
Q1TU VY\]hYi i, K  &}}[99r)   )rW   r   r   r   r   r   allvaluesr   divmodrX   min)	r7   r   r  r   r   r   r   r  r  s	   ` `  @@@@r*   split_between_processes$PartialState.split_between_processes  s     N "LVfd##V[[] 3A 678FAAAA !YZZ.4VT=O=O.P+((+BBSI[I[]gEhh"99$BTBTWaBaQghi	#	 #	J F;;s   D Dc              #  V   #    U R                  U R                  5       Sh  vN   g N7f)a  
Lets the main process go first inside a with block.

The other processes will enter the with block after the main process exits.

Example:

```python
>>> from accelerate import Accelerator

>>> accelerator = Accelerator()
>>> with accelerator.main_process_first():
...     # This will be printed first by process 0 then in a seemingly
...     # random order by the other processes.
...     print(f"This will be printed by process {accelerator.process_index}")
```
N)r   r   r   s    r*   main_process_firstPartialState.main_process_first  s!     & ##D$8$8999   )')c              #  V   #    U R                  U R                  5       Sh  vN   g N7f)a  
Lets the local main process go inside a with block.

The other processes will enter the with block after the main process exits.

Example:

```python
>>> from accelerate.state import PartialState

>>> state = PartialState()
>>> with state.local_main_process_first():
...     # This will be printed first by local process 0 then in a seemingly
...     # random order by the other processes.
...     print(f"This will be printed by process {state.local_process_index}")
```
N)r   r   r   s    r*   local_main_process_first%PartialState.local_main_process_first  s!     & ##D$>$>???r  Nc                    U R                   (       d  [        S5      eU R                  (       d  U R                  (       d  U$ [        $ )a  
Decorator that only runs the decorated function on the main process.

Args:
    function (`Callable`): The function to decorate.

Example:

```python
>>> from accelerate.state import PartialState

>>> state = PartialState()


>>> @state.on_main_process
... def print_something():
...     print("This will be printed by process 0 only.")


>>> print_something()
"This will be printed by process 0 only"
```
zUThe `PartialState` or `Accelerator` must be initialized before calling this function.)r   r   r   r   r0   r7   functions     r*   on_main_processPartialState.on_main_process%  s5    0 tuut';';Or)   c                V    U R                   (       d  U R                  (       d  U$ [        $ )a  
Decorator that only runs the decorated function on the local main process.

Args:
    function (`Callable`): The function to decorate.

Example:
```python
# Assume we have 2 servers with 4 processes each.
from accelerate.state import PartialState

state = PartialState()


@state.on_local_main_process
def print_something():
    print("This will be printed by process 0 only on each server.")


print_something()
# On server 1:
"This will be printed by process 0 only"
# On server 2:
"This will be printed by process 0 only"
```
)r   r   r0   r  s     r*   on_local_main_process"PartialState.on_local_main_processC  s     6 %%T-A-AOr)   c                V    U R                   (       d  U R                  (       d  U$ [        $ )a  
Decorator that only runs the decorated function on the last process.

Args:
    function (`Callable`): The function to decorate.

Example:
```python
# Assume we have 4 processes.
from accelerate.state import PartialState

state = PartialState()


@state.on_last_process
def print_something():
    print(f"Printed on process {state.process_index}")


print_something()
"Printed on process 3"
```
)r   r   r0   r  s     r*   on_last_processPartialState.on_last_processb  s     0 t';';Or)   c                    Uc  [        U R                  US9$ U R                  U:X  d  U R                  (       d  U$ [        $ )a"  
Decorator that only runs the decorated function on the process with the given index.

Args:
    function (`Callable`, `optional`):
        The function to decorate.
    process_index (`int`, `optional`):
        The index of the process on which to run the function.

Example:
```python
# Assume we have 4 processes.
from accelerate.state import PartialState

state = PartialState()


@state.on_process(process_index=2)
def print_something():
    print(f"Printed on process {state.process_index}")


print_something()
"Printed on process 2"
```
)rX   )r   
on_processrX   r   r0   )r7   r  rX   s      r*   r  PartialState.on_process~  s<    6 4??-HH-/9M9MOr)   c                    Uc  [        U R                  US9$ U R                  U:X  d  U R                  (       d  U$ [        $ )a  
Decorator that only runs the decorated function on the process with the given index on the current node.

Args:
    function (`Callable`, *optional*):
        The function to decorate.
    local_process_index (`int`, *optional*):
        The index of the local process on which to run the function.

Example:
```python
# Assume we have 2 servers with 4 processes each.
from accelerate import Accelerator

accelerator = Accelerator()


@accelerator.on_local_process(local_process_index=2)
def print_something():
    print(f"Printed on process {accelerator.local_process_index}")


print_something()
# On server 1:
"Printed on process 2"
# On server 2:
"Printed on process 2"
```
)rV   )r   on_local_processrV   r   r0   )r7   r  rV   s      r*   r!  PartialState.on_local_process  s@    < 400FYZZ$$(;;TEYEYOr)   c                >    U R                   (       a  [        U0 UD6  g g r-   )r   printr7   r.   r/   s      r*   r$  PartialState.print  s    %%4"6" &r)   c                   [        5       (       a)  S[        R                  S'   [        R                  " S5      $ [        5       (       a  [        R                  " S5      $ [        5       (       a  [        R                  " S5      $ [        5       (       a  [        R                  " S5      $ [        5       (       a  [        R                  " S5      $ [        5       (       a  [        R                  " S5      $ [        R                  R                  5       (       a  [        R                  " S	5      $ [        5       (       a  [        R                  " S
5      $ [        R                  " S5      $ )aY  
Returns the default device which is:
- MPS if `torch.backends.mps.is_available()` and `torch.backends.mps.is_built()` both return True.
- CUDA if `torch.cuda.is_available()`
- MLU if `is_mlu_available()`
- SDAA if `is_sdaa_available()`
- MUSA if `is_musa_available()`
- NPU if `is_npu_available()`
- HPU if `is_hpu_available()`
- CPU otherwise
1PYTORCH_ENABLE_MPS_FALLBACKmpsmlur   musanpuhpur}   xpur   )r   r   r   r   rS   r   r   r   r   r   r}   is_availabler   r   s    r*   default_devicePartialState.default_device  s     8;BJJ45<<&&<<&&  <<''  <<'' <<&&<<&&ZZ$$&&<<''<<&&<<&&r)   c                    SnU(       a  SSK nSn[        R                  nGOp[        5       (       a  Sn[        R                  nGOM[        [        R                  R                  SS5      5      S:w  Ga  U(       Gd  [        5       (       a  Sn[        R                  n[        5       (       a  Sn[        R                  nO[        5       (       a  S	n[        R                  nO[        5       (       a  S
n[        R                   nO[#        SS9(       a  Uc  S
n[        R$                  nOl[&        R(                  R+                  5       (       a  Uc  Sn[        R                  nO3[-        5       (       a$  [/        5       (       a  Uc  Sn[        R0                  nUc  [        [        R                  R                  SS5      5      S:w  d  [3        / SQS5      S:  a  U(       d   [-        5       (       a  [        R0                  nO[        R4                  nUS;   a;  [7        5       (       a,  [3        S/S5      S:  d  U[        R0                  :X  a  SSKnSnO.US;   a&  [&        R:                  R=                  5       (       a  SnOSnUc  [        R>                  nX44$ )zhPrepares any imports needed before initializing the distributed backend and sets `self.backend` properlyNr   smddpra   rb   rc   cnclrf   mcclhcclT)	init_hcclri   xccl)PMI_SIZEOMPI_COMM_WORLD_SIZEMV2_COMM_WORLD_SIZErp   r   )NcclCCL_WORKER_COUNTr=  )Nru   ru   gloo) ,smdistributed.dataparallel.torch.torch_smddpr	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r}   r0  r   r   r   r   r   r   oneccl_bindings_for_pytorchr   is_mpi_availabler   )r7   r   sagemaker_dprQ   rT   smdistributedrA  s          r*   r   PartialState._prepare_backend  s     ?G.88#%%G.22b12b8!! #2#<#<  "" #2#=#= "$$ #2#=#=  "## #2#<#< !D1?$G#2#<#< ((**?$G#2#<#< !##(9(;(;?$G#2#<#< #

|R01R7 iklmpqq+--#2#<#< #2#<#<  =($&&%'9&:A>BFVZiZsZsFs2M)e.?.?.P.P.R.R #.11((r)   c                2   U R                   b  gU R                  [        R                  :X  a9  U R                  (       a  [
        R                   " S5      OU R                  U l         g[        U R                  5      R                  S5      S   R                  SS5      R                  5       nUS;  a  [        SU R                   S	U S
35      eUS:X  a  [        R                  " 5       U l         gUS:X  a9  [
        R                   " S[
        R                  R                  5       5      U l         gUS:X  a  Sn[!        [
        U5      nU R"                  UR%                  5       -  n[
        R                   " X5      U l         UR'                  U R                   5        g)zJ
Sets the device in `self.device` to the current distributed environment.
Nr   .rc   MULTI_r   )	r   gpur+  r,  r-  r/  ra   r.  r   zCan't set device for z (z6), verify we should be calling `_set_device()` for it!ra   r.  rI  r}   )rS   rT   r	   r   rO   r   r1  r   splitreplacer   r   r   
xla_devicer.  current_devicegetattrrV   device_countr   )r7   rS   device_moduledevice_indexs       r*   r   PartialState.set_device.  s;    ;;"  O$6$6615%,,u-@S@SDKT**+11#6r:BB8RPVVXZZ'(=(='>bH~  U?--/DKu_,,ueii.F.F.HIDK#E62M33m6P6P6RRL,,v<DK$$T[[1r)   c                    U R                   (       a  Uc  g[        R                  R                  5       (       a   [        R                  R	                  U5        gg)z^
Destroys the process group. If one is not specified, the default process group is destroyed.
N)rU   r   r   r+   destroy_process_groupr7   groups     r*   rT  "PartialState.destroy_process_groupH  sB     %-++--33E: .r)   c                \    XR                   ;   a  [        SU S35      e[        SU S35      e)Nz(`PartialState` object has no attribute `zy`. This happens if `PartialState._reset_state()` was called and an `Accelerator` or `PartialState` was not reinitialized.z('PartialState' object has no attribute ''_known_attrsAttributeErrorr7   names     r*   __getattr__PartialState.__getattr__R  sH     $$$ :4& AL L  GvQOPPr)   )
r   rO   rQ   rR   rS   rT   rU   rV   rW   rX   rD   )r   rE   )returnr   ra  rE   )r   rE   r   z"list | tuple | dict | torch.Tensorr  rE   r-   )r  Callable[..., Any] | None)r  zCallable[..., Any]NN)r  rd  rX   
int | None)r  rd  rV   rf  )ra  ztorch.device)FFN)r   rE   rQ   
str | Nonera  ztuple[str, DistributedType]r^  r   )$rF   rG   rH   rI   rJ   
SharedDictr'   r[  r9   r   staticmethodr   propertyr   r   r   r   r   r   r   r   r	  r  r  r  r  r  r  r!  r$  r1  r   r   rT  r_  rK   r(   r)   r*   rM   rM   |   sZ   $L LMLNE`
 + + ( ( V V < < 
 
 
 
$@L% X< X<t : :( @ @(<>8B"H#  '  'F LPB)B)>HB)	$B)H24;
Qr)   rM   c                     \ rS rSrSr\" 5       r\R                  / SQ-   r         S     SS jjr	\
SS j5       rS rSS jr\
S	 5       r\S S!S
 jj5       rS"S jr\
S 5       r\
S 5       r\
SS j5       r\
SS j5       r\
SS j5       r\
SS j5       rS r\S S#S jj5       r\S 5       r\S 5       r\
S 5       r\S$S j5       r\S"S%S jj5       r S r!S$S jr"Sr#g)&r&   i_  a  
Singleton class that has information about the current training environment.

**Available attributes:**

    - **device** (`torch.device`) -- The device to use.
    - **distributed_type** ([`~accelerate.state.DistributedType`]) -- The type of distributed environment currently
      in use.
    - **parallelism_config** ([`~accelerate.utils.ParallelismConfig`]) -- The parallelism configuration for the
      current training environment. This is used to configure the distributed training environment.
    - **initialized** (`bool`) -- Whether or not the `AcceleratorState` has been initialized from `Accelerator`.
    - **local_process_index** (`int`) -- The index of the current process on the current server.
    - **mixed_precision** (`str`) -- Whether or not the current script will use mixed precision, and if so the type
      of mixed precision being performed. (Choose from 'no','fp16','bf16 or 'fp8').
    - **num_processes** (`int`) -- The number of processes currently launched in parallel.
    - **process_index** (`int`) -- The index of the current process.
    - **is_last_process** (`bool`) -- Whether or not the current process is the last one.
    - **is_main_process** (`bool`) -- Whether or not the current process is the main one.
    - **is_local_main_process** (`bool`) -- Whether or not the current process is the main one on the local node.
    - **debug** (`bool`) -- Whether or not the current script is being run in debug mode.
)deepspeed_pluginuse_ipexfsdp_pluginmegatron_lm_plugindynamo_pluginNc
                   U R                   U l        [        S5      (       a  Sn[        R                   0 :X  a  [        U40 U
D6  U R                  R	                  [        R                   5        U R                  X5        U R                  (       Gd  S U l        S U l        X`l	        Xl
        S U l        Uc  [        SS5      OUR                  5       nUS:X  a  [        5       (       d  [        S5      e[         R"                  R%                  5       (       aG  ['        5       (       d8  [(        R+                  S[         R"                  R-                  5        S35        S	nO&[/        5       (       a  [(        R+                  S
5        SnX0l        U	(       d  [        S5      eU R2                  [4        R6                  :X  a  US:w  a  SU l        OXl        U R2                  [4        R:                  :X  a  [=        SS9(       a  US:X  a  [>        R@                  RC                  S5      (       aA  [E        S5      [>        R@                  S'   [E        S5      [>        R@                  S'   SU l#        GO[E        S5      [>        R@                  S'   [E        S5      [>        R@                  S'   SU l#        GO[>        R@                  RC                  SS5      R                  5       S:X  a  U(       d  [4        R6                  U l        [I        U[J        5      (       d!  URM                  U5        URO                  SS9  OYURQ                  5        H  nURM                  U5        M     [S        [U        URQ                  5       5      5      nURO                  SS9  X@l        GOU R2                  [4        RV                  [4        RX                  [4        RZ                  [4        R\                  [4        R^                  [4        R`                  [4        Rb                  4;   Ga  [>        R@                  RC                  SS5      R                  5       S:X  d}  U R                  (       a)  U R                  Rd                  (       a  Uc  [        S5      eU R                  b6  U R                  Rd                  (       a  URf                  S:X  a  [        S5      e[>        R@                  RC                  SS5      R                  5       S:X  d+  Uc(  U R                  bd  U R                  Rd                  (       aI  [4        Rh                  U l        U R8                  S:w  a  Ub  URM                  U R8                  5        XPl5        [>        R@                  RC                  SS5      R                  5       S:X  aU  U R2                  [4        R`                  4;  a6  [4        Rl                  U l        URM                  U R8                  5        Xpl7        OcU R2                  [4        Rp                  [4        R`                  [4        Rr                  4;   a&  [u        5       (       a  [        SSS9U l        OSU l        U R0                  Rv                  [x        Rr                  :w  aS  U R8                  S:X  aC  U Rz                  R|                  S:X  a)  S[         R~                  R"                  R                  lA        U R0                  Rv                  [x        Rr                  :w  aS  U R8                  S:X  aC  U Rz                  R|                  S :X  a)  S[         R~                  R                  R                  lA        U R2                  [        R                   S!'   g g )"NACCELERATE_USE_CPUTACCELERATE_MIXED_PRECISIONnofp8zPUsing `fp8` precision requires `transformer_engine` or `MS-AMP` to be installed.z-The current device has compute capability of z which is insufficient for FP8 mixed precision training (requires a GPU Hopper/Ada Lovelace or higher, compute capability of 8.9 or higher). Will use FP16 instead.fp16zThe current HPU device is Gaudi1 which does not support FP8 mixed precision training (requires Gaudi2 or higher). Will use BF16 instead.bf16zPlease make sure to properly initialize your accelerator via `accelerator = Accelerator()` before using any functionality from the `accelerate` library.rz   ACCELERATE_DOWNCAST_BF16r   XLA_USE_BF16r   XLA_DOWNCAST_BF16Frd   r^   r_   _from_accelerator_stateACCELERATE_ALLOW_CP_STANDALONEz`cp_size > 1` specified in the `parallelism_config`, but no `fsdp_plugin` was provided. We need a `fsdp_plugin` to use context parallelism, as we also shard the model across the device mesh to save more memoryzQUsing `cp_size>1` requires FSDP2, but the provided `fsdp_plugin` is using FSDP1. rj   ACCELERATE_USE_MEGATRON_LMACCELERATE_USE_IPEXdefaultr}   r,  rT   )Cr'   r   r!   rM   update_check_initializedr   deepspeed_pluginsrn  torch_tp_pluginparallelism_configdevice_meshr    r   r   r   r   r}   r0  r   loggerwarningget_device_capabilityr   rq  rT   r	   r   rP   r   r   r   r   r   r   downcast_bfloatr   r   set_mixed_precisionr   r  nextiterr   r   r   r   r   r   r   
cp_enabledfsdp_versionr   ro  r   rp  r   r   r   rQ   r
   rS   r   backendsmatmul
allow_tf32r,  )r7   mixed_precisionr   rq  rm  ro  r  rp  r  _from_acceleratorr/   pluginfirst_plugins                r*   r9   AcceleratorState.__init__  s    **344C%%+''\7785%)D" DM#2 &8##D #* &&BDI$**, 
 %''))$j  ZZ,,..7P7R7RNNG

HhHhHjGk lb b
 '-O%''NND '-O!.$ T  $$(A(AAoY^F^(,%(7%$$(;(;;@Vdh@i"f,zz~~&@AA58V

>2:=a&

#67/3,58V

>2:=a&

#67/4, :GDJJLPVV_b(7(A(A%!"2D99$88I$++D+I"2"9"9";22?C #< $(-=-D-D-F(G#HL '''E)9&&&))))****))))))+  zz~~&FPVVX\bb..43J3J3U3UZeZm( p  //; 33>>'449(o  JJNN#8'BHHJfTXcXo++7D<S<S<^<^,;,@,@D),,49P#778M8MN'2$::>>0'%'V$(,(=(=#--F ) -<,G,GD)&::4;P;PQ.@+&&?+D+DoF_F_apasas*tt$&&$78MW[$\DM$)DM""**m.>.>>))T1KK$$.8<##**5""**m.>.>>))T1KK$$.8<##**5=A=R=RL&&'9:u  r)   c                <    U R                   [        R                   :g  $ r-   )r'   rM   r   s    r*   r   AcceleratorState.initialized  s    !!\%?%???r)   c                    [        5       R                  5       SU R                   S3-   nU R                  [        R
                  :X  a  USU R                  R                   S3-  nU$ )Nz
Mixed precision type: r   zds_config: )rM   r   r  rT   r	   r   rm  deepspeed_config)r7   reprs     r*   r   AcceleratorState.__repr__  sb    ~&&(-EdFZFZE[[]+^^  O$=$==k$"7"7"H"H!ILLDr)   c                :   U R                   (       a  SnU(       a2  U R                  R                  S:w  a  [        UR	                  SS95      eUbK  XR
                  :w  a;  U R                  [        R                  :w  a  [        UR	                  SU S3S95      egggg)zeChecks if a modification is trying to be made and the `AcceleratorState` has already been initializedzAcceleratorState has already been initialized and cannot be changed, restart your runtime completely and pass `{flag}` to `Accelerator()`.r   zcpu=True)flagNzmixed_precision='rY  )	r   rS   r   r   formatrP   rT   r	   r   )r7   r  r   errs       r*   r  #AcceleratorState._check_initialized  s     _Ct{{''50 !<==+#'<'<<))_-F-FF 3D_DUUV1W!XYY G = , r)   c                \   U R                   [        R                  :X  a  U R                  S:w  aq  U R                  R
                  nUR                  S0 5      R                  SS5      (       a  SnU$ UR                  S0 5      R                  SS5      (       a  SnU$ Sn U$ U R                  nU$ )Nrv  rw  enabledFrx  ru  )rT   r	   r   rP   rm  r  r   )r7   configr  s      r*   r   AcceleratorState.mixed_precision&  s      O$=$==$BWBW[`B`**;;Fzz&"%)))U;;"(  FB'++Iu=="(
  #'  #33Or)   c                x    [         R                  R                  5         U (       a  [        R	                  5         ggr   )r&   r'   r   rM   r   )reset_partial_states    r*   r   AcceleratorState._reset_state4  s*     	&&,,.%%' r)   c                6    [        5       R                  U5        g)z
Destroys the process group. If one is not specified, the default process group is destroyed.

If `self.fork_launched` is `True` and `group` is `None`, nothing happens.
N)rM   rT  rU  s     r*   rT  &AcceleratorState.destroy_process_group;  s     	,,U3r)   c                *    [        5       R                  $ r-   )rM   rU   r   s    r*   rU   AcceleratorState.fork_launchedC  s    ~+++r)   c                *    [        5       R                  $ )r   )rM   r   r   s    r*   r    AcceleratorState.use_distributedG  s    
 ~---r)   c                |    U R                   [        R                  :H  =(       a    U R                  R                  S:H  $ )N   )rT   r	   r   ro  r  r   s    r*   is_fsdp2AcceleratorState.is_fsdp2N  s0    $$(<(<<cAQAQA^A^bcAccr)   c                *    [        5       R                  $ )r   )rM   r   r   s    r*   r    AcceleratorState.is_last_processR       ~---r)   c                *    [        5       R                  $ )r   )rM   r   r   s    r*   r    AcceleratorState.is_main_processW  r  r)   c                *    [        5       R                  $ )r   )rM   r   r   s    r*   r   &AcceleratorState.is_local_main_process\  s     ~333r)   c                4    [        5       R                  5         g r-   )rM   r   r   s    r*   r   "AcceleratorState.wait_for_everyonea  s    ((*r)   c              #  v   #    [        5       R                  XS9 nUv   SSS5        g! , (       d  f       g= f7f)a  
Splits `input` between `self.num_processes` quickly and can be then used on that process. Useful when doing
distributed inference, such as with different prompts.

Note that when using a `dict`, all keys need to have the same number of elements.

Args:
    inputs (`list`, `tuple`, `torch.Tensor`, or `dict` of `list`/`tuple`/`torch.Tensor`):
        The input to split between processes.
    apply_padding (`bool`, `optional`, defaults to `False`):
        Whether to apply padding by repeating the last element of the input so that all processes have the same
        number of elements. Useful when trying to perform actions such as `gather()` on the outputs or passing
        in less inputs than there are processes. If so, just remember to drop the padded elements afterwards.


Example:

```python
# Assume there are two processes
from accelerate.state import AcceleratorState

state = AcceleratorState()
with state.split_between_processes(["A", "B", "C"]) as inputs:
    print(inputs)
# Process 0
["A", "B"]
# Process 1
["C"]

with state.split_between_processes(["A", "B", "C"], apply_padding=True) as inputs:
    print(inputs)
# Process 0
["A", "B"]
# Process 1
["C", "C"]
```
)r  N)rM   r	  )r7   r   r  s      r*   r	  (AcceleratorState.split_between_processesd  s/     N ^33F3X\bL YXXs   9(	9
69c              #  x   #    [        5       R                  5          Sv   SSS5        g! , (       d  f       g= f7f)z
Lets the main process go first inside a with block.

The other processes will enter the with block after the main process exits.
N)rM   r  r   s    r*   r  #AcceleratorState.main_process_first  s#      ^..0 100   :)	:
7:c              #  x   #    [        5       R                  5          Sv   SSS5        g! , (       d  f       g= f7f)z
Lets the local main process go inside a with block.

The other processes will enter the with block after the main process exits.
N)rM   r  r   s    r*   r  )AcceleratorState.local_main_process_first  s#      ^446 766r  c                \    U R                   [        R                  :w  a  gSSKJn  U" U 5      $ )zX
Returns the currently active DeepSpeedPlugin.

If not using deepspeed, returns `None`.
Nr   )get_active_deepspeed_plugin)rT   r	   r   accelerate.utils.deepspeedr  )r7   r  s     r*   rm  !AcceleratorState.deepspeed_plugin  s)       O$=$==J*400r)   c                     U R                   U   $ )z8
Returns the DeepSpeedPlugin with the given plugin_key.
)r  r]  s     r*   get_deepspeed_plugin%AcceleratorState.get_deepspeed_plugin  s    
 %%d++r)   c                    U R                   R                  5        H  u  p#X!:w  d  M  UR                  5         M     U R                   U   R                  SS9  g)zZ
Activates the DeepSpeedPlugin with the given `name`, and will disable all other plugins.
Tr|  N)r  items	_unselectr   )r7   r^  r   r  s       r*   select_deepspeed_plugin(AcceleratorState.select_deepspeed_plugin  sN    
  11779KC{  " : 	t$++D+Ir)   c                8    [        5       R                  " U0 UD6  g r-   )rM   r$  r%  s      r*   r$  AcceleratorState.print  s    d-f-r)   c                \    XR                   ;   a  [        SU S35      e[        SU S35      e)Nz,`AcceleratorState` object has no attribute `z}`. This happens if `AcceleratorState._reset_state()` was called and an `Accelerator` or `PartialState` was not reinitialized.z,'AcceleratorState' object has no attribute 'rY  rZ  r]  s     r*   r_  AcceleratorState.__getattr__  sI     $$$ >tf EL L  KD6QRSTTr)   )r   rP   r  r  rT   r  rq  ro  rp  r  r  rn  )	NFNNNNNNF)r  rg  r   rE   r  rE   rb  re  rD   )r  rE   r-   rc  rh  )r^  rg  )$rF   rG   rH   rI   rJ   ri  r'   rM   r[  r9   rk  r   r   r  r  rj  r   rT  rU   r   r  r   r   r   r   r   r	  r  r  rm  r   r  r  r$  r_  rK   r(   r)   r*   r&   r&   _  s   , LM,, 0 L '+"'NS#NS NS  NS` @ @Z   ( (4 , , . . d d . . . . 4 4+ ' 'R     1 1 , , J J.
Ur)   r&   c                  `   \ rS rSrSr\" 5       rSSS jjr\SS j5       r	\SS j5       r
\SS j5       r\SS j5       r\SS	 j5       r\SS
 j5       rS r\S 5       r\R"                  S 5       rS rS rS r\S 5       r\S 5       r\R"                  S 5       r\SS j5       r\S 5       rSrg)GradientStatei  a  
Singleton class that has information related to gradient synchronization for gradient accumulation

**Available attributes:**

    - **end_of_dataloader** (`bool`) -- Whether we have reached the end the current dataloader
    - **remainder** (`int`) -- The number of extra samples that were added from padding the dataloader
    - **sync_gradients** (`bool`) -- Whether the gradients should be synced across all devices
    - **active_dataloader** (`Optional[DataLoader]`) -- The dataloader that is currently being iterated over
    - **dataloader_references** (`List[Optional[DataLoader]]`) -- A list of references to the dataloaders that are
        being iterated over
    - **num_steps** (`int`) -- The number of steps to accumulate over
    - **adjust_scheduler** (`bool`) -- Whether the scheduler should be adjusted to account for the gradient
        accumulation
    - **sync_with_dataloader** (`bool`) -- Whether the gradients should be synced at the end of the dataloader
        iteration and the number of total steps reset
    - **is_xla_gradients_synced** (`bool`) -- Whether the XLA gradients have been synchronized. It is initialized
      as false. Once gradients have been reduced before the optimizer step, this flag is set to true. Subsequently,
        after each step, the flag is reset to false. FSDP will always synchronize the gradients, hence
        is_xla_gradients_synced is always true.
Nc                   U R                   U l        U R                  (       d0  SU l        S /U l        Ub  UR                  5       O0 U l        SU l        Ub5  U R                  UR                  5       :w  a  UR                  5       U l        g g g )NTF)r'   r   r   sync_gradients_dataloader_references_ref	to_kwargsplugin_kwargs_is_xla_gradients_synced)r7   gradient_accumulation_plugins     r*   r9   GradientState.__init__  s    **"&D/3fD+<X<d,668jl  -2D) (38J8JNjNtNtNv8v!=!G!G!ID 9w3r)   c                :    U R                   R                  SS5      $ )z.Returns the number of steps to accumulate over	num_stepsr   r  r   r   s    r*   r  GradientState.num_steps  s     !!%%k155r)   c                :    U R                   R                  SS5      $ )z0Returns whether the scheduler should be adjustedadjust_schedulerFr  r   s    r*   r  GradientState.adjust_scheduler  s     !!%%&8%@@r)   c                :    U R                   R                  SS5      $ )zyReturns whether the gradients should be synced at the end of the dataloader iteration and the number of total steps resetsync_with_dataloaderTr  r   s    r*   r  "GradientState.sync_with_dataloader  s     !!%%&<dCCr)   c                (    [         R                  0 :g  $ )z8Returns whether the `GradientState` has been initialized)r  r'   r   s    r*   r   GradientState.initialized  s     **b00r)   c                R    U R                   (       d  gU R                  R                  $ )zAReturns whether we have reached the end of the current dataloaderF)in_dataloaderactive_dataloaderend_of_dataloaderr   s    r*   r  GradientState.end_of_dataloader  s"     !!%%777r)   c                R    U R                   (       d  gU R                  R                  $ )zOReturns the number of extra samples that were added from padding the dataloaderrc   )r  r  	remainderr   s    r*   r  GradientState.remainder  s"     !!%%///r)   c           	     p    SU R                    SU R                   SU R                   SU R                   S3	$ )NzSync Gradients: z
At end of current dataloader: z
Extra samples added: z
Gradient accumulation plugin: r   )r  r  r  r  r   s    r*   r   GradientState.__repr__  sO    t223 4--1-C-C,D E$$(NN#3 4--1-?-?,@D	
r)   c                :    [        SSS9(       a  gU R                  $ )zReturns the value of is_xla_gradients_synced. FSDP will always synchronize the gradients, hence is_xla_gradients_synced is always true.rj   Fr  T)r!   r  r   s    r*   is_xla_gradients_synced%GradientState.is_xla_gradients_synced"  s     4eD,,,r)   c                    Xl         g)z+Set the _is_xla_gradients_synced attribute.N)r  )r7   	is_synceds     r*   r  r  )  s
     )2%r)   c                    Xl         U R                   (       aL  [        SS9(       a=  [        5       R                  [        R
                  :X  a  [        R                  " 5         gggg)zhPrivate function that sets whether gradients should be synchronized. Users should not have to call this.Trz   N)r  r   rM   rT   r	   r   r   	mark_step)r7   r  s     r*   _set_sync_gradients!GradientState._set_sync_gradients.  sJ    , &D9//?3F3FFLLN G :  r)   c                0    U =R                   U/-  sl         g)zPrivate function that adds a dataloader to `self.dataloader_references` and sets `in_dataloader` to `True`. Users should not have to call this.Ndataloader_references)r7   
dataloaders     r*   _add_dataloaderGradientState._add_dataloader9  s     	""zl2"r)   c                `    U R                    Vs/ s H  o"U:w  d  M
  UPM     snU l         gs  snf )zPrivate function that removes a dataloader from `self.dataloader_references` and sets `in_dataloader` to `False` if there are no more dataloaders. Users should not have to call this.Nr   )r7   r  dataloader_refs      r*   _remove_dataloader GradientState._remove_dataloader?  s2     261K1K&
1K~akOkN1K&
" &
s   	++c                     U R                   S   $ )Nrc   r   r   s    r*   r  GradientState.active_dataloaderF  s    ))"--r)   c                X    U R                    Vs/ s H  ob  U" 5       OUPM     sn$ s  snf r-   )r  )r7   	references     r*   r  #GradientState.dataloader_referencesJ  s0     VZUtUtuUt	4	)CUtuuus   'c                n    U Vs/ s H  o"b  [         R                  " U5      OUPM     snU l        g s  snf r-   )weakrefrefr  )r7   
referencesr  s      r*   r  r  O  s:     ak+
`jR\'=GKK
#:M`j+
' +
s   $2c                    U R                   SL$ )z6Returns whether the current process is in a dataloaderN)r  r   s    r*   r  GradientState.in_dataloaderU  s     %%T11r)   c                 @    [         R                  R                  5         gr   )r  r'   r   r(   r)   r*   r   GradientState._reset_stateZ  s     	##))+r)   )r   r  r  r  r  r  r-   )r  z!GradientAccumulationPlugin | None)ra  r   rb  )rF   rG   rH   rI   rJ   ri  r'   r9   rk  r  r  r  r   r  r  r   r  setterr  r  r  r  r  r  rj  r   rK   r(   r)   r*   r  r    s@   , LMJ 6 6 A A D D 1 1 8 8 0 0
 - - ##2 $2	3
 . . v v !!
 "

 2 2 , ,r)   r  rb  )C
__future__r   loggingr   	threadingr   r  
contextlibr   	functoolsr   typingr   r   r   utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   utils.dataclassesr#   torch_xla.core.xla_modelcore	xla_modelr   torch_xla.runtimeruntimer   	torch_mlu
torch_sdaa
torch_musa	torch_npu	getLoggerrF   r  r+   r0   localr2   r   ri  rM   r&   r  r(   r)   r*   <module>r*     s    #  	    %          8 8 ))"'%(%(' 
		8	$0IOO 8 011T7L
`Q `QFoU oUdL, L,r)   