
import importlib
import math
from contextlib import suppress
from typing import Callable, Optional, Union

import torch
from packaging import version
from torch.utils.data import BatchSampler, DataLoader, IterableDataset, RandomSampler

from .logging import get_logger
from .state import DistributedType, GradientState, PartialState, is_torch_xla_available
from .utils import (
    RNGType,
    broadcast,
    broadcast_object_list,
    compare_versions,
    concatenate,
    find_batch_size,
    get_data_structure,
    initialize_tensors,
    is_datasets_available,
    is_torch_version,
    is_torchdata_stateful_dataloader_available,
    send_to_device,
    slice_tensors,
    synchronize_rng_states,
)


logger = get_logger(__name__)

# Default kwargs of `torch.utils.data.DataLoader`, used when a dataloader has to be re-built.
_PYTORCH_DATALOADER_KWARGS = {
    "batch_size": 1,
    "shuffle": False,
    "sampler": None,
    "batch_sampler": None,
    "num_workers": 0,
    "collate_fn": None,
    "pin_memory": False,
    "drop_last": False,
    "timeout": 0,
    "worker_init_fn": None,
    "multiprocessing_context": None,
    "generator": None,
    "prefetch_factor": 2,
    "persistent_workers": False,
    "pin_memory_device": "",
}

# kwargs that only exist starting from a given torch version, keyed by that version.
_PYTORCH_DATALOADER_ADDITIONAL_KWARGS = {"2.6.0": {"in_order": True}}

for v, additional_kwargs in _PYTORCH_DATALOADER_ADDITIONAL_KWARGS.items():
    if is_torch_version(">=", v):
        _PYTORCH_DATALOADER_KWARGS.update(additional_kwargs)
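

# Illustrative sketch (not part of the original module): the loop above only folds the
# version-gated kwargs into the defaults when the running torch is new enough, so the
# presence of `in_order` tracks the torch version at import time.
def _example_dataloader_kwargs_gate():
    assert ("in_order" in _PYTORCH_DATALOADER_KWARGS) == is_torch_version(">=", "2.6.0")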


class SeedableRandomSampler(RandomSampler):
    """
    Same as a random sampler, except that in `__iter__` a seed can be used.

    Needed specifically in distributed cases, when the random generator for each GPU needs to start from the same seed
    and be fully reproducible on multiple iterations.

    If a custom `generator` is passed, it will rely on its initial seed as well as the current iteration it is on
    (stored in `self.epoch`).
    """

    def __init__(self, *args, **kwargs):
        data_seed = kwargs.pop("data_seed", None)
        super().__init__(*args, **kwargs)

        self.initial_seed = data_seed if data_seed is not None else torch.random.initial_seed()
        self.epoch = 0

    def __iter__(self):
        if self.generator is None:
            self.generator = torch.Generator(
                device=torch.get_default_device() if hasattr(torch, "get_default_device") else "cpu"
            )
            self.generator.manual_seed(self.initial_seed)

        # Allow `self.epoch` to modify the seed of the generator
        seed = self.epoch + self.initial_seed
        self.generator.manual_seed(seed)
        yield from super().__iter__()
        self.set_epoch(self.epoch + 1)

    def set_epoch(self, epoch: int):
        "Sets the current iteration of the sampler."
        self.epoch = epoch


class BatchSamplerShard(BatchSampler):
    """
    Wraps a PyTorch `BatchSampler` to generate batches for one of the processes only. Instances of this class will
    always yield a number of batches that is a round multiple of `num_processes` and that all have the same size.
    Depending on the value of the `drop_last` attribute of the batch sampler passed, it will either stop the iteration
    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.

    Args:
        batch_sampler (`torch.utils.data.sampler.BatchSampler`):
            The batch sampler to split in several shards.
        num_processes (`int`, *optional*, defaults to 1):
            The number of processes running concurrently.
        process_index (`int`, *optional*, defaults to 0):
            The index of the current process.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
            yielding different full batches on each process.

            On two processes with a sampler of `[[0, 1, 2, 3], [4, 5, 6, 7]]`, this will result in:

            - the sampler on process 0 to yield `[0, 1, 2, 3]` and the sampler on process 1 to yield `[4, 5, 6, 7]` if
              this argument is set to `False`.
            - the sampler on process 0 to yield `[0, 1]` then `[4, 5]` and the sampler on process 1 to yield `[2, 3]`
              then `[6, 7]` if this argument is set to `True`.
        even_batches (`bool`, *optional*, defaults to `True`):
            Whether or not to loop back at the beginning of the sampler when the number of samples is not a round
            multiple of (original batch size / number of processes).

    <Tip warning={true}>

    `BatchSampler`s with varying batch sizes are not enabled by default. To enable this behaviour, set `even_batches`
    equal to `False`

    </Tip>
    """

    def __init__(
        self,
        batch_sampler: BatchSampler,
        num_processes: int = 1,
        process_index: int = 0,
        split_batches: bool = False,
        even_batches: bool = True,
    ):
        if split_batches and batch_sampler.batch_size % num_processes != 0:
            raise ValueError(
                f"To use `BatchSamplerShard` in `split_batches` mode, the batch size ({batch_sampler.batch_size}) "
                f"needs to be a round multiple of the number of processes ({num_processes})."
            )
        self.batch_sampler = batch_sampler
        self.num_processes = num_processes
        self.process_index = process_index
        self.split_batches = split_batches
        self.even_batches = even_batches
        self.batch_size = getattr(batch_sampler, "batch_size", None)
        self.drop_last = getattr(batch_sampler, "drop_last", False)
        if self.batch_size is None and self.even_batches:
            raise ValueError(
                "You need to use `even_batches=False` when the batch sampler has no batch size. If you "
                "are not calling this method directly, set `accelerator.even_batches=False` instead."
            )

    @property
    def total_length(self):
        return len(self.batch_sampler)

    def __len__(self):
        if self.split_batches:
            # Split batches does not change the length of the batch sampler.
            return len(self.batch_sampler)
        if len(self.batch_sampler) % self.num_processes == 0:
            # If the length is a round multiple of the number of processes, it's easy.
            return len(self.batch_sampler) // self.num_processes
        length = len(self.batch_sampler) // self.num_processes
        if self.drop_last:
            # Same if we drop the remainder.
            return length
        elif self.even_batches:
            # When we even batches we always get +1.
            return length + 1
        else:
            # Otherwise it depends on the process index.
            return length + 1 if self.process_index < len(self.batch_sampler) % self.num_processes else length

    def __iter__(self):
        return self._iter_with_split() if self.split_batches else self._iter_with_no_split()

    def _iter_with_split(self):
        initial_data = []
        batch_length = self.batch_sampler.batch_size // self.num_processes
        for idx, batch in enumerate(self.batch_sampler):
            if idx == 0:
                initial_data = batch
            if len(batch) == self.batch_size:
                # If the batch is full, we yield the part of it this process is responsible for.
                yield batch[batch_length * self.process_index : batch_length * (self.process_index + 1)]

        # If drop_last is True or the last batch was full, iteration is over, otherwise...
        if not self.drop_last and len(initial_data) > 0 and len(batch) < self.batch_size:
            if not self.even_batches:
                if len(batch) > batch_length * self.process_index:
                    yield batch[batch_length * self.process_index : batch_length * (self.process_index + 1)]
            else:
                # For degenerate cases where the dataset has less than num_process * batch_size samples
                while len(initial_data) < self.batch_size:
                    initial_data += initial_data
                batch = batch + initial_data
                yield batch[batch_length * self.process_index : batch_length * (self.process_index + 1)]

    def _iter_with_no_split(self):
        initial_data = []
        batch_to_yield = []
        for idx, batch in enumerate(self.batch_sampler):
            # We gather the initial indices in case we need to circle back at the end.
            if not self.drop_last and idx < self.num_processes:
                initial_data += batch
            # We identify the batch to yield but wait until we are sure every process gets a full batch before
            # actually yielding it.
            if idx % self.num_processes == self.process_index:
                batch_to_yield = batch
            if idx % self.num_processes == self.num_processes - 1 and (
                self.batch_size is None or len(batch) == self.batch_size
            ):
                yield batch_to_yield
                batch_to_yield = []

        # If drop_last is True, iteration is over, otherwise...
        if not self.drop_last and len(initial_data) > 0:
            if not self.even_batches:
                if len(batch_to_yield) > 0:
                    yield batch_to_yield
            else:
                # ... we yield the complete batch we had saved before if it has the proper length.
                if len(batch_to_yield) == self.batch_size:
                    yield batch_to_yield

                # For degenerate cases where the dataset has less than num_process * batch_size samples
                while len(initial_data) < self.num_processes * self.batch_size:
                    initial_data += initial_data

                # If the last batch seen was of the proper size, it has been yielded by its process, so we move on.
                if len(batch) == self.batch_size:
                    batch = []
                    idx += 1

                # Make sure we yield a multiple of self.num_processes batches.
                cycle_index = 0
                while idx % self.num_processes != 0 or len(batch) > 0:
                    end_index = cycle_index + self.batch_size - len(batch)
                    batch += initial_data[cycle_index:end_index]
                    if idx % self.num_processes == self.process_index:
                        yield batch
                    cycle_index = end_index
                    batch = []
                    idx += 1
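

# Illustrative sketch (not part of the original module): sharding a plain `BatchSampler`
# across two mock processes, mirroring the `split_batches=False` example in the docstring
# above. The toy eight-sample dataset is an assumption.
def _example_batch_sampler_shard():
    base = BatchSampler(range(8), batch_size=4, drop_last=False)
    shards = [BatchSamplerShard(base, num_processes=2, process_index=i) for i in range(2)]
    assert list(shards[0]) == [[0, 1, 2, 3]]
    assert list(shards[1]) == [[4, 5, 6, 7]]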


class IterableDatasetShard(IterableDataset):
    """
    Wraps a PyTorch `IterableDataset` to generate samples for one of the processes only. Instances of this class will
    always yield a number of samples that is a round multiple of the actual batch size (depending on the value of
    `split_batches`, this is either `batch_size` or `batch_size x num_processes`). Depending on the value of the
    `drop_last` attribute of the batch sampler passed, it will either stop the iteration at the first batch that would
    be too small or loop with indices from the beginning.

    Args:
        dataset (`torch.utils.data.dataset.IterableDataset`):
            The batch sampler to split in several shards.
        batch_size (`int`, *optional*, defaults to 1):
            The size of the batches per shard (if `split_batches=False`) or the size of the batches (if
            `split_batches=True`).
        drop_last (`bool`, *optional*, defaults to `False`):
            Whether or not to drop the last incomplete batch or complete the last batches by using the samples from the
            beginning.
        num_processes (`int`, *optional*, defaults to 1):
            The number of processes running concurrently.
        process_index (`int`, *optional*, defaults to 0):
            The index of the current process.
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the shards should be created by splitting a batch to give a piece of it on each process, or by
            yielding different full batches on each process.

            On two processes with an iterable dataset yielding of `[0, 1, 2, 3, 4, 5, 6, 7]`, this will result in:

            - the shard on process 0 to yield `[0, 1, 2, 3]` and the shard on process 1 to yield `[4, 5, 6, 7]` if this
              argument is set to `False`.
            - the shard on process 0 to yield `[0, 1, 4, 5]` and the shard on process 1 to yield `[2, 3, 6, 7]` if
              this argument is set to `True`.
    """

    def __init__(
        self,
        dataset: IterableDataset,
        batch_size: int = 1,
        drop_last: bool = False,
        num_processes: int = 1,
        process_index: int = 0,
        split_batches: bool = False,
    ):
        if split_batches and batch_size > 1 and batch_size % num_processes != 0:
            raise ValueError(
                f"To use `IterableDatasetShard` in `split_batches` mode, the batch size ({batch_size}) "
                f"needs to be a round multiple of the number of processes ({num_processes})."
            )
        self.dataset = dataset
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.num_processes = num_processes
        self.process_index = process_index
        self.split_batches = split_batches

    def set_epoch(self, epoch):
        self.epoch = epoch
        if hasattr(self.dataset, "set_epoch"):
            self.dataset.set_epoch(epoch)

    def __len__(self):
        # We will just raise the downstream error if the underlying dataset is not sized.
        if self.drop_last:
            return (len(self.dataset) // (self.batch_size * self.num_processes)) * self.batch_size
        else:
            return math.ceil(len(self.dataset) / (self.batch_size * self.num_processes)) * self.batch_size

    def __iter__(self):
        if (
            not hasattr(self.dataset, "set_epoch")
            and hasattr(self.dataset, "generator")
            and isinstance(self.dataset.generator, torch.Generator)
        ):
            self.dataset.generator.manual_seed(self.epoch)
        real_batch_size = self.batch_size if self.split_batches else (self.batch_size * self.num_processes)
        process_batch_size = (self.batch_size // self.num_processes) if self.split_batches else self.batch_size
        process_slice = range(self.process_index * process_batch_size, (self.process_index + 1) * process_batch_size)

        first_batch = None
        current_batch = []
        for element in self.dataset:
            current_batch.append(element)
            # Wait to have a full batch before yielding elements.
            if len(current_batch) == real_batch_size:
                for i in process_slice:
                    yield current_batch[i]
                if first_batch is None:
                    first_batch = current_batch.copy()
                current_batch = []

        # Finished if drop_last is True, otherwise complete the last batch with elements from the beginning.
        if not self.drop_last and len(current_batch) > 0:
            if first_batch is None:
                first_batch = current_batch.copy()
            while len(current_batch) < real_batch_size:
                current_batch += first_batch
            for i in process_slice:
                yield current_batch[i]


class DataLoaderStateMixin:
    """
    Mixin class that adds a state to a `DataLoader` to keep track of the status inside the dataloader such as at the
    end of the iteration, the number of items in the dataset in the last batch relative to the batch size, and other
    useful information that might be needed.

    **Available attributes:**

        - **end_of_dataloader** (`bool`) -- Whether at the last iteration or batch
        - **remainder** (`int`) -- The number of items that are remaining in the last batch, relative to the total
          batch size

    <Tip warning={true}>

        Inheritors of this class should ensure that the class creates a `GradientState()` instance, stored in
        `self.gradient_state`.

    </Tip>
    """

    def __init_subclass__(cls, **kwargs):
        cls.end_of_dataloader = False
        cls.remainder = -1

    def reset(self):
        self.end_of_dataloader = False
        self.remainder = -1

    def begin(self):
        "Prepares the gradient state for the current dataloader"
        self.reset()
        with suppress(Exception):
            if not self._drop_last:
                length = getattr(self.dataset, "total_dataset_length", len(self.dataset))
                self.remainder = length % self.total_batch_size
        self.gradient_state._add_dataloader(self)

    def end(self):
        "Cleans up the gradient state after exiting the dataloader"
        self.gradient_state._remove_dataloader(self)


class DataLoaderAdapter:
    """
    A class which wraps around a PyTorch `DataLoader` (or variants of it) to be used with the `Accelerator`. For
    compatibility reasons, this class inherits from the class it wraps around, so it can be used as a drop-in.
    """

    def __init__(self, dataset, use_stateful_dataloader=False, batch_sampler=None, **kwargs):
        self.use_stateful_dataloader = use_stateful_dataloader
        if is_torchdata_stateful_dataloader_available():
            from torchdata.stateful_dataloader import StatefulDataLoader

        if use_stateful_dataloader and not is_torchdata_stateful_dataloader_available():
            raise ImportError(
                "StatefulDataLoader is not available. Please install torchdata version 0.8.0 or higher to use it."
            )
        if use_stateful_dataloader:
            torchdata_version = version.parse(importlib.metadata.version("torchdata"))
            if (
                "in_order" in kwargs
                and compare_versions(torchdata_version, "<", "0.11")
                and is_torch_version(">=", "2.6.0")
            ):
                # `in_order` is only supported by `StatefulDataLoader` starting from torchdata 0.11.
                kwargs.pop("in_order")
            self.base_dataloader = StatefulDataLoader(dataset, batch_sampler=batch_sampler, **kwargs)
        else:
            self.base_dataloader = DataLoader(dataset, batch_sampler=batch_sampler, **kwargs)

        if hasattr(self.base_dataloader, "state_dict"):
            self.dl_state_dict = self.base_dataloader.state_dict()

    def __getattr__(self, name):
        # Avoid infinite recursion if we try to access a nonexistent base_dataloader attribute.
        if name == "base_dataloader":
            raise AttributeError()
        # Delegate attribute access to the internal dataloader.
        return getattr(self.base_dataloader, name)

    def state_dict(self):
        return self.dl_state_dict

    def load_state_dict(self, state_dict):
        self.base_dataloader.load_state_dict(state_dict)

    @property
    def __class__(self):
        """
        In order to maintain backwards compatibility with other code, we need to ensure `isinstance(obj, DataLoader)`
        returns true. This is because some downstream code assumes that the `DataLoader` is the base class of the
        object.
        """
        return self.base_dataloader.__class__

    def __len__(self):
        return len(self.base_dataloader)

    def adjust_state_dict_for_prefetch(self):
        """
        Adjusts the state dict for prefetching. Natively, this will adjust all of the iters yielded keys in
        `self.dl_state_dict` by a factor of `num_processes - 1`, however if a custom correction is needed, this can be
        overridden.

        This should modify `self.dl_state_dict` directly
        """
        # The state dict will be off by a factor of `n-1` batches too many during DDP, so we need to adjust it here.
        if PartialState().distributed_type != DistributedType.NO:
            factor = PartialState().num_processes - 1
            if self.dl_state_dict["_sampler_iter_yielded"] > 0:
                self.dl_state_dict["_sampler_iter_yielded"] -= factor
            if self.dl_state_dict["_num_yielded"] > 0:
                self.dl_state_dict["_num_yielded"] -= factor
            if self.dl_state_dict["_index_sampler_state"] is not None:
                if (
                    "samples_yielded" in self.dl_state_dict["_index_sampler_state"]
                    and self.dl_state_dict["_index_sampler_state"]["samples_yielded"] > 0
                ):
                    self.dl_state_dict["_index_sampler_state"]["samples_yielded"] -= self.batch_size * factor

    def _update_state_dict(self):
        # The state_dict of the underlying base_dataloader may be ahead of what is currently being yielded:
        # `DataLoaderShard` keeps its iterator one element ahead of itself, so we need to evaluate the state
        # dict and adjust accordingly.
        if hasattr(self.base_dataloader, "state_dict"):
            self.dl_state_dict = self.base_dataloader.state_dict()
            # Potentially modify the state_dict to adjust for prefetching.
            self.adjust_state_dict_for_prefetch()
            # Then tag if we are at the end of the dataloader.
            self.dl_state_dict["_iterator_finished"] = self.end_of_dataloader


class DataLoaderShard(DataLoaderAdapter, DataLoaderStateMixin):
    """
    Subclass of `DataLoaderAdapter` that will deal with device placement and current distributed setup.

    Args:
        dataset (`torch.utils.data.dataset.Dataset`):
            The dataset to use to build this dataloader.
        device (`torch.device`, *optional*):
            If passed, the device to put all batches on.
        rng_types (list of `str` or [`~utils.RNGType`]):
            The list of random number generators to synchronize at the beginning of each iteration. Should be one or
            several of:

            - `"torch"`: the base torch random number generator
            - `"cuda"`: the CUDA random number generator (GPU only)
            - `"xla"`: the XLA random number generator (TPU only)
            - `"generator"`: an optional `torch.Generator`
        synchronized_generator (`torch.Generator`, *optional*):
            A random number generator to keep synchronized across processes.
        skip_batches (`int`, *optional*, defaults to 0):
            The number of batches to skip at the beginning.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            Whether to have this class adapt `StatefulDataLoader` from `torchdata` instead of the regular `DataLoader`.
        **kwargs (additional keyword arguments, *optional*):
            All other keyword arguments to pass to the regular `DataLoader` initialization.

    **Available attributes:**

        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
            Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
            number of processes

        - **total_dataset_length** (`int`) -- Total length of the inner dataset across all processes.
    """

    def __init__(
        self,
        dataset,
        device=None,
        rng_types=None,
        synchronized_generator=None,
        skip_batches=0,
        use_stateful_dataloader=False,
        _drop_last: bool = False,
        _non_blocking: bool = False,
        torch_device_mesh=None,
        **kwargs,
    ):
        super().__init__(dataset, use_stateful_dataloader=use_stateful_dataloader, **kwargs)
        self.device = device
        self.rng_types = rng_types
        self.synchronized_generator = synchronized_generator
        self.skip_batches = skip_batches
        self.gradient_state = GradientState()
        self._drop_last = _drop_last
        self._non_blocking = _non_blocking
        self.iteration = 0

    def __iter__(self):
        if self.rng_types is not None:
            synchronize_rng_states(self.rng_types, self.synchronized_generator)
        self.begin()

        self.set_epoch(self.iteration)
        dataloader_iter = self.base_dataloader.__iter__()
        # We iterate one batch ahead to check when we are at the end.
        try:
            current_batch = next(dataloader_iter)
        except StopIteration:
            yield

        batch_index = 0
        while True:
            try:
                # But we still move it to the device so it is done before `StopIteration` is reached.
                if self.device is not None:
                    current_batch = send_to_device(current_batch, self.device, non_blocking=self._non_blocking)
                self._update_state_dict()
                next_batch = next(dataloader_iter)
                if batch_index >= self.skip_batches:
                    yield current_batch
                batch_index += 1
                current_batch = next_batch
            except StopIteration:
                self.end_of_dataloader = True
                self._update_state_dict()
                if batch_index >= self.skip_batches:
                    yield current_batch
                break

        self.iteration += 1
        self.end()

    def __reduce__(self):
        """
        Define the `__reduce__` method to ensure a `DataLoaderShard` can be pickled and unpickled. This needs to be
        explicitly defined since default pickling behavior is broken by `DataLoaderAdapter` messing with its
        `__class__` member.
        """
        args = super().__reduce__()
        return (DataLoaderShard, *args[1:])

    def set_epoch(self, epoch: int):
        # In case it is manually passed in, the user can set it to what they like.
        if self.iteration != epoch:
            self.iteration = epoch
        if hasattr(self.batch_sampler, "set_epoch"):
            self.batch_sampler.set_epoch(epoch)
        if hasattr(self.batch_sampler, "sampler") and hasattr(self.batch_sampler.sampler, "set_epoch"):
            self.batch_sampler.sampler.set_epoch(epoch)
        # Case of a sampler wrapped in a `BatchSamplerShard`.
        if (
            hasattr(self.batch_sampler, "batch_sampler")
            and hasattr(self.batch_sampler.batch_sampler, "sampler")
            and hasattr(self.batch_sampler.batch_sampler.sampler, "set_epoch")
        ):
            self.batch_sampler.batch_sampler.sampler.set_epoch(epoch)
        # We support if a custom `Dataset` implementation has `set_epoch`, or in general HF datasets `Dataset`s.
        elif hasattr(self.dataset, "set_epoch"):
            self.dataset.set_epoch(epoch)

    @property
    def total_batch_size(self):
        batch_sampler = self.sampler if isinstance(self.sampler, BatchSampler) else self.batch_sampler
        return (
            batch_sampler.batch_size
            if getattr(batch_sampler, "split_batches", False)
            else (batch_sampler.batch_size * getattr(batch_sampler, "num_processes", 1))
        )

    @property
    def total_dataset_length(self):
        if hasattr(self.dataset, "total_length"):
            return self.dataset.total_length
        else:
            return len(self.dataset)

    def get_sampler(self):
        return get_sampler(self)

    def set_sampler(self, sampler):
        sampler_is_batch_sampler = isinstance(self.sampler, BatchSampler)
        if sampler_is_batch_sampler:
            self.sampler.sampler = sampler
        else:
            self.batch_sampler.sampler = sampler
            if hasattr(self.batch_sampler, "batch_sampler"):
                self.batch_sampler.batch_sampler.sampler = sampler


if is_torch_xla_available():
    import torch_xla.distributed.parallel_loader as xpl

    class MpDeviceLoaderWrapper(xpl.MpDeviceLoader):
        """
        Wrapper for the xpl.MpDeviceLoader class that knows the total batch size.

        XLA preloading threads will all call DataLoaderShard's __iter__(). Remove rng_types from DataLoaderShard to
        prevent it from using the XLA device in the preloading threads, and synchronize the RNG once from the main
        thread only.

        **Available attributes:**

        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
            Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
            number of processes

        - **total_dataset_length** (`int`) -- Total length of the inner dataset across all processes.
        """

        def __init__(self, dataloader: DataLoaderShard, device: torch.device):
            super().__init__(dataloader, device)
            self._rng_types = self._loader.rng_types
            self._loader.rng_types = None
            self.device = device

        def __iter__(self):
            if self._rng_types is not None:
                synchronize_rng_states(self._rng_types, self._loader.synchronized_generator)

            return super().__iter__()

        def set_epoch(self, epoch: int):
            if hasattr(self.dataloader, "set_epoch"):
                self.dataloader.set_epoch(epoch)

        @property
        def total_batch_size(self):
            return self._loader.total_batch_size

        @property
        def total_dataset_length(self):
            return self._loader.total_dataset_length

        @property
        def batch_sampler(self):
            return self._loader.batch_sampler

        @property
        def dataloader(self):
            return self._loader


class DataLoaderDispatcher(DataLoaderAdapter, DataLoaderStateMixin):
    """
    Subclass of `DataLoaderAdapter` that will iterate and preprocess on process 0 only, then dispatch on each process
    their part of the batch.

    Args:
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing of
            `num_processes` batches at each iteration). Another way to see this is that the observed batch size will be
            the same as the initial `dataloader` if this option is set to `True`, the batch size of the initial
            `dataloader` multiplied by `num_processes` otherwise. Setting this option to `True` requires that the batch
            size of the `dataloader` is a round multiple of `batch_size`.
        skip_batches (`int`, *optional*, defaults to 0):
            The number of batches to skip at the beginning of an iteration.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            Whether to have this class adapt `StatefulDataLoader` from `torchdata` instead of the regular `DataLoader`.

    **Available attributes:**

        - **total_batch_size** (`int`) -- Total batch size of the dataloader across all processes.
            Equal to the original batch size when `split_batches=True`; otherwise the original batch size * the total
            number of processes

        - **total_dataset_length** (`int`) -- Total length of the inner dataset across all processes.
    """

    def __init__(
        self,
        dataset,
        split_batches: bool = False,
        skip_batches=0,
        use_stateful_dataloader=False,
        _drop_last: bool = False,
        _non_blocking: bool = False,
        slice_fn=None,
        torch_device_mesh=None,
        **kwargs,
    ):
        shuffle = False
        from torch.utils.data.datapipes.iter.combinatorics import ShufflerIterDataPipe

        # We need to save the shuffling state of the DataPipe before wrapping it.
        if isinstance(dataset, ShufflerIterDataPipe):
            shuffle = dataset._shuffle_enabled
        super().__init__(dataset, use_stateful_dataloader=use_stateful_dataloader, **kwargs)
        self.split_batches = split_batches
        if shuffle:
            torch.utils.data.graph_settings.apply_shuffle_settings(dataset, shuffle=shuffle)

        self.gradient_state = GradientState()
        self.state = PartialState()
        self._drop_last = _drop_last
        self._non_blocking = _non_blocking
        self.skip_batches = skip_batches
        self.torch_device_mesh = torch_device_mesh

        self.slice_fn = slice_tensors if slice_fn is None else slice_fn
        self.iteration = 0

        # A device mesh is only relevant here when tensor parallelism (tp) is involved: dp/fsdp
        # alone already need different batches per process, which is the default behavior.
        self.submesh_tp = None
        self.submesh_dp = None
        self.submesh_fsdp = None
        if self.torch_device_mesh and "tp" in self.torch_device_mesh.mesh_dim_names:
            self.submesh_tp = self.torch_device_mesh["tp"]
            if "dp" in self.torch_device_mesh.mesh_dim_names:
                self.submesh_dp = self.torch_device_mesh["dp"]
            if "fsdp" in self.torch_device_mesh.mesh_dim_names:
                self.submesh_fsdp = self.torch_device_mesh["fsdp"]
        if self.submesh_tp and (self.submesh_dp or self.submesh_fsdp):
            raise ValueError("TP + (DP/FSDP) is not yet supported in dispatch mode")

    def _fetch_batches(self, iterator):
        batches, batch = None, None
        # On process 0, we gather the batch to dispatch.
        if self.state.process_index == 0:
            try:
                # For the TP case we want to dispatch the same batch of samples across all ranks,
                # so `split_batches` (which expects duplicate batches) is discouraged.
                if self.split_batches:
                    # One batch of the main iterator is dispatched and split.
                    if self.submesh_tp:
                        logger.warning(
                            "Use of split_batches for TP would need the dataloader to produce duplicate batches,"
                            "otherwise, use dispatch_batches=True instead."
                        )
                    self._update_state_dict()
                    batch = next(iterator)
                else:
                    # num_processes batches of the main iterator are concatenated then dispatched and split.
                    # We add the batches one by one so we have the remainder available when drop_last=False.
                    batches = []
                    if self.submesh_tp:
                        # When tp, extract a single batch and then replicate it.
                        self._update_state_dict()
                        batch = next(iterator)
                        batches = [batch] * self.state.num_processes
                    else:
                        for _ in range(self.state.num_processes):
                            self._update_state_dict()
                            batches.append(next(iterator))
                    try:
                        batch = concatenate(batches, dim=0)
                    except RuntimeError as e:
                        raise RuntimeError(
                            "You can't use batches of different size with `dispatch_batches=True` or when using an "
                            "`IterableDataset`. Either pass `dispatch_batches=False` and have each process fetch its "
                            "own batch, or pass `split_batches=True`. By doing so, the main process will fetch a full "
                            "batch and slice it into `num_processes` batches for each process."
                        ) from e
                # In both cases, we need to get the structure of the batch that we will broadcast on other
                # processes to initialize the tensors with the right shape.
                # (data_structure, stop_iteration)
                batch_info = [get_data_structure(batch), False]
            except StopIteration:
                batch_info = [None, True]
        else:
            batch_info = [None, self._stop_iteration]
        # This is inplace, so after this instruction, every process has the same `batch_info` as process 0.
        broadcast_object_list(batch_info)
        self._stop_iteration = batch_info[1]
        if self._stop_iteration:
            # If drop_last is False and split_batches is False, we may have a remainder to take care of.
            if not self.split_batches and not self._drop_last:
                if self.state.process_index == 0 and len(batches) > 0:
                    batch = concatenate(batches, dim=0)
                    batch_info = [get_data_structure(batch), False]
                else:
                    batch_info = [None, True]
                broadcast_object_list(batch_info)
        return batch, batch_info

    def __iter__(self):
        self.begin()
        self.set_epoch(self.iteration)
        main_iterator = None
        if is_torch_version(">=", "2.0.1"):
            # NOTE: PyTorch DataLoader adds forward compatibilities for DataPipes, which broadcasts
            # the shared seed to all processes if the DataLoader's dataset is a DataPipe.
            main_iterator = self.base_dataloader.__iter__()
        elif self.state.process_index == 0:
            main_iterator = self.base_dataloader.__iter__()
        stop_iteration = False
        self._stop_iteration = False
        first_batch = None
        next_batch, next_batch_info = self._fetch_batches(main_iterator)
        batch_index = 0
        while not stop_iteration:
            batch, batch_info = next_batch, next_batch_info

            if self.state.process_index != 0:
                # Initialize tensors on other processes than process 0.
                batch = initialize_tensors(batch_info[0])
            batch = send_to_device(batch, self.state.device, non_blocking=self._non_blocking)
            # Broadcast the batch before splitting it.
            batch = broadcast(batch, from_process=0)

            if not self._drop_last and first_batch is None:
                # We keep at least num processes elements of the first batch to be able to complete the last batch.
                first_batch = self.slice_fn(
                    batch,
                    slice(0, self.state.num_processes),
                    process_index=self.state.process_index,
                    num_processes=self.state.num_processes,
                )

            if batch is None:
                raise ValueError(
                    f"Batch does not contain any data (`{batch}`). At the end of all iterable data available before expected stop iteration."
                )

            observed_batch_size = find_batch_size(batch)
            batch_size = observed_batch_size // self.state.num_processes

            stop_iteration = self._stop_iteration
            if not stop_iteration:
                # We may still be at the end of the dataloader without knowing it yet: if there is nothing left in
                # the dataloader since the number of batches is a round multiple of the number of processes.
                next_batch, next_batch_info = self._fetch_batches(main_iterator)
                # next_batch_info[0] is None when there are no more batches, otherwise we still need to process them.
                if self._stop_iteration and next_batch_info[0] is None:
                    stop_iteration = True

            if not self._drop_last and stop_iteration and observed_batch_size % self.state.num_processes != 0:
                # If the last batch is not complete, let's add the first batch to it.
                batch = concatenate([batch, first_batch], dim=0)
                # Batch size computation above is wrong, it's off by 1 so we fix it.
                batch_size += 1

            data_slice = slice(self.state.process_index * batch_size, (self.state.process_index + 1) * batch_size)
            batch = self.slice_fn(
                batch,
                data_slice,
                process_index=self.state.process_index,
                num_processes=self.state.num_processes,
            )

            if stop_iteration:
                self.end_of_dataloader = True
                self._update_state_dict()
                self.remainder = observed_batch_size
            if batch_index >= self.skip_batches:
                yield batch
            batch_index += 1
        self.iteration += 1
        self.end()

    def set_epoch(self, epoch: int):
        # In case it is manually passed in, the user can set it to what they like.
        if self.iteration != epoch:
            self.iteration = epoch
        if hasattr(self.batch_sampler, "sampler") and hasattr(self.batch_sampler.sampler, "set_epoch"):
            self.batch_sampler.sampler.set_epoch(epoch)
        elif hasattr(self.dataset, "set_epoch"):
            self.dataset.set_epoch(epoch)

    def __len__(self):
        whole_length = len(self.base_dataloader)
        if self.split_batches:
            return whole_length
        elif self._drop_last:
            return whole_length // self.state.num_processes
        else:
            return math.ceil(whole_length / self.state.num_processes)

    def __reduce__(self):
        """
        Define the `__reduce__` method to ensure a `DataLoaderDispatcher` can be pickled and unpickled. This needs to
        be explicitly defined since default pickling behavior is broken by `DataLoaderAdapter` messing with its
        `__class__` member.
        """
        args = super().__reduce__()
        return (DataLoaderDispatcher, *args[1:])

    @property
    def total_batch_size(self):
        return (
            self.dataset.batch_size if self.split_batches else (self.dataset.batch_size * self.dataset.num_processes)
        )

    @property
    def total_dataset_length(self):
        return len(self.dataset)

    def get_sampler(self):
        return get_sampler(self)

    def set_sampler(self, sampler):
        sampler_is_batch_sampler = isinstance(self.sampler, BatchSampler)
        if sampler_is_batch_sampler:
            self.sampler.sampler = sampler
        else:
            self.batch_sampler.sampler = sampler
            if hasattr(self.batch_sampler, "batch_sampler"):
                self.batch_sampler.batch_sampler.sampler = sampler


def get_sampler(dataloader):
    """
    Get the sampler associated to the dataloader

    Args:
        dataloader (`torch.utils.data.dataloader.DataLoader`):
            The data loader to split across several devices.
    Returns:
        `torch.utils.data.Sampler`: The sampler associated to the dataloader
    """
    sampler_is_batch_sampler = isinstance(dataloader.sampler, BatchSampler)
    if sampler_is_batch_sampler:
        sampler = getattr(dataloader.sampler, "sampler", None)
    else:
        sampler = getattr(dataloader.batch_sampler, "sampler", None)
    return sampler


def prepare_data_loader(
    dataloader: DataLoader,
    device: Optional[torch.device] = None,
    num_processes: Optional[int] = None,
    process_index: Optional[int] = None,
    split_batches: bool = False,
    put_on_device: bool = False,
    rng_types: Optional[list[Union[str, RNGType]]] = None,
    dispatch_batches: Optional[bool] = None,
    even_batches: bool = True,
    slice_fn_for_dispatch: Optional[Callable] = None,
    use_seedable_sampler: bool = False,
    data_seed: Optional[int] = None,
    non_blocking: bool = False,
    use_stateful_dataloader: bool = False,
    torch_device_mesh=None,
) -> DataLoader:
    """
    Wraps a PyTorch `DataLoader` to generate batches for one of the processes only.

    Depending on the value of the `drop_last` attribute of the `dataloader` passed, it will either stop the iteration
    at the first batch that would be too small / not present on all processes or loop with indices from the beginning.

    Args:
        dataloader (`torch.utils.data.dataloader.DataLoader`):
            The data loader to split across several devices.
        device (`torch.device`):
            The target device for the returned `DataLoader`.
        num_processes (`int`, *optional*):
            The number of processes running concurrently. Will default to the value given by [`~state.PartialState`].
        process_index (`int`, *optional*):
            The index of the current process. Will default to the value given by [`~state.PartialState`].
        split_batches (`bool`, *optional*, defaults to `False`):
            Whether the resulting `DataLoader` should split the batches of the original data loader across devices or
            yield full batches (in which case it will yield batches starting at the `process_index`-th and advancing of
            `num_processes` batches at each iteration).

            Another way to see this is that the observed batch size will be the same as the initial `dataloader` if
            this option is set to `True`, the batch size of the initial `dataloader` multiplied by `num_processes`
            otherwise.

            Setting this option to `True` requires that the batch size of the `dataloader` is a round multiple of
            `batch_size`.
        put_on_device (`bool`, *optional*, defaults to `False`):
            Whether or not to put the batches on `device` (only works if the batches are nested list, tuples or
            dictionaries of tensors).
        rng_types (list of `str` or [`~utils.RNGType`]):
            The list of random number generators to synchronize at the beginning of each iteration. Should be one or
            several of:

            - `"torch"`: the base torch random number generator
            - `"cuda"`: the CUDA random number generator (GPU only)
            - `"xla"`: the XLA random number generator (TPU only)
            - `"generator"`: the `torch.Generator` of the sampler (or batch sampler if there is no sampler in your
              dataloader) or of the iterable dataset (if it exists) if the underlying dataset is of that type.

        dispatch_batches (`bool`, *optional*):
            If set to `True`, the dataloader prepared is only iterated through on the main process and then the batches
            are split and broadcast to each process. Will default to `True` when the underlying dataset is an
            `IterableDataset`, `False` otherwise.
        even_batches (`bool`, *optional*, defaults to `True`):
            If set to `True`, in cases where the total batch size across all processes does not exactly divide the
            dataset, samples at the start of the dataset will be duplicated so the batch can be divided equally among
            all workers.
        slice_fn_for_dispatch (`Callable`, *optional*):
            If passed, this function will be used to slice tensors across `num_processes`. Will default to
            [`~utils.slice_tensors`]. This argument is used only when `dispatch_batches` is set to `True` and will be
            ignored otherwise.
        use_seedable_sampler (`bool`, *optional*, defaults to `False`):
            Whether to use the [`~data_loader.SeedableRandomSampler`] instead of a `RandomSampler` for better
            reproducibility. Comes at a cost of potentially different performances due to different shuffling
            algorithms but ensures results will be the *exact* same. Should be paired with `set_seed()` at every
            `self.set_epoch`
        data_seed (`int`, *optional*, defaults to `None`):
            The seed to use for the underlying generator when using `use_seedable_sampler`. If `None`, the generator
            will use the current default seed from torch.
        non_blocking (`bool`, *optional*, defaults to `False`):
            If set to `True`, the dataloader will utilize non-blocking host-to-device transfers. If the dataloader has
            `pin_memory` set to `True`, this will help to increase overlap between data transfer and computations.
        use_stateful_dataloader (`bool`, *optional*, defaults to `False`):
            If set to true, the dataloader prepared by the Accelerator will be backed by
            [torchdata.StatefulDataLoader](https://github.com/pytorch/data/tree/main/torchdata/stateful_dataloader).
            This requires `torchdata` version 0.8.0 or higher that supports StatefulDataLoader to be installed.
        torch_device_mesh (`torch.distributed.DeviceMesh`, *optional*, defaults to `None`):
            PyTorch device mesh.

    Returns:
        `torch.utils.data.dataloader.DataLoader`: A new data loader that will yield the portion of the batches

    <Tip warning={true}>

    `BatchSampler`s with varying batch sizes are not enabled by default. To enable this behaviour, set `even_batches`
    equal to `False`

    </Tip>
    """
    if dispatch_batches is None:
        if not put_on_device:
            dispatch_batches = False
        else:
            dispatch_batches = isinstance(dataloader.dataset, IterableDataset)

    if dispatch_batches and not put_on_device:
        raise ValueError("Using `dispatch_batches=True` requires `put_on_device=True`.")
    # Grab defaults from PartialState
    state = PartialState()
    if num_processes is None:
        num_processes = state.num_processes
    if process_index is None:
        process_index = state.process_index

    if torch_device_mesh:
        if state.distributed_type == DistributedType.DEEPSPEED:
            # In DeepSpeed, only the tp dimension changes how batches are assigned: ranks in the
            # same tp group must receive the same batch.
            submesh_tp_size = 1
            if "tp" in torch_device_mesh.mesh_dim_names:
                submesh_tp_size = torch_device_mesh["tp"].size()
            num_processes = num_processes // submesh_tp_size
            process_index = process_index // submesh_tp_size
        else:
            # When a device mesh is used, specifically with tp, process_index and num_processes are
            # updated so the same batch is generated across tp ranks and different batches across
            # fsdp and dp ranks.
            submesh_fsdp_size = 1
            submesh_dp_size = 1
            submesh_tp_size = 1
            submesh_cp_size = 1
            if "tp" in torch_device_mesh.mesh_dim_names:
                submesh_tp_size = torch_device_mesh["tp"].size()
            if "cp" in torch_device_mesh.mesh_dim_names:
                submesh_cp_size = torch_device_mesh["cp"].size()
            if "dp_replicate" in torch_device_mesh.mesh_dim_names:
                submesh_dp_size = torch_device_mesh["dp_replicate"].size()
            if "dp_shard" in torch_device_mesh.mesh_dim_names:
                submesh_fsdp_size = torch_device_mesh["dp_shard"].size()
            num_processes = submesh_fsdp_size * submesh_dp_size
            process_index = process_index // (submesh_tp_size * submesh_cp_size)

    if split_batches:
        if dataloader.batch_size is not None:
            batch_size_for_check = dataloader.batch_size
        else:
            # For custom batch samplers
            if hasattr(dataloader.batch_sampler, "batch_size"):
                batch_size_for_check = dataloader.batch_sampler.batch_size
            else:
                raise ValueError(
                    "In order to use `split_batches==True` you must have a `batch_size` attribute either in the passed "
                    "`dataloader` or `dataloader.batch_sampler` objects, and it has to return a natural number. "
                    "Your `dataloader.batch_size` is None and `dataloader.batch_sampler` "
                    f"(`{type(dataloader.batch_sampler)}`) does not have the `batch_size` attribute set."
                )

        if batch_size_for_check > 1 and batch_size_for_check % num_processes != 0:
            raise ValueError(
                f"To use a `DataLoader` in `split_batches` mode, the batch size ({dataloader.batch_size}) "
                f"needs to be a round multiple of the number of processes ({num_processes})."
            )

    new_dataset = dataloader.dataset
    # Iterable dataset doesn't like batch_sampler, but DataLoader creates a default one for it
    new_batch_sampler = dataloader.batch_sampler if not isinstance(new_dataset, IterableDataset) else None
    sampler_is_batch_sampler = isinstance(dataloader.sampler, BatchSampler)
    synchronized_generator = None

    sampler = get_sampler(dataloader)
    if isinstance(sampler, RandomSampler) and use_seedable_sampler:
        # When iterating through the dataloader during distributed processes we want to ensure that on each process we
        # are iterating through the same samples in the same order if a seed is set. This requires a tweak to the
        # `torch.utils.data.RandomSampler` class (if used).
        sampler = SeedableRandomSampler(
            data_source=sampler.data_source,
            replacement=sampler.replacement,
            num_samples=sampler._num_samples,
            generator=getattr(
                sampler,
                "generator",
                torch.Generator(device=torch.get_default_device() if hasattr(torch, "get_default_device") else "cpu"),
            ),
            data_seed=data_seed,
        )

    if isinstance(dataloader.sampler, RandomSampler) and state.distributed_type == DistributedType.XLA:
        # isinstance(dataloader.sampler, RandomSampler) indicates the original dataloader has `shuffle` enabled.
        generator = torch.Generator(
            device=torch.get_default_device() if hasattr(torch, "get_default_device") else "cpu"
        )
        seed = int(torch.empty((), dtype=torch.int64).random_().item())
        generator.manual_seed(seed)
        dataloader.generator = generator
        dataloader.sampler.generator = generator

    # No change if no multiprocess
    if (num_processes != 1 or state.distributed_type == DistributedType.MEGATRON_LM) and not dispatch_batches:
        if is_datasets_available():
            from datasets import IterableDataset as DatasetsIterableDataset
        if (
            is_datasets_available()
            and isinstance(new_dataset, DatasetsIterableDataset)
            and not split_batches
            and new_dataset.n_shards > num_processes
        ):
            new_dataset = new_dataset.shard(num_shards=num_processes, index=process_index)
        elif isinstance(new_dataset, IterableDataset):
            if getattr(dataloader.dataset, "generator", None) is not None:
                synchronized_generator = dataloader.dataset.generator
            new_dataset = IterableDatasetShard(
                new_dataset,
                batch_size=dataloader.batch_size,
                drop_last=dataloader.drop_last,
                num_processes=num_processes,
                process_index=process_index,
                split_batches=split_batches,
            )
        else:
            if not use_seedable_sampler and hasattr(sampler, "generator"):
                if sampler.generator is None:
                    sampler.generator = torch.Generator(
                        device=torch.get_default_device() if hasattr(torch, "get_default_device") else "cpu"
                    )
                    seed = int(torch.empty((), dtype=torch.int64).random_().item())
                    sampler.generator.manual_seed(seed)
                synchronized_generator = sampler.generator
            batch_sampler = dataloader.sampler if sampler_is_batch_sampler else dataloader.batch_sampler
            new_batch_sampler = BatchSamplerShard(
                batch_sampler,
                num_processes=num_processes,
                process_index=process_index,
                split_batches=split_batches,
                even_batches=even_batches,
            )

    # We ignore all of those since they are all dealt with by our new_batch_sampler
    ignore_kwargs = [
        "batch_size",
        "shuffle",
        "sampler",
        "batch_sampler",
        "drop_last",
    ]

    if rng_types is not None and synchronized_generator is None and "generator" in rng_types:
        rng_types.remove("generator")

    kwargs = {
        k: getattr(dataloader, k, _PYTORCH_DATALOADER_KWARGS[k])
        for k in _PYTORCH_DATALOADER_KWARGS
        if k not in ignore_kwargs
    }

    # Need to provide batch_size as batch_sampler is None for Iterable dataset
    if new_batch_sampler is None:
        kwargs["drop_last"] = dataloader.drop_last
        kwargs["batch_size"] = (
            dataloader.batch_size // num_processes if split_batches and not dispatch_batches else dataloader.batch_size
        )
    if dispatch_batches:
        kwargs.pop("generator")
        dataloader = DataLoaderDispatcher(
            new_dataset,
            split_batches=split_batches,
            batch_sampler=new_batch_sampler,
            _drop_last=dataloader.drop_last,
            _non_blocking=non_blocking,
            slice_fn=slice_fn_for_dispatch,
            use_stateful_dataloader=use_stateful_dataloader,
            torch_device_mesh=torch_device_mesh,
            **kwargs,
        )
    elif sampler_is_batch_sampler:
        dataloader = DataLoaderShard(
            new_dataset,
            device=device if put_on_device and state.distributed_type != DistributedType.XLA else None,
            sampler=new_batch_sampler,
            batch_size=dataloader.batch_size,
            rng_types=rng_types,
            _drop_last=dataloader.drop_last,
            _non_blocking=non_blocking,
            synchronized_generator=synchronized_generator,
            use_stateful_dataloader=use_stateful_dataloader,
            **kwargs,
        )
    else:
        dataloader = DataLoaderShard(
            new_dataset,
            device=device if put_on_device and state.distributed_type != DistributedType.XLA else None,
            batch_sampler=new_batch_sampler,
            rng_types=rng_types,
            synchronized_generator=synchronized_generator,
            _drop_last=dataloader.drop_last,
            _non_blocking=non_blocking,
            use_stateful_dataloader=use_stateful_dataloader,
            **kwargs,
        )

    if isinstance(sampler, SeedableRandomSampler) and use_seedable_sampler:
        dataloader.set_sampler(sampler)
    if state.distributed_type == DistributedType.XLA:
        return MpDeviceLoaderWrapper(dataloader, device)
    return dataloader


class SkipBatchSampler(BatchSampler):
    """
    A `torch.utils.data.BatchSampler` that skips the first `n` batches of another `torch.utils.data.BatchSampler`.
    Should not be used if the original dataloader is a `StatefulDataLoader`.
    """

    def __init__(self, batch_sampler, skip_batches=0):
        self.batch_sampler = batch_sampler
        self.skip_batches = skip_batches

    def __iter__(self):
        for index, samples in enumerate(self.batch_sampler):
            if index >= self.skip_batches:
                yield samples

    @property
    def total_length(self):
        return len(self.batch_sampler)

    def __len__(self):
        return len(self.batch_sampler) - self.skip_batches


class SkipDataLoader(DataLoaderAdapter, DataLoaderStateMixin):
    """
    Subclass of a PyTorch `DataLoader` that will skip the first batches. Generally it's preferable to use
    `skip_first_batches`/`torchdata.StatefulDataLoader` instead of this class.

    Args:
        dataset (`torch.utils.data.dataset.Dataset`):
            The dataset to use to build this dataloader.
        skip_batches (`int`, *optional*, defaults to 0):
            The number of batches to skip at the beginning.
        kwargs:
            All other keyword arguments to pass to the regular `DataLoader` initialization.
    """

    def __init__(self, dataset, skip_batches=0, use_stateful_dataloader=False, **kwargs):
        super().__init__(dataset, use_stateful_dataloader=use_stateful_dataloader, **kwargs)
        self.skip_batches = skip_batches
        self.gradient_state = GradientState()

    def __iter__(self):
        self.begin()
        for index, batch in enumerate(self.base_dataloader.__iter__()):
            if index >= self.skip_batches:
                self._update_state_dict()
                yield batch
        self.end()

    def __len__(self):
        return len(self.base_dataloader) - self.skip_batches

    def __reduce__(self):
        """
        Define the `__reduce__` method to ensure a `SkipDataLoader` can be pickled and unpickled. This needs to be
        explicitly defined since default pickling behavior is broken by `DataLoaderAdapter` messing with its
        `__class__` member.
        """
        args = super().__reduce__()
        return (SkipDataLoader, *args[1:])


def skip_first_batches(dataloader, num_batches=0):
    """
    Creates a `torch.utils.data.DataLoader` that will efficiently skip the first `num_batches`. Should not be used if
    the original dataloader is a `StatefulDataLoader`.
    """
    state = PartialState()
    if state.distributed_type == DistributedType.XLA:
        device = dataloader.device
        dataloader = dataloader.dataloader

    dataset = dataloader.dataset
    sampler_is_batch_sampler = False
    if isinstance(dataset, IterableDataset):
        new_batch_sampler = None
    else:
        sampler_is_batch_sampler = isinstance(dataloader.sampler, BatchSampler)
        batch_sampler = dataloader.sampler if sampler_is_batch_sampler else dataloader.batch_sampler
        new_batch_sampler = SkipBatchSampler(batch_sampler, skip_batches=num_batches)

    # We ignore all of those since they are all dealt with by our new_batch_sampler
    ignore_kwargs = [
        "batch_size",
        "shuffle",
        "sampler",
        "batch_sampler",
        "drop_last",
    ]

    kwargs = {
        k: getattr(dataloader, k, _PYTORCH_DATALOADER_KWARGS[k])
        for k in _PYTORCH_DATALOADER_KWARGS
        if k not in ignore_kwargs
    }

    # Need to provide batch_size as batch_sampler is None for Iterable dataset
    if new_batch_sampler is None:
        kwargs["drop_last"] = dataloader.drop_last
        kwargs["batch_size"] = dataloader.batch_size

    if isinstance(dataloader, DataLoaderDispatcher):
        if new_batch_sampler is None:
            # Need to manually skip batches in the dataloader
            kwargs["skip_batches"] = num_batches
        dataloader = DataLoaderDispatcher(
            dataset,
            split_batches=dataloader.split_batches,
            batch_sampler=new_batch_sampler,
            _drop_last=dataloader._drop_last,
            **kwargs,
        )
    elif isinstance(dataloader, DataLoaderShard):
        if new_batch_sampler is None:
            # Need to manually skip batches in the dataloader
            kwargs["skip_batches"] = num_batches
        elif sampler_is_batch_sampler:
            kwargs["sampler"] = new_batch_sampler
            kwargs["batch_size"] = dataloader.batch_size
        else:
            kwargs["batch_sampler"] = new_batch_sampler
        dataloader = DataLoaderShard(
            dataset,
            device=dataloader.device,
            rng_types=dataloader.rng_types,
            synchronized_generator=dataloader.synchronized_generator,
            **kwargs,
        )
    else:
        if new_batch_sampler is None:
            # Need to manually skip batches in the dataloader
            dataloader = SkipDataLoader(dataset, skip_batches=num_batches, **kwargs)
        else:
            dataloader = DataLoader(dataset, batch_sampler=new_batch_sampler, **kwargs)

    if state.distributed_type == DistributedType.XLA:
        dataloader = MpDeviceLoaderWrapper(dataloader, device)

    return dataloader
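

# Illustrative sketch (not part of the original module): the typical end-to-end flow in a
# single-process environment. `prepare_data_loader` shards a regular dataloader for the
# current process, and `skip_first_batches` resumes a partially consumed epoch. The toy
# sixteen-sample dataset and the two-process split are assumptions for the demo.
def _example_prepare_and_skip():
    base = DataLoader(list(range(16)), batch_size=4, shuffle=True)
    prepared = prepare_data_loader(base, num_processes=2, process_index=0)
    assert len(prepared) == 2  # 4 base batches sharded across 2 processes
    resumed = skip_first_batches(prepared, num_batches=1)
    return [batch for batch in resumed]  # yields only the last batch of this shard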