
    9ia^                        d Z ddlZddlmZmZ ddlmZmZ ddlZddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZmZ  ed      r	ddlmc mZ  e       rddlmZ d Zd Zd Zd Zd ZedddZd3dZ d Z!d Z"d Z#d Z$d Z%d Z&d Z' G d de(      Z)d Z*d  Z+e*d!        Z,d"efd#Z-d"efd$Z.d4d%Z/d5d&Z0e*d4d'e1fd(       Z2d4d'e1fd)Z3d6d*Z4d4d+Z5e+d7d,       Z6e*d8d-       Z7d. Z8 G d/ d0      Z9d1 Z:d2 Z;y)9zB
A set of basic tensor ops compatible with tpu, gpu, and multigpu
    N)update_wrapperwraps)AnyMapping   )PartialState   )!TORCH_DISTRIBUTED_OPERATION_TYPES)DistributedTypeTensorInformation)is_torch_distributed_availableis_torch_versionis_tpu_availableF)check_device)ReduceOpc                 6    t        | t        j                        S N)
isinstancetorchTensortensors    e/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/accelerate/utils/operations.pyis_torch_tensorr   '   s    fell++    c           
      v   t        | t        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j                  t        j                  j                  t        j                  j                  t        j                  j                        S r   )
r   r   xpuFloatTensor
ByteTensor	IntTensor
LongTensor
HalfTensorDoubleTensorBFloat16Tensorr   s    r   is_torch_xpu_tensorr%   +   sm    														  	 	r   c                 "    t        | t              S r   )r   r   tensor_infos    r   is_tensor_informationr)   8   s    k#455r   c                     t        |       }|j                  }t        |      dk7  s|d   t        k7  ryt	        |dd      }t        |t              syt        d |D              S )z
    Checks if `x` is a `namedtuple` or not. Can have false positives, but only if a user is trying to mimic a
    `namedtuple` perfectly.
    r	   r   F_fieldsNc              3   <   K   | ]  }t        |t                y wr   )r   str).0members     r   	<genexpr>z is_namedtuple.<locals>.<genexpr>H   s     <6z&#&<s   )type	__bases__lentuplegetattrr   all)data	data_typebasesfieldss       r   is_namedtupler;   <   s]    
 T
IE
5zQ%(e+Y	40Ffe$<V<<<r   c                 h    t        |       r t        |       t        |       S  t        |       |      S )zO
    Cast a generator to the same type as obj (list, tuple, or namedtuple)
    )r;   r1   list)obj	generators     r   
honor_typer@   K   s2    
 StCy$y/**tCy##r   	test_typeerror_on_other_typec                    t        |t        t        f      rt        | fd|D              S t        |t              rD t        |      |j                         D ci c]  \  }}|t         |gd c}}      S  |      r  |gi S r2t        dt        |       d j                   dj                   d      |S c c}}w )a9  
    Recursively apply a function on a data structure that is a nested list/tuple/dictionary of a given base type.

    Args:
        func (`callable`):
            The function to recursively apply.
        data (nested list/tuple/dictionary of `main_type`):
            The data on which to apply `func`
        *args:
            Positional arguments that will be passed to `func` when applied on the unpacked data.
        main_type (`type`, *optional*, defaults to `torch.Tensor`):
            The base type of the objects to which apply `func`.
        error_on_other_type (`bool`, *optional*, defaults to `False`):
            Whether to return an error or not if after unpacking `data`, we get on an object that is not of type
            `main_type`. If `False`, the function will leave objects of types different than `main_type` unchanged.
        **kwargs:
            Keyword arguments that will be passed to `func` when applied on the unpacked data.

    Returns:
        The same data structure as `data` with `func` applied to every object of type `main_type`.
    c              3   D   K   | ]  }t        |gd   yw)rA   Nrecursively_apply)r.   oargsrC   funckwargsrB   s     r   r0   z$recursively_apply.<locals>.<genexpr>o   s@        "!".7M`djs    rA   zUnsupported types (z) passed to `z?`. Only nested list/tuple/dicts of objects that are valid for `z` should be passed.)
r   r4   r=   r@   r   r1   itemsrG   	TypeError__name__)rJ   r7   rB   rC   rI   rK   kvs   ` ````  r   rG   rG   V   s   , $& 	
 	
 
D'	"tDz
 !JJL	 Aq $!".7M`dj 
 	
 
4D*4*6**	!$t*]4==/ J++4+=+=*>>QS
 	
 Ks   #C
c                    t        | t        t        f      rt        | fd| D              S t        | t              r^t        t
              rgng  t        |       | j                         D ci c]  \  }}||v r|nt        |       c}}      S t        | d      r	 | j                        S | S c c}}w # t        $ r | j                        cY S w xY w)a  
    Recursively sends the elements in a nested list/tuple/dictionary of tensors to a given device.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to a given device.
        device (`torch.device`):
            The device to send the data to.

    Returns:
        The same data structure as `tensor` with all tensors sent to the proper device.
    c              3   <   K   | ]  }t        |         yw)non_blocking	skip_keysN)send_to_device)r.   tdevicerT   rU   s     r   r0   z!send_to_device.<locals>.<genexpr>   s!     ocd^AvLT]^^os   rS   to)rT   )r   r4   r=   r@   r   r-   r1   rL   rV   hasattrrY   rM   )r   rX   rT   rU   rO   rW   s    ```  r   rV   rV      s     &5$-(ohno
 	
 
FG	$i%"IItF| #LLNAq Y1N1fS_kt,uu
 	
 
		%99V,9??   	%99V$$	%s   7 C
,C C$#C$c                      d }t        ||       S )aK  
    Recursively gathers the information needed to rebuild a nested list/tuple/dictionary of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to analyze.

    Returns:
        The same data structure as `data` with [`~utils.TensorInformation`] instead of tensors.
    c                 D    t        | j                  | j                        S )N)shapedtype)r   r]   r^   r   s    r   _get_data_structurez/get_data_structure.<locals>._get_data_structure   s     v||6<<HHr   rF   )r7   r_   s     r   get_data_structurer`      s    I 0$77r   c                      d }t        ||       S )a:  
    Recursively gathers the shape of a nested list/tuple/dictionary of tensors as a list.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to send to analyze.

    Returns:
        The same data structure as `data` with lists of tensor shapes instead of tensors.
    c                 ,    t        | j                        S r   )r=   r]   r   s    r   
_get_shapezget_shape.<locals>._get_shape   s    FLL!!r   rF   )r7   rc   s     r   	get_shaperd      s    " Z..r   c                 ,    d }t        || t              S )z
    Recursively initializes tensors from a nested list/tuple/dictionary of [`~utils.TensorInformation`].

    Returns:
        The same data structure as `data` with tensors instead of [`~utils.TensorInformation`].
    c                 T    t        j                  | j                  d| j                  iS Nr^   )r   emptyr]   r^   r'   s    r   _initialize_tensorz.initialize_tensors.<locals>._initialize_tensor   s"    {{K--G[5F5FGGr   rB   )rG   r)   )data_structureri   s     r   initialize_tensorsrl      s    H /K`aar   c                 6   t        | t        t        f      rt        | d         S t        | t              r%| j                         D ]  }t        | |         c S  n2t        | t        j                        st        dt        |        d      | j                  d   S )a  
    Recursively finds the batch size in a nested list/tuple/dictionary of lists of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to find the batch size.

    Returns:
        `int`: The batch size.
    r   z0Can only find the batch size of tensors but got .)r   r4   r=   find_batch_sizer   keysr   r   rM   r1   r]   )r7   rO   s     r   ro   ro      s     $&tAw''	D'	" 	,A"47++	,ell+J4PT:,VWXYY::a=r   c                      d }t        ||       S )aS  
    Recursively finds tensors in a nested list/tuple/dictionary and converts them to a list of numbers.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`): The data from which to convert to regular numbers.

    Returns:
        The same data structure as `data` with lists of numbers instead of `torch.Tensor`.
    c                     | j                         j                         } | j                  t        j                  k(  r| j                  t        j                        } | j                         S r   )detachcpur^   r   bfloat16rY   float32tolistr   s    r   _convert_to_listz!listify.<locals>._convert_to_list   sF    $$&<<5>>) YYu}}-F}}r   rF   )r7   rx   s     r   listifyry      s     -t44r   c                 P    d }t        || d      }t        j                          |S )Nc                     | j                   dk(  r| j                         d    } | j                         s| j                         } t	        j
                  |       S )Nr   )ndimcloneis_contiguous
contiguousxm
all_gatherr   s    r   _tpu_gather_onez$_tpu_gather.<locals>._tpu_gather_one  sI    ;;!\\^D)F ##%&&(F}}V$$r   TrC   )rG   r   	mark_step)r   r   ress      r   _tpu_gatherr     s%    % OV
NCLLNJr   c                     t               t        dd      rt        j                  j                  nt        j                  j
                  fd}t        || d      S )Nz>=z1.13c                    | j                   dk(  r| j                         d    } | j                         s| j                         } j                  j                  dk7  rtt        j                  j                  | j                         z  | j                  j                        } ||         |j                  dg| j                         dd   S t        j                        D cg c]  }t        j                  |        }}t
        j                  j!                  ||        t        j"                  |d      S c c}w )Nr   gloo)r^   rX   r	   dim)r|   r}   r~   r   backendr   rh   num_processesnumelr^   rX   viewsizerange
empty_likedistributedr   cat)r   output_tensors_	gather_opstates      r   _gpu_gather_onez$_gpu_gather.<locals>._gpu_gather_one!  s   ;;!\\^D)F ##%&&(F==$&)@
 #[[##flln4ll||N
 nf-&>&&r>FKKM!",=>>
 AFeFYFY@Z[1e..v6[N[((@99^33 \s   *D?Tr   )r   r   r   r   all_gather_into_tensor_all_gather_baserG   )r   r   r   r   s     @@r   _gpu_gatherr     sL    NEf%%%<<	%%66	48 _f$OOr   c                       e Zd ZdZy)DistributedOperationExceptionz
    An exception class for distributed operations. Raised if the operation cannot be performed due to the shape of the
    tensors.
    N)rN   
__module____qualname____doc__ r   r   r   r   @  s    
 	r   r   c                 .     t                fd       }|S )zv
    Verifies that `tensor` is the same shape across all processes. Only ran if `PartialState().debug` is `True`.
    c            
         t               j                  t        j                  k(  st               j                  s 
| i |S 
j
                   d
j                   }d|v r|d   }n| d   }t        |      }t        |g      }|d   f|j                  |d         t        |      k(  }|sDdj                  t        |      D cg c]  \  }}d| d|  c}}      }	t        d| d|	        
| i |S c c}}w )	Nrn   r   r   z
  - zProcess z: znCannot apply desired operation due to shape mismatches. All shapes across devices must be valid.

Operation: `z`
Input shapes:
  - )r   distributed_typer   NOdebugr   rN   rd   gather_objectcountr3   join	enumerater   )rI   rK   	operationr   shapesoutputare_sameir]   process_shape_strfunctions             r   wrapperz!verify_operation.<locals>.wrapperN  s   >**o.@.@@H\H\T,V,,**+1X->->,?@	vH%F!WF6"x(!9 ||F1I.#f+=H$,MM[dek[l2mxqRWXaS5'3J2m$n!3''0k1GHYGZ\ 
 ((( 3ns   C7
r   r   r   s   ` r   verify_operationr   I  s"    
 8_) )* Nr   c                 .     t                fd       }|S )z
    Checks that `verify_operation` failed and if so reports a more helpful error chaining the existing
    `DistributedOperationException`.
    c                      	  | i |S # t         $ r0}j                   dj                   }t        d| d      |d }~ww xY w)Nrn   zError found while calling `z1`. Please see the earlier error for more details.)r   r   rN   )rI   rK   er   r   s       r   r   z"chained_operation.<locals>.wrapperm  sc    	T,V,,, 	#../q1B1B0CDI/-i[8ij	s    	A+?Ar   r   s   ` r   chained_operationr   g  s"     8_  Nr   c                     t               j                  t        j                  k(  rt	        |       S t               j                  t
        v rt        |       S | S )a4  
    Recursively gather tensor in a nested list/tuple/dictionary of tensors from all devices.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.

    Returns:
        The same data structure as `tensor` with all tensors sent to the proper device.
    )r   r   r   TPUr   r
   r   r   s    r   gatherr   z  sF     ~&&/*=*==6""		(	(,M	M6""r   objectc                     t        t               j                        D cg c]  }d  }}t        j                  j                  ||        |D cg c]  }|D ]  }|  c}}S c c}w c c}}w r   )r   r   r   r   r   all_gather_object)r   r   output_objectsyxs        r   _gpu_gather_objectr     s`    $),.*F*F$GHqdHNH	''?%1!q1!A1A11 I 2s   	A$A)c                     t               j                  t        j                  k(  rt	        d      t               j                  t
        v rt        |       S | S )a5  
    Recursively gather object in a nested list/tuple/dictionary of objects from all devices.

    Args:
        object (nested list/tuple/dictionary of picklable object):
            The data to gather.

    Returns:
        The same data structure as `object` with all the objects sent to every device.
    z&gather objects in TPU is not supported)r   r   r   r   NotImplementedErrorr
   r   )r   s    r   r   r     sG     ~&&/*=*==!"JKK		(	(,M	M!&))r   c                 (    dd}t        || d|      S )Nc                 H    t         j                  j                  | |       | S )Nsrc)r   r   	broadcast)r   r   s     r   _gpu_broadcast_onez*_gpu_broadcast.<locals>._gpu_broadcast_one  s     ##F#4r   T)rC   r   r   rF   )r7   r   r   s      r   _gpu_broadcastr     s     /4UXYYr   c                 T   t        | t        t        f      rt        | fdt	        |       D              S t        | t
              rC t        |       | j                         D ci c]  \  }}|t        | d|        c}}      S t        j                  | fd      S c c}}w )Nc              3   H   K   | ]  \  }}t        | d |         yw)r   nameN)_tpu_broadcast)r.   r   rW   r   s      r   r0   z!_tpu_broadcast.<locals>.<genexpr>  s*     "gTQPQ>!TF!A3-#H#H"gs   "r   r   c                     |    S r   r   )r   r   s    r   <lambda>z _tpu_broadcast.<locals>.<lambda>  s    !C& r   )r   r=   r4   r@   r   r   r1   rL   r   r   mesh_reduce)r   r   r   rO   rP   s    ``  r   r   r     s    &4-(&"gU^_eUf"ghh	FG	$tF|RXR^R^R`a$!QQq$q} EEabb>>$(899 bs   %B$
from_processc                     t               j                  t        j                  k(  rt	        | |d      S t               j                  t
        v rt        | |      S | S )a  
    Recursively broadcast tensor in a nested list/tuple/dictionary of tensors to all devices.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.
        from_process (`int`, *optional*, defaults to 0):
            The process from which to send the data

    Returns:
        The same data structure as `tensor` with all tensors broadcasted to the proper device.
    zaccelerate.utils.broadcast)r   r   r   )r   r   r   r   r   r
   r   )r   r   s     r   r   r     sM     ~&&/*=*==f,=YZZ		(	(,M	Mf,77r   c                 ,   t               j                  t        j                  k(  r2t	        |       D ]"  \  }}t        j                  d|fd      | |<   $ | S t               j                  t        v r!t        j                  j                  |        | S )a  
    Broadcast a list of picklable objects form one process to the others.

    Args:
        object_list (list of picklable objects):
            The list of objects to broadcast. This list will be modified inplace.
        from_process (`int`, *optional*, defaults to 0):
            The process from which to send the data.

    Returns:
        The same list containing the objects from process 0.
    z&accelerate.utils.broadcast_object_listc                     |    S r   r   )r   r   s    r   r   z'broadcast_object_list.<locals>.<lambda>  s    efgset r   r   )r   r   r   r   r   r   r   r
   r   r   broadcast_object_list)object_listr   r   r>   s    `  r   r   r     s     ~&&/*=*==, 	vFAs^^,TVY[tuKN	v  
	(	(,M	M///Nr   c                 "    d }t        || |      S )aN  
    Recursively takes a slice in a nested list/tuple/dictionary of tensors.

    Args:
        data (nested list/tuple/dictionary of `torch.Tensor`):
            The data to slice.
        tensor_slice (`slice`):
            The slice to take.

    Returns:
        The same data structure as `data` with all the tensors slices.
    c                     | |   S r   r   )r   tensor_slices     r   _slice_tensorz$slice_tensors.<locals>._slice_tensor  s    l##r   rF   )r7   r   process_indexr   r   s        r   slice_tensorsr     s    $ ]D,??r   c                     t         d   t        t        f      r.t         d    fdt	        t         d               D              S t         d   t              rR t         d          d   j                         D ci c]!  }|t         D cg c]  }||   	 c}      # c}}      S t         d   t        j                        st        dt         d                t        j                         S c c}w c c}}w )a  
    Recursively concatenate the tensors in a nested list/tuple/dictionary of lists of tensors with the same shape.

    Args:
        data (nested list/tuple/dictionary of lists of tensors `torch.Tensor`):
            The data to concatenate.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to concatenate.

    Returns:
        The same data structure as `data` with all the tensors concatenated.
    r   c              3   d   K   | ]"  }t        D cg c]  }||   	 c}        $ yc c}w w)r   N)concatenate)r.   r   dr7   r   s      r   r0   zconcatenate.<locals>.<genexpr>  s.     #lSTKt0D!10D#$N$N#l0Ds   0+
0r   z%Can only concatenate tensors but got )r   r4   r=   r@   r   r3   r   r1   rp   r   r   r   rM   r   )r7   r   rO   r   s   ``  r   r   r     s     $q'E4=)$q'#lX]^abfghbi^jXk#lmm	DGW	%tDG}UYZ[U\UaUaUcdPQaD-Aqad-As!KKdeeQ.?T!WOPP99Ts## .Bds   D
C=D
=D
c                 ,    dd}t        || d|||      S )a3  
    Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so they
    can safely be gathered.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to gather.
        dim (`int`, *optional*, defaults to 0):
            The dimension on which to pad.
        pad_index (`int`, *optional*, defaults to 0):
            The value with which to pad.
        pad_first (`bool`, *optional*, defaults to `False`):
            Whether to pad at the beginning or the end.
    c                 >  	
 t        | j                        k\  r| S t        j                  | j                  | j                        d    }t        |      j                         }t        fd|D              		| j                     k(  r| S | j                  
t        
      }	|<   | j                  t        |            |z   }|r)t        	
fdt        t        |            D              }n't        
fdt        t        |            D              }| ||<   |S )N)rX   c              3   (   K   | ]	  }|     y wr   r   )r.   sr   s     r   r0   zFpad_across_processes.<locals>._pad_across_processes.<locals>.<genexpr>&  s     -!qv-s   c              3   b   K   | ]&  }|k(  rt           z
        n
t        d        ( y wr   slice)r.   r   r   max_sizeold_sizes     r   r0   zFpad_across_processes.<locals>._pad_across_processes.<locals>.<genexpr>/  s6      [\Q#Xh#.95QU;Vs   ,/c              3   \   K   | ]#  }|k(  rt        d          n
t        d       % yw)r   Nr   )r.   r   r   r   s     r   r0   zFpad_across_processes.<locals>._pad_across_processes.<locals>.<genexpr>3  s,     oUVqCxE!Xc]3U4[Pos   ),)r3   r]   r   r   rX   r   rt   maxr=   	new_zerosr4   r   )r   r   	pad_index	pad_firstr   sizesnew_size
new_tensorindicesr   r   s    `       @@r   _pad_across_processesz3pad_across_processes.<locals>._pad_across_processes  s    #fll##M ||FLL?Et  "-u--v||C((M<<> %%eHo6B
 `efijrfs`t G oZ_`cdl`mZnooG$
7r   T)rC   r   r   r   r   r   FrF   )r   r   r   r   r   s        r   pad_across_processesr     s%    "2 v4ST]ir r   c                 *    dd}t        || d||      S )aX  
    Recursively reduce the tensors in a nested list/tuple/dictionary of lists of tensors across all processes by the
    mean of a given operation.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to reduce.
        reduction (`str`, *optional*, defaults to `"mean"`):
            A reduction method. Can be of "mean", "sum", or "none"
        scale (`float`, *optional*):
            A default scaling value to be applied after the reduce, only valied on XLA.

    Returns:
        The same data structure as `data` with all the tensors reduced.
    c                    t               }| j                         }|j                  t        j                  k(  r|S |j                  t        j
                  k(  rt        j                  d||       nJ|j                  j                  t        v r.t        j                  j                  |t        j                         |dk(  r||j                  z  }|S )Nsummean)r   r}   r   r   r   r   r   
all_reducevaluer
   r   r   r   SUMr   )r   	reductionscaler   cloned_tensors        r   _reduce_across_processesz(reduce.<locals>._reduce_across_processesN  s    !!_%7%77  !!_%8%88MM%6##))-NN((EU000Mr   T)rC   r  r  r   g      ?rF   )r   r  r  r  s       r   reducer	  <  s"    $  &di_d r   c                 *    d }d }t        || |      S )av  
    Recursively converts the elements nested list/tuple/dictionary of tensors in FP16/BF16 precision to FP32.

    Args:
        tensor (nested list/tuple/dictionary of `torch.Tensor`):
            The data to convert from FP16/BF16 to FP32.

    Returns:
        The same data structure as `tensor` with all tensors that were in FP16/BF16 precision converted to FP32.
    c                 "    | j                         S r   )floatr   s    r   _convert_to_fp32z)convert_to_fp32.<locals>._convert_to_fp32l  s    ||~r   c                 v    t        | d      xr, | j                  t        j                  t        j                  fv S rg   )rZ   r^   r   float16ru   r   s    r   _is_fp16_bf16_tensorz-convert_to_fp32.<locals>._is_fp16_bf16_tensoro  s*    vw'[FLLU]]ENN<[,[[r   rj   rF   )r   r  r  s      r   convert_to_fp32r  `  s     \ -vAUVVr   c                   "    e Zd ZdZd Zd Zd Zy)ConvertOutputsToFp32ad  
    Decorator to apply to a function outputing tensors (like a model forward pass) that ensures the outputs in FP16
    precision will be convert back to FP32.

    Args:
        model_forward (`Callable`):
            The function which outputs we want to treat.

    Returns:
        The same function as `model_forward` but with converted outputs.
    c                 *    || _         t        | |       y r   )model_forwardr   )selfr  s     r   __init__zConvertOutputsToFp32.__init__  s    *t]+r   c                 8    t         | j                  |i |      S r   )r  r  )r  rI   rK   s      r   __call__zConvertOutputsToFp32.__call__  s     1t114B6BCCr   c                 ,    t        j                  d      )NzCannot pickle a prepared model with automatic mixed precision, please unwrap the model with `Accelerator.unwrap_model(model)` before pickling it.)picklePicklingError)r  s    r   __getstate__z!ConvertOutputsToFp32.__getstate__  s    "" `
 	
r   N)rN   r   r   r   r  r  r  r   r   r   r  r  u  s    
,D
r   r  c                 6     t                 fd} |_        |S )Nc                       | i |S r   r   )rI   rK   r  s     r   forwardz(convert_outputs_to_fp32.<locals>.forward  s    d-f--r   )r  __wrapped__)r  r   s   ` r   convert_outputs_to_fp32r"    s!    (7M. (GNr   c                    t        | t              r'| j                         D ]  }t        |      }||c S  yt        | t        t
        f      r| D ]  }t        |      }||c S  yt        | t        j                        r| j                  S y)z
    Finds the device on which a nested dict/list/tuple of tensors lies (assuming they are all on the same device).

    Args:
        (nested list/tuple/dictionary of `torch.Tensor`): The data we want to know the device of.
    N)	r   r   valuesfind_devicer4   r=   r   r   rX   )r7   r>   rX   s      r   r%  r%    s     $ ;;= 	C %F!	 
D5$-	( 	C %F!	 
D%,,	'{{ 
(r   )FNr   )r   zbroadcast tensor)NNr   r  )<r   r  	functoolsr   r   typingr   r   r   r   r   	constantsr
   dataclassesr   r   importsr   r   r   torch_xla.core.xla_modelcore	xla_modelr   torch.distributedr   r   r%   r)   r;   r@   rG   rV   r`   rd   rl   ro   ry   r   r   	Exceptionr   r   r   r   r   r   r   r   intr   r   r   r   r   r	  r  r  r"  r%  r   r   r   <module>r1     sY    +     8 ; W W ')) "#*,
6=$ 4CX] 0f"J8$/$b(5.#PL	I 	<&  &2s 2# &Z: C  *S *@($, + +\    FW*
 
4	r   