
# Utilities for working with model weights, devices and memory budgets
# (`accelerate.utils.modeling`). Only signatures and docstrings are reproduced here;
# most function bodies are elided with `...`.
# The import list below is reconstructed from the names this module uses; the exact
# source submodules may differ slightly.
import inspect
import logging
import os
import re
import warnings
from collections import OrderedDict, defaultdict
from typing import Optional, Union

import torch
import torch.nn as nn

from ..state import AcceleratorState
from .constants import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
from .dataclasses import AutocastKwargs, CustomDtype, DistributedType
from .imports import (
    is_hpu_available,
    is_mlu_available,
    is_mps_available,
    is_musa_available,
    is_npu_available,
    is_peft_available,
    is_sdaa_available,
    is_torch_xla_available,
    is_tqdm_available,
    is_xpu_available,
)
from .memory import clear_device_cache, get_xpu_available_memory
from .offload import load_offloaded_weight, offload_weight, save_offload_index
from .versions import is_torch_version

from safetensors import safe_open
from safetensors.torch import load_file

# Guarded imports of backend-specific torch extensions (e.g. torch_npu, torch_mlu) are
# performed here in the original module when the corresponding accelerator is available.

WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json"

logger = logging.getLogger(__name__)


def is_peft_model(model):
    from .other import extract_model_from_parallel

    if is_peft_available():
        from peft import PeftModel

    return is_peft_available() and isinstance(extract_model_from_parallel(model), PeftModel)

def check_device_same(first_device, second_device):
    """
    Utility method to check if two `torch` devices are similar. When dealing with CUDA devices, torch throws `False`
    for `torch.device("cuda") == torch.device("cuda:0")`, whereas they should be the same.

    Args:
        first_device (`torch.device`):
            First device to check
        second_device (`torch.device`):
            Second device to check
    """
    ...

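# Illustrative sketch: `check_device_same` treats an index-less CUDA device and
# `cuda:0` as the same accelerator, unlike plain `==` on `torch.device` objects.
def _example_check_device_same():  # hypothetical helper, not part of the original module
    first, second = torch.device("cuda"), torch.device("cuda:0")
    print(first == second)  # False
    print(check_device_same(first, second))  # True
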
def convert_file_size_to_int(size: Union[int, str]):
    """
    Converts a size expressed as a string with digits and a unit (like `"5MB"`) to an integer (in bytes).

    Args:
        size (`int` or `str`): The size to convert. Will be directly returned if an `int`.

    Example:

    ```py
    >>> convert_file_size_to_int("1MiB")
    1048576
    ```
    """
    ...

def dtype_byte_size(dtype: torch.dtype):
    """
    Returns the size (in bytes) occupied by one parameter of type `dtype`.

    Example:

    ```py
    >>> dtype_byte_size(torch.float32)
    4
    ```
    """
    ...

def id_tensor_storage(tensor: torch.Tensor) -> tuple[torch.device, int, int]:
    """
    Unique identifier to a tensor storage. Multiple different tensors can share the same underlying storage. For
    example, "meta" tensors all share the same storage, and thus their identifier will all be equal. This identifier is
    guaranteed to be unique and constant for this tensor's storage during its lifetime. Two tensor storages with
    non-overlapping lifetimes may have the same id.
    """
    ...

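# Illustrative sketch: a tensor and a view of it share the same storage, so they get
# the same storage identifier; an independently allocated tensor does not.
def _example_id_tensor_storage():  # hypothetical helper, not part of the original module
    base = torch.zeros(8)
    view = base[2:]
    print(id_tensor_storage(base) == id_tensor_storage(view))  # True
    print(id_tensor_storage(base) == id_tensor_storage(torch.zeros(8)))  # False
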
def set_module_tensor_to_device(
    module: nn.Module,
    tensor_name: str,
    device: Union[int, str, torch.device],
    value: Optional[torch.Tensor] = None,
    dtype: Optional[Union[str, torch.dtype]] = None,
    fp16_statistics: Optional[torch.HalfTensor] = None,
    tied_params_map: Optional[dict[int, dict[torch.device, torch.Tensor]]] = None,
    non_blocking: bool = False,
    clear_cache: bool = True,
):
    """
    A helper function to set a given tensor (parameter or buffer) of a module on a specific device (note that doing
    `param.to(device)` creates a new tensor not linked to the parameter, which is why we need this function).

    Args:
        module (`torch.nn.Module`):
            The module in which the tensor we want to move lives.
        tensor_name (`str`):
            The full name of the parameter/buffer.
        device (`int`, `str` or `torch.device`):
            The device on which to set the tensor.
        value (`torch.Tensor`, *optional*):
            The value of the tensor (useful when going from the meta device to any other device).
        dtype (`torch.dtype`, *optional*):
            If passed along, the value of the parameter will be cast to this `dtype`. Otherwise, `value` will be cast
            to the dtype of the existing parameter in the model.
        fp16_statistics (`torch.HalfTensor`, *optional*):
            The list of fp16 statistics to set on the module, used for 8 bit model serialization.
        tied_params_map (Dict[int, Dict[torch.device, torch.Tensor]], *optional*, defaults to `None`):
            A map of current data pointers to dictionaries of devices to already dispatched tied weights. For a given
            execution device, this parameter is useful to reuse the first available pointer of a shared weight on the
            device for all others, instead of duplicating memory.
        non_blocking (`bool`, *optional*, defaults to `False`):
            If `True`, the device transfer will be asynchronous with respect to the host, if possible.
        clear_cache (`bool`, *optional*, defaults to `True`):
            Whether or not to clear the device cache after setting the tensor on the device.
    """
    ...

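# Illustrative sketch: materialize a meta-initialized parameter directly on a target
# device by providing its value (a plain `.to(device)` would not keep the link to the
# parameter). `target_device` is just an example value.
def _example_set_module_tensor_to_device():  # hypothetical helper, not part of the original module
    with torch.device("meta"):
        layer = nn.Linear(4, 4)
    target_device = "cpu"
    set_module_tensor_to_device(layer, "weight", target_device, value=torch.zeros(4, 4))
    set_module_tensor_to_device(layer, "bias", target_device, value=torch.zeros(4))
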
def named_module_tensors(
    module: nn.Module, include_buffers: bool = True, recurse: bool = False, remove_non_persistent: bool = False
):
    """
    A helper function that gathers all the tensors (parameters + buffers) of a given module. If `include_buffers=True`
    it's the same as doing `module.named_parameters(recurse=recurse) + module.named_buffers(recurse=recurse)`.

    Args:
        module (`torch.nn.Module`):
            The module we want the tensors on.
        include_buffers (`bool`, *optional*, defaults to `True`):
            Whether or not to include the buffers in the result.
        recurse (`bool`, *optional*, defaults to `False`):
            Whether or not to go look in every submodule or just return the direct parameters and buffers.
        remove_non_persistent (`bool`, *optional*, defaults to `False`):
            Whether or not to remove the non persistent buffers from the result. Useful only when
            `include_buffers=True`.
    """
    ...

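# Illustrative sketch: iterate over the direct parameters and buffers of a module
# (buffers included by default), e.g. to inspect their names and shapes.
def _example_named_module_tensors():  # hypothetical helper, not part of the original module
    bn = nn.BatchNorm1d(4)
    for name, tensor in named_module_tensors(bn, include_buffers=True):
        print(name, tuple(tensor.shape))  # weight, bias, running_mean, running_var, num_batches_tracked
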
def get_non_persistent_buffers(module: nn.Module, recurse: bool = False, fqns: bool = False):
    """
    Gather all non persistent buffers of a given module into a set.

    Args:
        module (`nn.Module`):
            The module we want the non persistent buffers on.
        recurse (`bool`, *optional*, defaults to `False`):
            Whether or not to go look in every submodule or just return the direct non persistent buffers.
        fqns (`bool`, *optional*, defaults to `False`):
            Whether or not to return the fully-qualified names of the non persistent buffers.
    """
    ...

def check_tied_parameters_in_config(model: nn.Module) -> bool:
    """
    Check if there is any indication in the given model that some weights should be tied.

    Args:
        model (`torch.nn.Module`): The model to inspect

    Returns:
        bool: True if the model needs to have tied weights
    """
    ...


def _get_param_device(param, device_map):
    ...

def check_tied_parameters_on_same_device(tied_params, device_map):
    """
    Check if tied parameters are on the same device.

    Args:
        tied_params (`List[List[str]]`):
            A list of lists of parameter names being all tied together.
        device_map (`Dict[str, Union[int, str, torch.device]]`):
            A map that specifies where each submodule should go.
    """
    ...

def find_tied_parameters(model: nn.Module, **kwargs):
    """
    Find the tied parameters in a given model.

    <Tip warning={true}>

    The signature accepts keyword arguments, but they are for the recursive part of this function and you should ignore
    them.

    </Tip>

    Args:
        model (`torch.nn.Module`): The model to inspect.

    Returns:
        List[List[str]]: A list of lists of parameter names being all tied together.

    Example:

    ```py
    >>> from collections import OrderedDict
    >>> import torch.nn as nn

    >>> model = nn.Sequential(OrderedDict([("linear1", nn.Linear(4, 4)), ("linear2", nn.Linear(4, 4))]))
    >>> model.linear2.weight = model.linear1.weight
    >>> find_tied_parameters(model)
    [['linear1.weight', 'linear2.weight']]
    ```
    """
    ...

def retie_parameters(model, tied_params):
    """
    Reties tied parameters in a given model if the link was broken (for instance when adding hooks).

    Args:
        model (`torch.nn.Module`):
            The model in which to retie parameters.
        tied_params (`List[List[str]]`):
            A mapping parameter name to tied parameter name as obtained by `find_tied_parameters`.
    """
    ...

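# Illustrative sketch: after an operation breaks weight tying (for instance re-creating a
# parameter on another device), `find_tied_parameters` + `retie_parameters` restore the link.
def _example_retie_parameters():  # hypothetical helper, not part of the original module
    model = nn.Sequential(OrderedDict([("linear1", nn.Linear(4, 4)), ("linear2", nn.Linear(4, 4))]))
    model.linear2.weight = model.linear1.weight
    tied = find_tied_parameters(model)  # [['linear1.weight', 'linear2.weight']]
    retie_parameters(model, tied)  # re-links the parameters if the tie was broken
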
def _get_proper_dtype(dtype: Union[str, torch.dtype]) -> torch.dtype:
    """
    Just does torch.dtype(dtype) if necessary.
    """
    if isinstance(dtype, str):
        # Strings such as "torch.float16" or "float16" are mapped to the torch dtype.
        dtype = dtype.replace("torch.", "")
        dtype = getattr(torch, dtype)
    return dtype

def compute_module_sizes(
    model: nn.Module,
    dtype: Optional[Union[str, torch.dtype]] = None,
    special_dtypes: Optional[dict[str, Union[str, torch.dtype]]] = None,
    buffers_only: bool = False,
):
    """
    Compute the size of each submodule of a given model.
    """
    ...

def compute_module_total_buffer_size(
    model: nn.Module,
    dtype: Optional[Union[str, torch.dtype]] = None,
    special_dtypes: Optional[dict[str, Union[str, torch.dtype]]] = None,
):
    """
    Compute the total size of buffers in each submodule of a given model.
    """
    module_sizes = compute_module_sizes(model, dtype=dtype, special_dtypes=special_dtypes, buffers_only=True)
    return module_sizes.get("", 0)

def get_max_layer_size(
    modules: list[tuple[str, torch.nn.Module]], module_sizes: dict[str, int], no_split_module_classes: list[str]
):
    """
    Utility function that will scan a list of named modules and return the maximum size used by one full layer. The
    definition of a layer being:
    - a module with no direct children (just parameters and buffers)
    - a module whose class name is in the list `no_split_module_classes`

    Args:
        modules (`List[Tuple[str, torch.nn.Module]]`):
            The list of named modules where we want to determine the maximum layer size.
        module_sizes (`Dict[str, int]`):
            A dictionary mapping each layer name to its size (as generated by `compute_module_sizes`).
        no_split_module_classes (`List[str]`):
            A list of class names for layers we don't want to be split.

    Returns:
        `Tuple[int, List[str]]`: The maximum size of a layer with the list of layer names realizing that maximum size.
    """
    ...

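# Illustrative sketch: compute per-submodule sizes for a tiny model and find the
# largest indivisible "layer" among its direct children.
def _example_get_max_layer_size():  # hypothetical helper, not part of the original module
    model = nn.Sequential(OrderedDict([("emb", nn.Embedding(10, 8)), ("fc", nn.Linear(8, 2))]))
    sizes = compute_module_sizes(model)
    max_size, layer_names = get_max_layer_size(list(model.named_children()), sizes, no_split_module_classes=[])
    print(max_size, layer_names)
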
def get_max_memory(max_memory: Optional[dict[Union[int, str], Union[int, str]]] = None):
    """
    Get the maximum memory available if nothing is passed, converts string to int otherwise.
    """
    ...

def clean_device_map(device_map: dict[str, Union[int, str, torch.device]], module_name: str = ""):
    """
    Cleans a device_map by grouping all submodules that go on the same device together.
    """
    ...

def load_offloaded_weights(model, index, offload_folder):
    """
    Loads the weights from the offload folder into the model.

    Args:
        model (`torch.nn.Module`):
            The model to load the weights into.
        index (`dict`):
            A dictionary containing the parameter name and its metadata for each parameter that was offloaded from the
            model.
        offload_folder (`str`):
            The folder where the offloaded weights are stored.
    """
    ...


def get_module_leaves(module_sizes):
    ...

def get_balanced_memory(
    model: nn.Module,
    max_memory: Optional[dict[Union[int, str], Union[int, str]]] = None,
    no_split_module_classes: Optional[list[str]] = None,
    dtype: Optional[Union[str, torch.dtype]] = None,
    special_dtypes: Optional[dict[str, Union[str, torch.dtype]]] = None,
    low_zero: bool = False,
):
    """
    Compute a `max_memory` dictionary for [`infer_auto_device_map`] that will balance the use of each available GPU.

    <Tip>

    All computation is done analyzing sizes and dtypes of the model parameters. As a result, the model can be on the
    meta device (as it would if initialized within the `init_empty_weights` context manager).

    </Tip>

    Args:
        model (`torch.nn.Module`):
            The model to analyze.
        max_memory (`Dict`, *optional*):
            A dictionary device identifier to maximum memory. Will default to the maximum memory available if unset.
            Example: `max_memory={0: "1GB"}`.
        no_split_module_classes (`List[str]`, *optional*):
            A list of layer class names that should never be split across device (for instance any layer that has a
            residual connection).
        dtype (`str` or `torch.dtype`, *optional*):
            If provided, the weights will be converted to that type when loaded.
        special_dtypes (`Dict[str, Union[str, torch.device]]`, *optional*):
            If provided, special dtypes to consider for some specific weights (will override dtype used as default for
            all weights).
        low_zero (`bool`, *optional*, defaults to `False`):
            Minimizes the number of weights on GPU 0, which is convenient when it's used for other operations (like the
            Transformers generate function).
    """
    ...


def calculate_maximum_sizes(model: torch.nn.Module):
    """Computes the total size of the model and its largest layer"""
    ...


def _init_infer_auto_device_map(
    model: nn.Module,
    max_memory: Optional[dict[Union[int, str], Union[int, str]]] = None,
    no_split_module_classes: Optional[list[str]] = None,
    dtype: Optional[Union[str, torch.dtype]] = None,
    special_dtypes: Optional[dict[str, Union[str, torch.dtype]]] = None,
):
    """
    Initialize variables required for computing the device map for model allocation.
    """
    ...

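# Illustrative sketch: derive a balanced `max_memory` budget for a multi-GPU setup and
# feed it to `infer_auto_device_map`; the memory strings and device ids are example values only.
def _example_get_balanced_memory():  # hypothetical helper, not part of the original module
    with torch.device("meta"):
        model = nn.Sequential(*[nn.Linear(1024, 1024) for _ in range(8)])
    max_memory = get_balanced_memory(model, max_memory={0: "2GiB", 1: "2GiB"}, low_zero=False)
    device_map = infer_auto_device_map(model, max_memory=max_memory)
    print(device_map)
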
def get_module_size_with_ties(tied_params, module_size, module_sizes, modules_to_treat):
    """
    Calculate the total size of a module, including its tied parameters.

    Args:
        tied_params (`List[str]`): The list of tied parameters.
        module_size (`int`): The size of the module without tied parameters.
        module_sizes (`Dict[str, int]`): A dictionary mapping each layer name to its size.
        modules_to_treat (`List[Tuple[str, nn.Module]]`): The list of named modules to treat.

    Returns:
        `Tuple[int, List[str], List[nn.Module]]`: The total size of the module, the names of the tied modules, and the
        tied modules.
    """
    ...

def fallback_allocate(modules, module_sizes, size_limit, no_split_module_classes=None, tied_parameters=None):
    """
    Find a module that fits in the size limit using BFS and return it with its name and the remaining modules.

    Args:
        modules (`List[Tuple[str, nn.Module]]`):
            The list of named modules to search in.
        module_sizes (`Dict[str, int]`):
            A dictionary mapping each layer name to its size (as generated by `compute_module_sizes`).
        size_limit (`Union[int, str]`):
            The maximum size a module can have.
        no_split_module_classes (`Optional[List[str]]`, *optional*):
            A list of class names for layers we don't want to be split.
        tied_parameters (`Optional[List[List[str]]]`, *optional*):
            A list of lists of parameter names being all tied together.

    Returns:
        `Tuple[Optional[str], Optional[nn.Module], List[Tuple[str, nn.Module]]]`: A tuple containing:
        - The name of the module that fits within the size limit.
        - The module itself.
        - The list of remaining modules after the found module is removed.
    """
    ...

Compute a device map for a given model giving priority to GPUs, then offload on CPU and finally offload to disk,
such that:
- we don't exceed the memory available of any of the GPU.
- if offload to the CPU is needed, there is always room left on GPU 0 to put back the layer offloaded on CPU that
  has the largest size.
- if offload to the CPU is needed,we don't exceed the RAM available on the CPU.
- if offload to the disk is needed, there is always room left on the CPU to put back the layer offloaded on disk
  that has the largest size.

<Tip>

All computation is done analyzing sizes and dtypes of the model parameters. As a result, the model can be on the
meta device (as it would if initialized within the `init_empty_weights` context manager).

</Tip>

Args:
    model (`torch.nn.Module`):
        The model to analyze.
    max_memory (`Dict`, *optional*):
        A dictionary mapping device identifiers to their maximum memory. Will default to the maximum memory available if unset.
        Example: `max_memory={0: "1GB"}`.
    no_split_module_classes (`List[str]`, *optional*):
        A list of layer class names that should never be split across devices (for instance any layer that has a
        residual connection).
    dtype (`str` or `torch.dtype`, *optional*):
        If provided, the weights will be converted to that type when loaded.
    special_dtypes (`Dict[str, Union[str, torch.device]]`, *optional*):
        If provided, special dtypes to consider for some specific weights (will override dtype used as default for
        all weights).
    verbose (`bool`, *optional*, defaults to `False`):
        Whether or not to provide debugging statements as the function builds the device_map.
    clean_result (`bool`, *optional*, defaults to `True`):
        Clean the resulting device_map by grouping all submodules that go on the same device together.
    offload_buffers (`bool`, *optional*, defaults to `False`):
        For the layers that are offloaded to the CPU or the hard drive, whether or not to offload the buffers as
        well as the parameters.
    fallback_allocation (`bool`, *optional*, defaults to `False`):
        When regular allocation fails, try to allocate a module that fits in the size limit using BFS.
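Example: a minimal, hedged sketch of how the function is typically driven; the tiny model, the memory caps and the resulting map are illustrative and not taken from this file:

```python
import torch
from torch import nn
from accelerate import infer_auto_device_map, init_empty_weights

class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(1024, 1024)
        self.decoder = nn.Linear(1024, 1024)

with init_empty_weights():
    model = TinyModel()  # parameters live on the meta device; only sizes and dtypes are used

device_map = infer_auto_device_map(
    model,
    max_memory={0: "3MiB", "cpu": "1GiB"},  # deliberately tight GPU budget to force offloading
    dtype=torch.float16,                    # sizes are computed as if the weights were fp16
)
print(device_map)  # e.g. {"encoder": 0, "decoder": "cpu"}
```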
r   z
[Extraction artifact: the compiled body of `infer_auto_device_map` is not recoverable from this dump. Only fragments of its verbose/debug messages survive, e.g. "Treating module ...", "Putting ... on ...", "Not enough space on ... to put ... (space available ..., needed size ...)", "Splitting ...", a warning that the buffers required for offloaded layers do not fit in any GPU's remaining memory (suggesting `offload_buffers=True`), and an informational note listing devices to which no module could be assigned together with the minimum memory each would need.]
<a
7a$a$5`:
c                 f   [        U R                  5       5      nU Vs/ s H  o3S:w  d  M
  X2;  d  M  UPM     nnU(       a  [        R                  " SU 3[        5        U R                  5       R                  5        VVs/ s H  u  pVUPM	     nnnUR                  5        HO  nUS:X  a  UR                  5           O8U Vs/ s H'  nXX:X  a  M
  UR                  US-   5      (       a  M%  UPM)     nnMQ     [        U5      S:  a  SR                  U5      n	[        SU	 35      egs  snf s  snnf s  snf )z
Checks a device map covers everything in a given model.

Args:
    model (`torch.nn.Module`): The model to check the device map against.
    device_map (`Dict[str, Union[int, str, torch.device]]`): The device map to check.
r   zHThe following device_map keys do not match any submodules in the model: rR   r   z, zOThe device_map provided does not give any device for the following parameters: N)dictr   r  r  UserWarning
state_dictr   r   clearr   r   r   rJ   )
r+   r   all_module_namesr3  invalid_keysr   r   all_model_tensorsr  non_covered_paramss
             r,   check_device_mapr  2  s;    E//12)Sz!"WA9RAzLSVWcVdegr	
 .3-=-=-?-E-E-GH-G'$-GH!("##% .!-D* 37??;QTCT3U -  ! ) !!YY'89]^p]qr
 	
 "% T I!s'   	D#D#D# D( 	D.D.(D.c                 L   U R                  S5      (       Ga+  [        U SS9 nUR                  5       nUR                  5       nSSS5        Wc  [        R                  SU  S35        SS0nUR                  S5      S;  a  [        SU  S	35      eUS   S:w  a  [        S
US    S35      eUc  [        U 5      $ [        [        UR                  5       5      5      S:X  ab  [        UR                  5       5      S   nUn[        U[        5      (       a&  [!        5       (       a  SU 3nO[#        5       (       a  Sn[        XS9$ [        [        UR                  5       5      S1-
  5      nSU;  a  UR%                  S5        U Vs0 s H  oU/ _M     nnUR'                  5        HP  u  pXW;   d  M  X   R)                  W V
s/ s H%  oU	:X  d  U
R+                  U	S-   5      (       d  M#  U
PM'     sn
5        MR     US   R)                  W V
s/ s H$  o[-        UR                  5       / 5      ;  d  M"  U
PM&     sn
5        0 n[/        5       (       a3  [1        S[-        U Vs/ s H  n[        X   5      PM     sn5      SSSS9nOSnU H  nUn[        U[        5      (       a&  [!        5       (       a  SU 3nO[#        5       (       a  Sn[        U SUS9 nX    HO  nUb!  UR3                  USS9  UR5                  U5        UR7                  U5      X'   Uc  M?  UR9                  5         MQ     SSS5        M     Ub  UR;                  5         U$ [<        R>                  " U [<        R@                  " S5      SS9$ ! , (       d  f       GN1= fs  snf s  sn
f s  sn
f s  snf ! , (       d  f       GM$  = f)a  
Load a checkpoint from a given file. If the checkpoint is in the safetensors format and a device map is passed, the
weights can be fast-loaded directly on the GPU.

Args:
    checkpoint_file (`str`): The path to the checkpoint to load.
    device_map (`Dict[str, Union[int, str, torch.device]]`, *optional*):
        A map that specifies where each submodule should go. It doesn't need to be refined to each parameter/buffer
        name; once a given module name is inside, every submodule of it will be sent to the same device.
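A hedged usage sketch; the checkpoint path is hypothetical and the module names in the map must match the checkpoint's own top-level modules:

```python
from accelerate.utils.modeling import load_state_dict

# With a safetensors checkpoint, tensors mapped to GPU 0 are read directly onto that GPU
# instead of taking a detour through CPU memory.
state_dict = load_state_dict(
    "model.safetensors",
    device_map={"encoder": 0, "decoder": "cpu"},
)
print({name: tensor.device for name, tensor in state_dict.items()})
```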
z.safetensorspt)	frameworkNz"The safetensors archive passed at zx does not contain metadata. Make sure to save your model with the `save_pretrained` method. Defaulting to 'pt' metadata.format)r  tfflaxzf does not contain the valid metadata. Make sure you save your model with the `save_pretrained` method.z%The checkpoint passed was saved with z, we need a the pt format.r   r   r   r   r   r*  r0   rR   Fw)main_process_onlytotalunit	smoothingleave)r  r5   )devrefreshT)map_locationweights_only)!rH   r#   rD  r   r   r   r  OSErrorrJ   safe_load_filer   r   r   r   r*   rF   r   r   r   r   extendr   rW  r   r    set_postfixset_description
get_tensorupdatecloser4   loadr5   )checkpoint_filer   frD  weight_namesr5   target_devicero  device_weightsr  r3  tensorsprogress_barr  s                 r,   load_state_dictr  T  s    //$71zz|H668L 8 NN4_4E Fo o !$'H<<!)==4_4E FI I  h4'DXhEWDXXrstt!/22 3z((*+,1j//1215 &fc**'))*.vh)++(-%oLL3z0023vh>?GG#u% 8??wVbjwN?'1'7'7'9#$"*11$0hLq4DU`cfUfHgLh (: 5!((\)o\cR`RgRgRikmNnEn!\)opG ""#&+Qvs>#9:QR   $! &fc**'))*.vh)++(-$}UYZ-5'3(444O(88='(||C'8'3(//1  6 VU "  '""$Nzz/U8KZ^__a 87N @ i *p
 R VUsB   !M-M?<"N
"N
!N	*N	N
A NN-
M<
N#	c                 h   0 n[        5       nU R                  5        H  u  p4US:X  a  M   [        US5         UR                  5       nSSS5        W H_  nXV   R
                  [        R
                  " S5      :X  a  UR                  USU 3-   5        MB  XV   nUR                  S5      XSU 3-   '   Ma     M     UR                  5        H  nXa;   d  M
  UR                  U5        M     U(       a  [        R                  SU 35        U$ ! , (       d  f       N= f! [         a    [	        S5      Sef = f)z
Returns the state dictionary for an offloaded model via iterative onloading

Args:
    model (`torch.nn.Module`):
        The offloaded model we want to save
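A hedged sketch, assuming `model` is a module that was previously dispatched with some layers offloaded to CPU or disk (the output path is hypothetical):

```python
import torch
from accelerate.utils.modeling import get_state_dict_offloaded_model

state_dict = get_state_dict_offloaded_model(model)  # offloaded tensors are onloaded module by module
torch.save(state_dict, "model_state.bin")
```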
r   r0   Nz;Offloaded module must fit in CPU memory to call save_model!r   rR   zMThe following tensors were not saved because they were still on meta device: )r   r   align_module_devicer  MemoryErrorr5   r4   addr   r  remover   r   )r+   r  placeholdersr   rx   module_state_dictr  paramss           r,   get_state_dict_offloaded_modelr    s+    J5L++-2:	g$VU3$*$5$5$7! 4
 %C %,,V0DD  !C5	!12&+F+199U+;J#i'( % ."   "$ # fgsftuv% 43 	g[\bff	gs(   DD	D	
D	DDD1r  device_to_put_offloadc                    USUR                  S5       n[        U 5      (       d  Sn[        X5         U R                  5       R	                  5        H  u  pVUSU 3-   U;   d  M  XbUSU 3-   '   M     SSS5        U$ ! , (       d  f       U$ = f)a  
Retrieve the state dictionary (with parameters) from an offloaded module and load into a specified device (defaults
to cpu).

Args:
    module: (`torch.nn.Module`):
        The module we want to retrieve a state dictionary from
    module_name: (`str`):
        The name of the module of interest
    state_dict (`Dict[str, Union[int, str, torch.device]]`):
        Dictionary of {module names: parameters}
    device_to_put_offload (`Union[int, str, torch.device]`):
        Device to load offloaded parameters into, defaults to the cpu.
NrR   )rfindhas_offloaded_paramsr  r  r   )rx   r  r  r  rootm_keyr  s          r,   get_state_dict_from_offloadr    s    * /**3/0D  '' $ 
V	;#..0668ME5'{"z1174AeW+-. 9 
<
  
<	;
 s   .A<$A<<
B
checkpointrC  offload_state_dictkeep_in_fp32_modulesoffload_8bit_bnbstrictfull_state_dictbroadcast_from_rank0c                 D   U(       a  SSK Jn  [        U 5      n[        U 5      (       a$  [	        U5      S:X  a  [
        R                  S5        Ub  [        X5        Uc"  Ub  SUR                  5       ;   a  [        S5      eUb,  Ub)  SUR                  5       ;   a  [        R                  " USS	9  [        U[        5      (       a"  UR                  S
S5      n[        [         U5      nSnSn[        R"                  R%                  U5      (       a(  [        U5      R'                  S5      (       a  UnGOU/nGO[        R"                  R)                  U5      (       Gav  [        R*                  " U5       Vs/ s H  nU[,        :X  d  M  UPM     nn[        R*                  " U5       Vs/ s H  nU[.        :X  d  M  UPM     nn[	        U5      S:X  a%  [        R"                  R1                  UUS   5      /nO[	        U5      S:X  a%  [        R"                  R1                  UUS   5      /nO[        R*                  " U5       Vs/ s H  nUR'                  S5      (       d  M  UPM     nn[	        U5      S:X  a  [        U S[,         S[.         S35      e[	        U5      S:X  a$  [        R"                  R1                  UUS   5      nO[        U S35      e[        SU S35      eUb  [        R"                  R3                  U5      S   n[5        U5       n[6        R8                  " UR;                  5       5      nSSS5        SW;   a  US   n[=        [?        [A        UR                  5       5      5      5      nU Vs/ s H#  n[        R"                  R1                  UU5      PM%     nn0 nU(       a  [B        RD                  " 5       n0 n[A        5       n[A        U RG                  5       RI                  5       5      nU RK                  5        VVs/ s H  u  nnUPM
     nnnU RG                  5       R                  5        Vs1 s H0  n[        U[         RL                  5      (       d  M$  URN                  iM2     nnU[         RN                  " S5      1-
  n U GHS  n!Uc  [Q        SS5      (       a  [Q        SS5      (       a  [	        U 5      S::  d  [	        U5      S::  aj  SSK)J*n"J+n#  U[Q        SS5      -  nU(       a  [X        RZ                  " 5       S:X  a
  []        U!US9O0 n$U#" U U$U"" S-U
U	S.[Q        SS5      (       a  SU0O0 D6S9  O[]        U!US9n$U R]                  U$U	S9  UR_                  [A        U$RI                  5       5      U-
  5        GOE[]        U!US9n$U$Ra                  5        GH&  u  n%n&S U%;   a  M  U%U;  a  URc                  U%5        U	(       d  M/  U%n'[	        U'5      S:  a@  U'U;  a:  SR1                  U'R3                  S5      SS! 5      n'[	        U'5      S:  a  U'U;  a  M:  U'S:X  a  SU;  a  [        U% S"35      eUU'   n(Un)Ubm  [         Rd                  " U&5      (       aR  UbO  U[         Rf                  :X  a;  S#n*U H  n+U+U%;   a	  U+S-   U%;   d  U+U%:X  d  M  Sn*  O   U*(       a  [         Rh                  n)S$U%;   aX  U%R                  S$S 5      U$RI                  5       ;   a4  U&Rj                  [         Rl                  :X  a  U$U%R                  S$S 5         n,OSn,U(S:X  aR  U(       d  U%U;  aB  U)c  U&Rj                  n)U(       a  W" U U&U%U)UUW,5        GM  [o        U U%SU)S%9  [q        U&U%UUS&9  GM  GM  U(S':X  aI  U(       aB  U)c  U&Rj                  n)U(       a  W" U U&U%U)WWW,5        GM  [o        U U%SU)S%9  [q        U&U%WWS&9  GM  [o        U U%U(U&U)W,S(9  GM)     A$[r        Rt                  " 5         GMV     U	(       dB  [	        U5      S:  a3  [
        R                  S)U S*U Rv                  Rx                   S+U S,35        [{        UU5        U(       a#  [}        U WW5        [~        R                  " U5        [        X5        gs  snf s  snf s  snf ! , (       d  f       GNR= fs  snf s  snnf s  snf ).aN  
Loads a (potentially sharded) checkpoint inside a model, potentially sending weights to a given device as they are
loaded.

<Tip warning={true}>

Once loaded across devices, you still need to call [`dispatch_model`] on your model to make it able to run. To
group the checkpoint loading and dispatch in one single call, use [`load_checkpoint_and_dispatch`].

</Tip>

Args:
    model (`torch.nn.Module`):
        The model in which we want to load a checkpoint.
    checkpoint (`str` or `os.PathLike`):
        The checkpoint to load. It can be:
        - a path to a file containing a whole model state dict
        - a path to a `.json` file containing the index to a sharded checkpoint
        - a path to a folder containing a unique `.index.json` file and the shards of a checkpoint.
        - a path to a folder containing a unique pytorch_model.bin or a model.safetensors file.
    device_map (`Dict[str, Union[int, str, torch.device]]`, *optional*):
        A map that specifies where each submodule should go. It doesn't need to be refined to each parameter/buffer
        name; once a given module name is inside, every submodule of it will be sent to the same device.
    offload_folder (`str` or `os.PathLike`, *optional*):
        If the `device_map` contains any value `"disk"`, the folder where we will offload weights.
    dtype (`str` or `torch.dtype`, *optional*):
        If provided, the weights will be converted to that type when loaded.
    offload_state_dict (`bool`, *optional*, defaults to `False`):
        If `True`, will temporarily offload the CPU state dict to the hard drive to avoid running out of CPU RAM if
        the weight of the CPU state dict + the biggest shard does not fit.
    offload_buffers (`bool`, *optional*, defaults to `False`):
        Whether or not to include the buffers in the weights offloaded to disk.
    keep_in_fp32_modules (`List[str]`, *optional*):
        A list of the modules that we keep in `torch.float32` dtype.
    offload_8bit_bnb (`bool`, *optional*):
        Whether or not to enable offload of 8-bit modules on cpu/disk.
    strict (`bool`, *optional*, defaults to `False`):
        Whether to strictly enforce that the keys in the checkpoint state_dict match the keys of the model's
        state_dict.
    full_state_dict (`bool`, *optional*, defaults to `True`): if this is set to `True`, all the tensors in the
        loaded state_dict will be gathered. No ShardedTensor and DTensor will be in the loaded state_dict.
    broadcast_from_rank0 (`bool`, *optional*, defaults to `False`): when the option is `True`, a distributed
        `ProcessGroup` must be initialized. rank0 should receive a full state_dict and will broadcast the tensors
        in the state_dict one by one to other ranks. Other ranks will receive the tensors and shard (if applicable)
        according to the local shards in the model.
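A hedged end-to-end sketch; `./checkpoint_dir` and `./offload` are hypothetical paths, and the checkpoint is expected to contain either a single weights file or a sharded checkpoint with its `.index.json`:

```python
import torch
from torch import nn
from accelerate import infer_auto_device_map, init_empty_weights
from accelerate.utils import load_checkpoint_in_model

with init_empty_weights():
    model = nn.Sequential(nn.Linear(1024, 1024), nn.Linear(1024, 1024))

device_map = infer_auto_device_map(model, max_memory={0: "4MiB", "cpu": "1GiB"})
load_checkpoint_in_model(
    model,
    "./checkpoint_dir",
    device_map=device_map,
    offload_folder="./offload",   # only required if some entries of device_map are "disk"
    dtype=torch.float16,
)
# As the tip above notes, the model still needs dispatch_model() before it can run.
```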

r   )quantize_and_offload_8bitr   rn  Nr*  zeAt least one of the model submodule will be offloaded to disk, please pass along an `offload_folder`.T)exist_okr   r   z.jsonz.index.jsonz6 is not a folder containing a `.index.json` file or a z or a z filezI containing more than one `.index.json` file, delete the irrelevant ones.z`checkpoint` should be the path to a file containing a whole state dict, or the index of a sharded checkpoint, or a folder containing a sharded checkpoint or the whole state dict, but got rR   
weight_mapr   rQ   z2.2.0z2.7.0)StateDictOptionsset_model_state_dictz2.4.0)r   )r  r  r  )options)r  r   r;   z doesn't have any device set.Fr   r  r1   r0   )rz   rO   r{   z(Some weights of the model checkpoint at z! were not used when initializing r  z. This may or may not be an issue - make sure that the checkpoint does not have unnecessary parameters, or that the model definition correctly corresponds to the checkpoint.r  )Bbnbr  r   r   r   r   r   r   r   rJ   rA  makedirsr*   r[   r  r   r4   rB  isfilerH   isdirlistdirr   r   r   r   openjsonloadsreadr   r   r   tempfilemkdtempr  r   r   r   r5   r!   'torch.distributed.checkpoint.state_dictr  r  distget_rankr  r  r   r  is_floating_pointrh   re   rO   rk   r   r   gccollectr   r   r   rG  shutilrmtreer   )-r+   r  r   rC  rO   r  r  r  r  r  r  r  r  r   checkpoint_filesindex_filenamer  potential_state_binpotential_state_safetensorpotential_indexcheckpoint_folderr2   offload_indexstate_dict_folderstate_dict_indexunexpected_keys
model_keysr   r   buffer_namestmodel_devicesmodel_physical_devicesr  r  r  loaded_checkpointr   r   r  param_device	new_dtypeproceedr  r{   s-                                                r,   load_checkpoint_in_modelr-    s   z 2&u-K&u--#k2Ba2G A	
 ,[E*"8VzGXGXGZ=Zs
 	
 
	#
(>6ZM^M^M`C`
NT2%h+u%N	ww~~j!!z?##G,,'N *|	z	"	"*,**Z*@V*@QADUq*@V13J1G%b1GA1PaKaa1G"%b"#q( "Z9LQ9O PQ+,1 "Z9STU9V WX +-**Z*@^*@QAJJ}D]q*@O^?#q( !l"XYeXfflm~l  @E  F  _%*!#j/!:L!M !l"kl  hhrgsstv
 	

 !GGMM.9!<.!QJJqvvx(E " 5 ,'E!$s5<<>':";<HXYHX1BGGLL):A>HXY M$,,.eOU%%',,./J(-(;(;(=>(=WT1D(=L>','7'7'9'@'@'Bb'B!jQRTYT`T`FaXQXX'BMb*ell6.B-CC+  g..!$00S9O5PTU5UZ]^kZlpqZqj$(8w(GG$ 04==?a3G $O
K "
 %%, (7% N^^bdkMlMl24HIrt %4OPZ$[!%%&7%G""3'8'='='?#@:#MN /J W%6%<%<%>!
EJ&Z/#''
3! (+&*{*/L"%((;+<+<S+A#2+F"GK +&*{*/L"$:)=$
|3P%QRR)+6!	$)@)@)G)G+7EU]]<R"'#7C!$
!2sj9PVY]gVg*. % $8 #(-Iz)j.@.@5.QUfUkUkUm.m{{ejj0*;J<N<NxY^<_*`&*O6)&*L*H$,(-I+5 %uj)^Ubds %7z6Ybc&uj.P]^ +I "U*/A ($)KK	'1!5*iARTdfu 4E:vU^_&uj:KScd/"$#'(7u &?H 


M ,P c/*Q.6zl C"__556b8I  Jwx	

 }n5 u&68IJ'(U(I W%b _& "! Z ?bsH   3a1a1%a67a6?a;a;%b 7*bb9#b b 
b
native_ampautocast_kwargsc                 z   [        5       nUc  0 nOUR                  5       nU (       Ga  UR                  [        R                  :X  a  [        SS9(       a  SOUR                  R                  nUR                  S:X  a'  [        R                  " SU[        R                  S.UD6$ UR                  S;   a  UR                  [        R                  [        R                  [        R                  [        R                  [        R                   [        R"                  [        R$                  [        R&                  [        R(                  [        R*                  [        R                  4;   a'  [        R                  " SU[        R,                  S.UD6$ [        R                  " SSU0UD6$ [.        R0                  " 5       $ )	a,  
Return a context manager for autocasting mixed precision

Args:
    native_amp (`bool`, *optional*, defaults to False):
        Whether mixed precision is actually enabled.
    cache_enabled (`bool`, *optional*, defaults to True):
        Whether the weight cache inside autocast should be enabled.
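A hedged sketch; the import path is assumed from this module, and `bf16` is used so the snippet also runs on CPU-only machines:

```python
import torch
from accelerate import Accelerator
from accelerate.utils.modeling import get_mixed_precision_context_manager

accelerator = Accelerator(mixed_precision="bf16")  # populates the AcceleratorState the helper reads
model = torch.nn.Linear(8, 8).to(accelerator.device)
x = torch.randn(2, 8, device=accelerator.device)

with get_mixed_precision_context_manager(native_amp=True):
    y = model(x)   # the matmul runs under torch.autocast in bfloat16
print(y.dtype)     # torch.bfloat16 when autocast was enabled, torch.float32 otherwise
```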
Tcheck_is_gpur   fp16)device_typerO   )bf16fp8r4  r  )r
   	to_kwargsdistributed_typer   XLAr   r5   r3   mixed_precisionr4   autocastrh   NO	MULTI_CPU	MULTI_GPU	MULTI_MLU
MULTI_SDAA
MULTI_MUSA	MULTI_NPU	MULTI_XPU	MULTI_HPUFSDPrg   
contextlibnullcontext)r.  r/  stater4  s       r,   #get_mixed_precision_context_managerrI    sV    E)335 &&/*=*==BXfjBk "" 	
   F*>>bkbRabb""o5%:P:P%%%%%%&&&&%%%%%%  U
 ;
 >>ckcSbcc>>MkM_MM%%''r.   r8  c                 b   U [         R                  :X  a  SSKJn  U" S
0 UD6$ [	        SS9(       a  SSKJn  UR                  " S
0 UD6$ [        5       (       a*  [        R                  R                  R                  " S
0 UD6$ [        5       (       a*  [        R                  R                  R                  " S
0 UD6$ [        5       (       a*  [        R                  R                  R                  " S
0 UD6$ [        5       (       a*  [        R                   R                  R                  " S
0 UD6$ [#        5       (       a   [        R                  R                  " S0 UD6$ [%        5       (       a   [        R                  R                  " S0 UD6$ ['        5       (       a<  [)        SS5      (       d  [+        S5      e[        R                  R                  " S0 UD6$ [)        SS	5      (       a   [        R                  R                  " S0 UD6$ [        R,                  R                  R                  " S
0 UD6$ )aH  
A generic helper which will initialize the correct `GradScaler` implementation based on the environment and return
it.

Args:
    distributed_type (`DistributedType`, *optional*, defaults to None):
        The type of distributed environment.
    kwargs:
        Additional arguments for the utilized `GradScaler` constructor.
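A hedged sketch of the usual scaled-loss loop on a CUDA machine; the import path is assumed from this module:

```python
import torch
from accelerate.utils.modeling import get_grad_scaler

model = torch.nn.Linear(8, 1).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
scaler = get_grad_scaler()  # picks the GradScaler implementation matching the environment

with torch.autocast("cuda", dtype=torch.float16):
    loss = model(torch.randn(4, 8, device="cuda")).mean()

scaler.scale(loss).backward()  # scale the loss so fp16 gradients do not underflow
scaler.step(optimizer)         # unscales gradients and skips the step if they overflowed
scaler.update()
```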
r   )ShardedGradScalerTr1  NrQ   z2.8.0z7Grad Scaler with MPS device requires a Pytorch >= 2.8.0z2.3r  )r   )r   )r)  )r   )r   rE  *torch.distributed.fsdp.sharded_grad_scalerrK  r   torch_xla.ampamp
GradScalerr   r4   r&  r   r'  r   r(  r   r%  r   r   r   r!   rJ   r   )r8  r   rK  xamps       r,   get_grad_scalerrQ  ,  s    ?///P *6**40$(((			yy}}''1&11			zz~~((2622			zz~~((2622			yy}}''1&11			yy##4V44			yy##4V44			g..VWWyy##4V44D%((99''9&99::>>,,6v66r.   c                     SSK Jn  [        U S5      =(       a3    [        U R                  U5      =(       a    U R                  R
                  $ )a@  
Checks if a module has offloaded parameters by checking if the given module has an `AlignDevicesHook` attached with
offloading enabled.

Args:
    module (`torch.nn.Module`): The module to check for an offload hook.

Returns:
    bool: `True` if the module has an offload hook and offloading is enabled, `False` otherwise.
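A hedged sketch, assuming `model` was dispatched with a device_map that offloads some layers; the import path is assumed from this module:

```python
from accelerate.utils.modeling import has_offloaded_params

for name, submodule in model.named_modules():
    if has_offloaded_params(submodule):
        print(f"{name}: weights stay offloaded and are only brought in at forward time")
```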
r	   )AlignDevicesHook_hf_hook)hooksrS  r   r*   rT  offload)rx   rS  s     r,   r  r  V  s7     )6:&t:fooGW+Xt]c]l]l]t]ttr.   execution_devicec              #     #    [        U 5      (       a{  Ub&  U R                  R                  nXR                  l         U R                  R                  U 5        Sv   U R                  R	                  U S5        Ub  WU R                  l        ggUbm  U R                  SS9 VVs0 s H  u  p4X4R                  _M     nnn U H  n[        XU5        M     Sv   UR                  5        H  u  p6[        XU5        M     gSv   g! U R                  R	                  U S5        Ub  WU R                  l        f f = fs  snnf ! UR                  5        H  u  p6[        XU5        M     f = f7f)a^  
Context manager that moves a module's parameters to the specified execution device.

Args:
    module (`torch.nn.Module`):
        Module with parameters to align.
    execution_device (`torch.device`, *optional*):
        If provided, overrides the module's execution device within the context. Otherwise, the hook's execution
        device is used when present, or the parameters are left where they are.
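A hedged sketch, assuming `layer` is a possibly-offloaded submodule of a dispatched model and that a CUDA device is available:

```python
import torch
from accelerate.utils.modeling import align_module_device

with align_module_device(layer, execution_device=torch.device("cuda:0")):
    print(layer.weight.device)  # cuda:0 inside the context (assuming `layer` defines `weight`)
# On exit the parameters return to their previous, possibly offloaded, placement.
```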
NFr   )	r  rT  rW  pre_forwardpost_forwardr   r5   r   r   )rx   rW  original_devicer   r   ro  r5   s          r,   r  r  f  s?     F##'$oo>>O/?OO,	COO''/OO((6+3B0 , 
	%9?9P9PY^9P9_`9_+$4%9_`	B+F:JK   '+F&A !0 	 OO((6+3B0 , a !(+F&A !0sA   :E)D AE)#D9;E)?D? +E)3D66	E)?'E&&E))NNNNFT)TFF)FF)NNF)NNr  )r   )NNNNF)NNNN)NNNNFTFF)r0   )
NNNFFNFFTF)FN)qrF  r  r   r  loggingrA  rY   r  r  r  collectionsr   r   typingr   r   r4   r   r  r   rH  r
   	constantsr   r   dataclassesr   r   r   importsr   r   r   r   r   r   r   r   r   memoryr   r   rV  r   r   r   r    r   versionsr!   	torch_npu	torch_mlu
[Extraction artifact: end-of-module residue (references to optional backends such as `torch_npu`, `torch_mlu`, `torch_sdaa`, `torch_musa`, default-argument tuples, and dumps of parameter names and type annotations for the utilities above). Nothing further is recoverable.]