
import collections
import platform
import re
import socket
from codecs import encode
from collections import OrderedDict
from functools import partial, reduce
from types import MethodType
from typing import Optional

import numpy as np
import torch
from packaging.version import Version
from safetensors.torch import save_file as safe_save_file

from ..commands.config.default import write_basic_config  # noqa: F401
from ..logging import get_logger
from ..state import PartialState
from .constants import FSDP_PYTORCH_VERSION
from .dataclasses import DistributedType
from .imports import (
    is_deepspeed_available,
    is_numpy_available,
    is_torch_distributed_available,
    is_torch_xla_available,
    is_weights_only_available,
)
from .modeling import id_tensor_storage
from .transformer_engine import convert_model
from .versions import is_torch_version


logger = get_logger(__name__)


if is_torch_xla_available():
    import torch_xla.core.xla_model as xm


def is_compiled_module(module: torch.nn.Module) -> bool:
    """
    Check whether the module was compiled with torch.compile()
    """
    if not hasattr(torch, "_dynamo"):
        return False
    return isinstance(module, torch._dynamo.eval_frame.OptimizedModule)
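

# A minimal usage sketch (illustrative only, not part of the original module;
# requires a torch build with `torch.compile` support):
#
#     model = torch.nn.Linear(4, 4)
#     assert not is_compiled_module(model)
#     assert is_compiled_module(torch.compile(model))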


def has_compiled_regions(module: torch.nn.Module) -> bool:
    """
    Check whether the module has submodules that were compiled with `torch.compile()`.
    """
    if not hasattr(torch, "_dynamo"):
        return False

    if module._modules:
        for submodule in module.modules():
            if isinstance(submodule, torch._dynamo.eval_frame.OptimizedModule):
                return True

    return False


def is_repeated_blocks(module: torch.nn.Module) -> bool:
    """
    Check whether the module is a repeated block, i.e. `torch.nn.ModuleList` with all children of the same class. This
    is useful to determine whether we should apply regional compilation to the module.
    """
    return isinstance(module, torch.nn.ModuleList) and all(isinstance(m, module[0].__class__) for m in module)


def has_repeated_blocks(module: torch.nn.Module) -> bool:
    """
    Check whether the module has repeated blocks, i.e. `torch.nn.ModuleList` with all children of the same class, at
    any level of the module hierarchy. This is useful to determine whether we should apply regional compilation to the
    module.
    """
    if module._modules:
        for submodule in module.modules():
            if is_repeated_blocks(submodule):
                return True

    return False


def compile_regions(module: torch.nn.Module, **compile_kwargs) -> torch.nn.Module:
    """
    Performs regional compilation where we target repeated blocks of the same class and compile them sequentially to
    hit the compiler's cache. For example, in `GPT2LMHeadModel`, the repeated block/class is `GPT2Block`, and can be
    accessed as `model.transformer.h[0]`. The rest of the model (e.g. model.lm_head) is compiled separately.

    This allows us to speed up the compilation overhead / cold start of models like LLMs and Transformers in general.
    See https://pytorch.org/tutorials/recipes/regional_compilation.html for more details.

    Args:
        module (`torch.nn.Module`):
            The model to compile.
        **compile_kwargs:
            Additional keyword arguments to pass to `torch.compile()`.

    Returns:
        `torch.nn.Module`: A new instance of the model with some compiled regions.

    Example:
    ```python
    >>> from accelerate.utils import compile_regions
    >>> from transformers import AutoModelForCausalLM

    >>> model = AutoModelForCausalLM.from_pretrained("gpt2")
    >>> compiled_model = compile_regions(model, mode="reduce-overhead")
    >>> compiled_model.transformer.h[0]
    OptimizedModule(
        (_orig_mod): GPT2Block(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
                (c_attn): Conv1D(nf=2304, nx=768)
                (c_proj): Conv1D(nf=768, nx=768)
                (attn_dropout): Dropout(p=0.1, inplace=False)
                (resid_dropout): Dropout(p=0.1, inplace=False)
            )
            (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (mlp): GPT2MLP(
                (c_fc): Conv1D(nf=3072, nx=768)
                (c_proj): Conv1D(nf=768, nx=3072)
                (act): NewGELUActivation()
                (dropout): Dropout(p=0.1, inplace=False)
            )
        )
    )
    ```
    """

    def _compile_regions(module: torch.nn.Module, **compile_kwargs) -> torch.nn.Module:
        if is_repeated_blocks(module):
            # Compile each block of the ModuleList individually so they hit the same compiler cache
            new_module = torch.nn.ModuleList()
            for submodule in module:
                new_module.append(torch.compile(submodule, **compile_kwargs))
        elif has_repeated_blocks(module):
            # Recreate the module without its children, then recurse into the children
            new_module = module.__class__.__new__(module.__class__)
            new_module.__dict__.update(module.__dict__)
            new_module._modules = {}
            for name, submodule in module.named_children():
                new_module.add_module(name, _compile_regions(submodule, **compile_kwargs))
        else:
            # No repeated blocks at any level: compile the module as a whole
            new_module = torch.compile(module, **compile_kwargs)

        return new_module

    new_module = _compile_regions(module, **compile_kwargs)

    if "_orig_mod" not in new_module.__dict__:
        # Keep a reference to the original module so it can be unwrapped later
        new_module.__dict__["_orig_mod"] = module

    return new_module


def compile_regions_deepspeed(module: torch.nn.Module, **compile_kwargs):
    """
    Performs regional compilation the same way as `compile_regions`, but specifically for `DeepSpeedEngine.module`.
    Since the model is wrapped in a `DeepSpeedEngine` and has many added hooks, offloaded parameters, etc. that
    `torch.compile(...)` interferes with, this version of regional compilation uses the in-place `module.compile()`
    method instead.

    Args:
        module (`torch.nn.Module`):
            The model to compile.
        **compile_kwargs:
            Additional keyword arguments to pass to `module.compile()`.
    """
    if is_repeated_blocks(module):
        for submodule in module:
            submodule.compile(**compile_kwargs)
    elif has_repeated_blocks(module):
        for child in module.children():
            compile_regions_deepspeed(child, **compile_kwargs)
    else:
        module.compile(**compile_kwargs)
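

# Hedged usage sketch (assumes DeepSpeed is installed and that `model` and
# `ds_config` are defined by the caller; not part of the original module):
#
#     import deepspeed
#
#     engine, *_ = deepspeed.initialize(model=model, config=ds_config)
#     compile_regions_deepspeed(engine.module, backend="inductor")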


def model_has_dtensor(model: torch.nn.Module) -> bool:
    """
    Check if the model has DTensor parameters.

    Args:
        model (`torch.nn.Module`):
            The model to check.

    Returns:
        `bool`: Whether the model has DTensor parameters.
    """
    if is_torch_version(">=", "2.5.0"):
        from torch.distributed.tensor import DTensor
    else:
        from torch.distributed._tensor import DTensor

    return any(isinstance(p, DTensor) for p in model.parameters())


def extract_model_from_parallel(
    model, keep_fp32_wrapper: bool = True, keep_torch_compile: bool = True, recursive: bool = False
) -> torch.nn.Module:
    """
    Extract a model from its distributed containers.

    Args:
        model (`torch.nn.Module`):
            The model to extract.
        keep_fp32_wrapper (`bool`, *optional*):
            Whether to keep the mixed-precision (fp32) forward hooks on the model; if `False`, they are removed.
        keep_torch_compile (`bool`, *optional*):
            Whether to keep the model wrapped in its `torch.compile()` container; if `False`, the underlying eager
            model is returned.
        recursive (`bool`, *optional*, defaults to `False`):
            Whether to recursively extract all cases of `module.module` from `model` as well as unwrap child sublayers
            recursively, not just the top-level distributed containers.

    Returns:
        `torch.nn.Module`: The extracted model.
    """
    options = (torch.nn.parallel.DistributedDataParallel, torch.nn.DataParallel)

    is_compiled = is_compiled_module(model)
    has_compiled = has_compiled_regions(model)

    if is_compiled:
        compiled_model = model
        model = model._orig_mod
    elif has_compiled:
        compiled_model = model
        model = model.__dict__["_orig_mod"]

    if is_deepspeed_available():
        from deepspeed import DeepSpeedEngine

        options += (DeepSpeedEngine,)

    if is_torch_version(">=", FSDP_PYTORCH_VERSION) and is_torch_distributed_available():
        from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP

        options += (FSDP,)

    while isinstance(model, options):
        model = model.module

    if recursive:

        def _recursive_unwrap(module):
            # Wrapped modules conventionally expose the wrapped model as `.module`,
            # as DDP, DataParallel, DeepSpeed, and FSDP all do
            if hasattr(module, "module"):
                unwrapped_module = _recursive_unwrap(module.module)
            else:
                unwrapped_module = module
            # Next, unwrap child sublayers recursively
            for name, child in unwrapped_module.named_children():
                setattr(unwrapped_module, name, _recursive_unwrap(child))
            return unwrapped_module

        # Start with the top level
        model = _recursive_unwrap(model)

    if not keep_fp32_wrapper:
        forward = model.forward
        original_forward = model.__dict__.pop("_original_forward", None)
        if original_forward is not None:
            while hasattr(forward, "__wrapped__"):
                forward = forward.__wrapped__
                if forward == original_forward:
                    break
            model.forward = MethodType(forward, model)
        if getattr(model, "_converted_to_transformer_engine", False):
            convert_model(model, to_transformer_engine=False)

    if keep_torch_compile:
        if is_compiled:
            compiled_model._orig_mod = model
            model = compiled_model
        elif has_compiled:
            compiled_model.__dict__["_orig_mod"] = model
            model = compiled_model

    return model
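

# Typical unwrap flow (sketch; assumes the default process group is already
# initialized so DDP can wrap the model):
#
#     ddp_model = torch.nn.parallel.DistributedDataParallel(model)
#     unwrapped = extract_model_from_parallel(ddp_model)
#     assert unwrapped is model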


def wait_for_everyone():
    """
    Introduces a blocking point in the script, making sure all processes have reached this point before continuing.

    <Tip warning={true}>

    Make sure all processes will reach this instruction otherwise one of your processes will hang forever.

    </Tip>
    """
    PartialState().wait_for_everyone()


def clean_state_dict_for_safetensors(state_dict: dict):
    """
    Cleans the state dictionary from a model and removes tensor aliasing if present.

    Args:
        state_dict (`dict`):
            The state dictionary from a model
    """
    ptrs = collections.defaultdict(list)
    # When bnb serialization is used, weights in the state dict can be strings
    for name, tensor in state_dict.items():
        if not isinstance(tensor, str):
            ptrs[id_tensor_storage(tensor)].append(name)

    # These are all pointers of tensors with shared memory
    shared_ptrs = {ptr: names for ptr, names in ptrs.items() if len(names) > 1}
    warn_names = set()
    for names in shared_ptrs.values():
        # Keep the first key of each group of aliased tensors and drop the rest
        found_names = [name for name in names if name in state_dict]
        warn_names.update(found_names[1:])
        for name in found_names[1:]:
            del state_dict[name]
    if len(warn_names) > 0:
        logger.warning(
            f"Removed shared tensor {warn_names} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading",
        )
    state_dict = {k: v.contiguous() if isinstance(v, torch.Tensor) else v for k, v in state_dict.items()}
    return state_dict


def save(obj, f, save_on_each_node: bool = False, safe_serialization: bool = False):
    """
    Save the data to disk. Use in place of `torch.save()`.

    Args:
        obj:
            The data to save
        f:
            The file (or file-like object) to use to save the data
        save_on_each_node (`bool`, *optional*, defaults to `False`):
            Whether to save on every node's local main process, instead of only once on the global main process
        safe_serialization (`bool`, *optional*, defaults to `False`):
            Whether to save `obj` using `safetensors` or the traditional PyTorch way (that uses `pickle`).
    """
    # When TorchXLA is enabled, data must be moved to the CPU before saving
    if PartialState().distributed_type == DistributedType.XLA:
        obj = xm._maybe_convert_to_cpu(obj)
    if safe_serialization:
        save_func = partial(safe_save_file, metadata={"format": "pt"})
        # If it's a state dict, remove duplicate (aliased) tensors first
        if isinstance(obj, OrderedDict):
            obj = clean_state_dict_for_safetensors(obj)
    else:
        save_func = torch.save

    if PartialState().is_main_process and not save_on_each_node:
        save_func(obj, f)
    elif PartialState().is_local_main_process and save_on_each_node:
        save_func(obj, f)


# Globals that `torch.load(..., weights_only=True)` is allowed to unpickle
np_core = np._core if is_numpy_available("2.0.0") else np.core
TORCH_SAFE_GLOBALS = [
    # numpy arrays are just data, so they are safe to allow
    np_core.multiarray._reconstruct,
    np.ndarray,
    np.dtype,
    encode,
]
if is_numpy_available("1.25.0"):
    TORCH_SAFE_GLOBALS.append(np.dtypes.UInt32DType)


def load(f, map_location=None, **kwargs):
    """
    Compatible drop-in replacement of `torch.load()` which allows for `weights_only` to be used if `torch` version is
    2.4.0 or higher. Otherwise will ignore the kwarg.

    Will also add (and then remove) an exception for numpy arrays

    Args:
        f:
            The file (or file-like object) to use to load the data
        map_location:
            a function, `torch.device`, string or a dict specifying how to remap storage locations
        **kwargs:
            Additional keyword arguments to pass to `torch.load()`.
    """
    try:
        if is_weights_only_available():
            old_safe_globals = torch.serialization.get_safe_globals()
            if "weights_only" not in kwargs:
                kwargs["weights_only"] = True
            torch.serialization.add_safe_globals(TORCH_SAFE_GLOBALS)
        else:
            kwargs.pop("weights_only", None)

        loaded_obj = torch.load(f, map_location=map_location, **kwargs)
    finally:
        if is_weights_only_available():
            # Restore the safe globals that were registered before this call
            torch.serialization.clear_safe_globals()
            if old_safe_globals:
                torch.serialization.add_safe_globals(old_safe_globals)
    return loaded_obj


def get_pretty_name(obj):
    """
    Gets a pretty name from `obj`.
    """
    if not hasattr(obj, "__qualname__") and not hasattr(obj, "__name__"):
        obj = getattr(obj, "__class__", obj)
    if hasattr(obj, "__qualname__"):
        return obj.__qualname__
    if hasattr(obj, "__name__"):
        return obj.__name__
    return str(obj)
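

# For instance (illustrative):
#
#     >>> get_pretty_name(torch.optim.AdamW)
#     'AdamW'
#     >>> get_pretty_name(torch.optim.AdamW(torch.nn.Linear(2, 2).parameters()))
#     'AdamW'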


def merge_dicts(source, destination):
    """
    Recursively merges two dictionaries.

    Args:
        source (`dict`): The dictionary to merge into `destination`.
        destination (`dict`): The dictionary to merge `source` into.
    """
    for key, value in source.items():
        if isinstance(value, dict):
            node = destination.setdefault(key, {})
            merge_dicts(value, node)
        else:
            destination[key] = value

    return destination
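

# For instance (illustrative):
#
#     >>> merge_dicts({"a": {"x": 1}}, {"a": {"y": 2}, "b": 3})
#     {'a': {'y': 2, 'x': 1}, 'b': 3}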


def is_port_in_use(port: Optional[int] = None) -> bool:
    """
    Checks if a port is in use on `localhost`. Useful for checking if multiple `accelerate launch` commands have been
    run and need to see if the port is already in use.
    """
    if port is None:
        port = 29500
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(("localhost", port)) == 0


def get_free_port() -> int:
    """
    Gets a free port on `localhost`. Useful for automatic port selection when port 0 is specified in distributed
    training scenarios.

    Returns:
        int: An available port number
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Binding to port 0 lets the OS pick an available port
        s.bind(("", 0))
        return s.getsockname()[1]


def convert_bytes(size):
    "Converts `size` from bytes to the largest possible unit"
    for x in ["bytes", "KB", "MB", "GB", "TB"]:
        if size < 1024.0:
            return f"{round(size, 2)} {x}"
        size /= 1024.0

    return f"{round(size, 2)} PB"


def check_os_kernel():
    """Warns if the kernel version is below the recommended minimum on Linux."""
    info = platform.uname()
    system = info.system
    if system != "Linux":
        return

    _, version, *_ = re.split(r"(\d+\.\d+\.\d+)", info.release)
    min_version = "5.5.0"
    if Version(version) < Version(min_version):
        msg = (
            f"Detected kernel version {version}, which is below the recommended minimum of {min_version}; this can "
            "cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher."
        )
        logger.warning(msg, main_process_only=True)
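

# Quick illustration of `convert_bytes` (exact, since it divides by 1024):
#
#     >>> convert_bytes(1024**2)
#     '1.0 MB'
#     >>> convert_bytes(3_500_000_000)
#     '3.26 GB'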


def recursive_getattr(obj, attr: str):
    """
    Recursive `getattr`.

    Args:
        obj:
            A class instance holding the attribute.
        attr (`str`):
            The attribute that is to be retrieved, e.g. 'attribute1.attribute2'.
    """

    def _getattr(obj, attr):
        return getattr(obj, attr)

    return reduce(_getattr, [obj] + attr.split("."))


def get_module_children_bottom_up(model: torch.nn.Module, return_fqns: bool = False) -> list[torch.nn.Module]:
    """Traverse the model in bottom-up order and return the children modules in that order.

    Args:
        model (`torch.nn.Module`): the model to get the children of

    Returns:
        `list[torch.nn.Module]`: a list of children modules of `model` in bottom-up order. The last element is the
        `model` itself.
    """
    top = model if not return_fqns else ("", model)
    stack = [top]
    ordered_modules = []
    while stack:
        current_module = stack.pop()
        if return_fqns:
            current_module_name, current_module = current_module
        for name, module in current_module.named_children():
            if isinstance(module, torch.nn.Module):
                if return_fqns:
                    child_name = current_module_name + "." + name if current_module_name else name
                    stack.append((child_name, module))
                else:
                    stack.append(module)
        if return_fqns:
            ordered_modules.append((current_module_name, current_module))
        else:
            ordered_modules.append(current_module)

    return ordered_modules[::-1]