
    ;i              	       r   S r SSKJr  SSKJrJrJr  SSKrSSKJ	r	J
r
  \(       a  \	" 5       (       a  SSKJr  S\R                  R                  4S	 jrS
\S\\   S\4S jrS
\S\4S jr\
S\R                  R                  4S j5       r\
S\4S\R                  R                  S\S   S\\   4S jj5       rg)z,
Needed utilities for torchao FP8 training.
    )partial)TYPE_CHECKINGCallableOptionalN   )is_torchao_availabletorchao_required)Float8LinearConfigmodelc                     Su  pU R                  5        H7  u  p4[        U[        R                  R                  5      (       d  M0  Uc  UnUnM9     X4$ )z
Finds the first and last linear layer names in a model.

This is needed during FP8 to avoid issues with instability by keeping the first and last layers unquantized.

Ref: https://x.com/xariusrke/status/1826669142604141052
)NN)named_modules
isinstancetorchnnLinear)r   first_linearlast_linearnamemodules        c/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/accelerate/utils/ao.pyfind_first_last_linear_layersr       sQ     !+L++-fehhoo..##K	 .
 $$    fqnlayers_to_filterreturnc                     [        U [        R                  R                  5      (       a'  U R                  S-  S:w  d  U R
                  S-  S:w  a  gX;   a  gg)an  
A function which will check if `module` is:
- a `torch.nn.Linear` layer
- has in_features and out_features divisible by 16
- is not part of `layers_to_filter`

Args:
    module (`torch.nn.Module`):
        The module to check.
    fqn (`str`):
        The fully qualified name of the layer.
    layers_to_filter (`List[str]`):
        The list of layers to filter.
   r   FT)r   r   r   r   in_featuresout_features)r   r   r   s      r   filter_linear_layersr    1   sM     &%((//**"a'6+>+>+Cq+H
r   c                 2    [        U 5      u  p#[        XX#/S9$ )av  
A filter function which will filter out all linear layers except the first and last.

<Tip>

    For stability reasons, we skip the first and last linear layers Otherwise can lead to the model not training or
    converging properly

</Tip>

Args:
    module (`torch.nn.Module`):
        The module to check.
    fqn (`str`):
        The fully qualified name of the layer.
r   )r   r    )r   r   r   r   s       r   #filter_first_and_last_linear_layersr#   H   s!    " !>f EL|>YZZr   c                 f    SSK Jn  U R                  5        H  u  p#[        X15      (       d  M    g   g)Nr   )Float8LinearTF)torchao.float8.float8_linearr%   r   r   )r   r%   r   r   s       r   has_ao_layersr'   ]   s-    9++-f++ . r   configr
   module_filter_funcc                 \    SSK Jn  [        U 5      u  pEUc  [        [        XE/S9nU" XUS9  g)a  
Converts all `nn.Linear` layers in the model (except the first and last) to torchao's `Float8Linear` layer inplace.

Args:
    model (`torch.nn.Module`):
        The model to convert.
    config (`torchao.float8.Float8LinearConfig`, *optional*):
        The configuration for the FP8 training. Recommended to utilize
        `torchao.float8.recipe_name_to_linear_config` to generate this. In general, the default config should be
        sufficient (what is passed when set to `None`).
    module_filter_func (`Callable`, *optional*, defaults to `filter_linear_layers`):
        Optional function that must take in a module and layer name, and returns a boolean indicating whether the
        module should be converted to FP8. Defaults to `filter_linear_layers`. See it for an example.

Example:

```python
from accelerate.utils.ao import convert_model_to_fp8_ao

model = MyModel()
model.to("cuda")
convert_to_float8_training(model)

model.train()
```
r   )convert_to_float8_trainingNr"   )module_filter_fnr(   )torchao.float8r+   r   r   r    )r   r(   r)   r+   r   r   s         r   convert_model_to_fp8_aor.   g   s7    @ : =e DL!$%9\LghuRXYr   )__doc__	functoolsr   typingr   r   r   r   importsr   r	   r&   r
   r   Moduler   strlistboolr    r#   r'   r.    r   r   <module>r8      s     4 4  ; C% %"c T#Y 4 .[S [T [*     .2-P$Z88??$Z)*$Z !*$Z $Zr   