
    9i                     X    d Z ddlmZmZ ddlmZmZ  e       rddlmZ 	 	 	 	 dde	fdZ
y)	z;AWQ (Activation aware Weight Quantization) integration file   )is_auto_awq_availableis_torch_available)AwqBackendPackingMethodAWQLinearVersion    Nreturnc           	         |g }|j                   }t               st        d      |t        j                  k(  r	ddlm}m} n|t        j                  k(  rddl	m
} |t        j                  k(  r"|j                  t        j                  k(  rn}	n}	| j                         D ]  \  }
}g j                  |
       t!        |t"        j$                        r|
|vrt'        fd|D              s|j(                  }|j*                  } |	|j,                  |j.                  |||j0                  du|j2                  j4                        | j6                  |
<   d}| j6                  |
   j9                  d	       t;        t=        |j?                                     dkD  rtA        ||||
      \  }}jC                  d         | |fS )a  
    Public method that recursively replaces the Linear layers of the given model with AWQ quantized layers.
    `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
    conversion has been successfull or not.

    During the module replacement, we also infer the backend to use through the `quantization_config` object.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        quantization_config (`AwqConfig`):
            The quantization config object that contains the quantization parameters.
        modules_to_not_convert (`list`, *optional*):
            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
            converted.
        current_key_name (`list`, *optional*):
            A list that contains the current key name. This is used for recursion and should not be passed by the user.
        has_been_replaced (`bool`, *optional*):
            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
            should not be passed by the user.
    NzAWQ (either `autoawq` or `llmawq`) is not available. Please install it with `pip install autoawq` or check out the installation guide in https://github.com/mit-han-lab/llm-awqr   )WQLinear_GEMMWQLinear_GEMV)WQLinearc              3   D   K   | ]  }|d j                        v   yw).N)join).0keycurrent_key_names     g/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/transformers/integrations/awq.py	<genexpr>z*replace_with_awq_linear.<locals>.<genexpr>N   s      [Sschh'788[s    )w_bit
group_sizein_featuresout_featuresbiasdevTF)modules_to_not_convertr   quantization_confighas_been_replaced)"backendr   
ValueErrorr   AUTOAWQawq.modules.linearr
   r   LLMAWQawq.quantize.qmoduler   versionr   GEMMnamed_childrenappend
isinstancennLinearanyr   r   bitsr   r   weightdevice_modulesrequires_grad_lenlistchildrenreplace_with_awq_linearpop)modelr   r   r   r   r   r
   r   r   
target_clsnamemoduler   r   _s      `           r   r5   r5      s   8 %!#!))G " ~
 	
 )111CC	+22	21)111&9&A&AEUEZEZ&Z]`m

,,.  !f#!%fbii(T9O-O[DZ[[$00%22'1-222== +!-D0,,(t$ %)! t$33E:tFOO%&'!+#:'=!1$7"3$ A  	R A !B ###    )NNNF)__doc__utilsr   r   utils.quantization_configr   r   torch.nnr*   boolr5    r<   r   <module>rC      s:    > = Q 
  Q$ 
Q$r<   