
    i5                        S SK r S SKJrJr  SSKJr  SSKJr  SSKJ	r	J
r
JrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SSKJr  SS	K J!r!  SS
K"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)  SSK*J+r+  SSK,J-r-  SSK.J/r/  SSK0J1r1  SSK2J3r3  SSK4J5r5  SSK6J7r7  SSK8J9r9  SSK:J;r;  SSK<J=r=  SSK>J?r?  SSK@JArA  SSKBJCrC  SSKDJErE  SSKFJGrG  0 S\%_S\)_S\+_S \7_S!\!_S"\?_S#\A_S$\5_S%\/_S&\9_S'\;_S(\-_S)\1_S*\E_S+\'_S,\G_S-\C_\3\#\=S..ErH0 S\_S\_S\_S%\_S \_S!\	_S"\_S#\_S$\_S'\_S(\_S)\_S&\_S*\_S+\_S,\_S-\_\\
\S..ErI\R                  " \K5      rL " S/ S05      rM " S1 S25      rNS3\O4S4 jrPS5\O4S6 jrQS7 rRg)8    N)OptionalUnion   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8ConfigFPQuantConfig
GPTQConfigHiggsConfig	HqqConfigMxfp4ConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)FPQuantHfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)Mxfp4HfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizerawqbitsandbytes_4bitbitsandbytes_8bitgptqaqlmquantoquarkfp_quanteetqhiggshqqzcompressed-tensors
fbgemm_fp8torchaobitnetvptqspqr)fp8z
auto-roundmxfp4c                   @    \ rS rSrSr\S\4S j5       r\S 5       rSr	g)AutoQuantizationConfigs   z
The Auto-HF quantization config class that takes care of automatically dispatching to the correct
quantization config given a quantization config stored in a dictionary.
quantization_config_dictc           	         UR                  S5      nUR                  SS5      (       d  UR                  SS5      (       a/  UR                  SS5      (       a  SOSn[        R                  U-   nOUc  [        S5      eU[        ;  a,  [        SU S	[        [        R                  5       5       35      e[        U   nUR                  U5      $ )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGlistAUTO_QUANTIZER_MAPPINGkeys	from_dict)clsrH   rJ   suffix
target_clss        l/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/transformers/quantizers/auto.pyrY    AutoQuantizationConfig.from_dicty   s    /33NC#''>>BZB^B^_motBuBu 8 < <^U S SWY`F-<<vEL! \  ??1, @/44678: 
 6lC
##$<==    c                     [         R                  " U40 UD6n[        USS 5      c  [        SU S35      eUR                  nU R                  U5      nUR                  " S0 UD6  U$ )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized. )r   from_pretrainedgetattrrT   ra   rY   update)rZ   pretrained_model_name_or_pathkwargsmodel_configrH   ra   s         r]   rc   &AutoQuantizationConfig.from_pretrained   s    !112OZSYZ<!6=E;<Y;Z  [M  N  $0#C#C !mm,DE"",V,""r_   rb   N)
__name__
__module____qualname____firstlineno____doc__classmethoddictrY   rc   __static_attributes__rb   r_   r]   rF   rF   s   s6    
 > > >( 
# 
#r_   rF   c                       \ rS rSrSr\S\\\4   4S j5       r	\S 5       r
\S\\\4   S\\   4S j5       r\S 5       rS	rg
)AutoHfQuantizer   z
 The Auto-HF quantizer class that takes care of automatically instantiating to the correct
`HfQuantizer` given the `QuantizationConfig`.
ra   c           	      ^   [        U[        5      (       a  [        R                  U5      nUR                  nU[
        R                  :X  a  UR                  (       a  US-  nOUS-  nU[        ;  a,  [        SU S[        [        R                  5       5       35      e[        U   nU" U40 UD6$ )NrN   rM   rP   rQ   )
isinstancerp   rF   rY   rJ   r   rS   rK   rW   rT   rV   rX   )rZ   ra   rg   rJ   r\   s        r]   from_configAutoHfQuantizer.from_config   s     )400"8"B"BCV"W*77 -<<<"//''551, @/44678: 
 ,L9
-888r_   c                 R    [         R                  " U40 UD6nU R                  U5      $ )N)rF   rc   rw   )rZ   rf   rg   ra   s       r]   rc   AutoHfQuantizer.from_pretrained   s*    4DDEbmflm233r_   quantization_config_from_argsc           	      6   Ub  SnOSn[        U[        5      (       aA  [        U[        5      (       a  [        R                  " U5      nO[        R                  U5      nUbh  UR
                  R                  UR
                  R                  :w  a:  [        SUR
                  R                   SUR
                  R                   S35      e[        U[        [        [        [        [        [        45      (       aX  UbU  UR                  5       nUR                  5        H  u  pV[        XU5        M     US[!        UR#                  5       5       S3-  nUS:w  a-  [        U[        5      (       d  [$        R&                  " U5        U$ [(        R+                  U5        U$ )zt
handles situations where both quantization_config from args and quantization_config from model config are present.
zYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. zThe model is quantized with z but you are passing a z| config. Please make sure to pass the same quantization config class to `from_pretrained` with different loading attributes.z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rv   rp   r	   rY   rF   	__class__rj   rT   r   r
   r   r   r   get_loading_attributesitemssetattrrV   rX   warningswarnloggerinfo)rZ   ra   r{   warning_msgloading_attr_dictattrvals          r]   merge_quantization_configs*AutoHfQuantizer.merge_quantization_configs   s    )4y 
 K)4007II&5&?&?@S&T#&<&F&FGZ&[# *5#--66:W:a:a:j:jj./B/L/L/U/U.VVm  oL  oV  oV  o_  o_  n` `F F  #YJacno  .9 = T T V.446	+37 7 ?EVE[E[E]@^?_  `}  ~  ~K"Z0C[%Q%QMM+& #" KK$""r_   c           	         U R                  SS 5      nU R                  SS5      (       d  U R                  SS5      (       a/  U R                  SS5      (       a  SOSn[        R                  U-   nOUc  [        S5      eU[        ;  a8  [
        R                  SU S	[        [        R                  5       5       S
35        gg)NrJ   rK   FrL   rM   rN   rO   rP   rQ   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
rR   r   rS   rT   rU   r   warningrV   rW   rX   )rH   rJ   r[   s      r]   supports_quant_method%AutoHfQuantizer.supports_quant_method   s    /33NDI#''>>BZB^B^_motBuBu 8 < <^U S SWY`F-<<vEL! \  ??NN1, @/44678 9ii
 r_   rb   N)rj   rk   rl   rm   rn   ro   r   r   rp   rw   rc   r   r   staticmethodr   rq   rb   r_   r]   rs   rs      s    
 9e4KT4Q.R 9 90 4 4 2#"4)@#@A2# (00G'H2# 2#h  r_   rs   methodc                    ^  U 4S jnU$ )z-Register a custom quantization configuration.c                    > T[         ;   a  [        ST S35      e[        U [        5      (       d  [	        S5      eU [         T'   U $ )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)rU   rT   
issubclassr   	TypeError)rZ   r   s    r]   register_config_fn8register_quantization_config.<locals>.register_config_fn  sK    55xx/CDEE#677HII36(0
r_   rb   )r   r   s   ` r]   register_quantization_configr   
  s     r_   namec                    ^  U 4S jnU$ )zRegister a custom quantizer.c                    > T[         ;   a  [        ST S35      e[        U [        5      (       d  [        S5      eU [         T'   U $ )NzQuantizer 'r   z!Quantizer must extend HfQuantizer)rW   rT   r   r   )rZ   r   s    r]   register_quantizer_fn1register_quantizer.<locals>.register_quantizer_fn  sJ    )){4&0DEFF#{++@AA'*t$
r_   rb   )r   r   s   ` r]   register_quantizerr     s     ! r_   c                 p   [        U S5      nU(       a&  [        R                  U R                  5      (       d  SnU(       d  UbR  U(       a&  [        R	                  U R                  U5      U l        OXl        [        R                  U R                  US9n	OS n	U	b  U	R                  UUUUUS9  U	R                  U5      nU	R                  U5      nU	R                  U 5      n U	R                  U 5      n [        U	R                  SS5      (       d&  U	R                  R                  n
[        U
SU
5      US'   XX%4$ )Nra   F)pre_quantized)dtypefrom_tf	from_flax
device_mapweights_only
dequantizevaluequant)hasattrrs   r   ra   r   rw   validate_environmentupdate_dtypeupdate_device_mapupdate_tp_planupdate_ep_planrd   rJ   )configra   r   r   r   r   r   
user_agentr   hf_quantizerrJ   s              r]   get_hf_quantizerr   *  s8   F$9:M_BB6C]C]^^+7)8)S)S**,?*F& *=&&22&&' 3 

 ))!% 	* 	
 ))%0!33J?
,,V4,,V4 |77uMM';;HHL"),"NJw22r_   )Sr   typingr   r   models.auto.configuration_autor   utilsr   utils.quantization_configr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   quantizer_aqlmr   quantizer_auto_roundr    quantizer_awqr!   quantizer_bitnetr"   quantizer_bnb_4bitr#   quantizer_bnb_8bitr$   quantizer_compressed_tensorsr%   quantizer_eetqr&   quantizer_fbgemm_fp8r'   quantizer_finegrained_fp8r(   quantizer_fp_quantr)   quantizer_gptqr*   quantizer_higgsr+   quantizer_hqqr,   quantizer_mxfp4r-   quantizer_quantor.   quantizer_quarkr/   quantizer_spqrr0   quantizer_torchaor1   quantizer_vptqr2   rW   rU   
get_loggerrj   r   rF   rs   strr   r   r   rb   r_   r]   <module>r      s    " 7      .  + 4 ' / 2 2 F + 6 @ 2 + - ) - / - + 1 +	<+ + O	
 O   " O  
> 6 & !   O!" O#$ %$) .$	9$+$ +$ J	$
 J$ J$ l$ [$ $ 
9$ 1$ /$ [$ }$ $  J!$" J#$$  !)$  . 
		H	%&# &#Rk k\  !S ! %3r_   