from functools import partial
from typing import Any, Literal, Optional

import torch
import torch.nn as nn
import torch.nn.utils.parametrize as P

from .. import functional as F


class Bnb4bitParametrization(nn.Module):
    """
A parametrization module that handles dequantization of a 4-bit quantized parameter.

The parameter data is expected to be already quantized when this parametrization is applied.
This module will dequantize the parameter data to its original floating-point representation
when the forward method is called (i.e. when the parameter is accessed).
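
Once registered with `torch.nn.utils.parametrize.register_parametrization`, any access to
`module.<param_name>` calls `forward` on the packed 4-bit tensor (kept at
`module.parametrizations.<param_name>.original`) and returns the dequantized result.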

Args:
    quant_state (`F.QuantState`):
        The quantization state containing the necessary information for dequantization.
    """

    def __init__(self, quant_state: F.QuantState):
        super().__init__()
        self.quant_state = quant_state

    @torch.no_grad()
    def forward(self, quantized_param: torch.Tensor) -> torch.Tensor:
        """
Forward pass to dequantize the parameter.

Args:
    quantized_param (`torch.Tensor`): The quantized parameter tensor (from .original)

Returns:
    `torch.Tensor`: The dequantized parameter tensor in the original shape and dtype.
)Fdequantize_4bitr   )r   r   s     r   forwardBnb4bitParametrization.forward   s       2B2BCCr   )r   )__name__
__module____qualname____firstlineno____doc__r   
QuantStater   torchno_gradTensorr   __static_attributes____classcell__)r   s   @r   r
   r
      sL    
'ALL ' ]]_
Du|| 
D 
D 
Dr   r


def replace_parameter_4bit_prequantized(
    module: nn.Module, param_name: str, qs_dict: dict[str, Any], device: torch.device
):
    """Attach a dequantizing parametrization to a parameter that already holds packed 4-bit data.

    The quantization state is rebuilt on `device` from its serialized form `qs_dict`.
    """
    if not hasattr(module, param_name):
        raise AttributeError(f"Module does not have parameter '{param_name}'")

    original_param = getattr(module, param_name)
    if not isinstance(original_param, nn.Parameter):
        raise TypeError(f"Parameter '{param_name}' is not an instance of nn.Parameter")

    quant_state = F.QuantState.from_dict(qs_dict, device=device)

    # unsafe=True skips PyTorch's shape/dtype consistency check, which the packed
    # 4-bit storage would otherwise fail.
    P.register_parametrization(module, param_name, Bnb4bitParametrization(quant_state), unsafe=True)

    _register_parametrization_hooks(module, param_name)


def replace_parameter_4bit(
    module: nn.Module,
    param_name: str,
    compress_statistics: bool = False,
    quant_type: Literal["nf4", "fp4"] = "nf4",
    blocksize: Optional[int] = None,
):
    """
Replace a module parameter with a 4-bit quantized version using parametrization.

This function quantizes an existing parameter in a PyTorch module to 4-bit precision
and sets up parametrization to handle automatic dequantization during forward passes.
The original parameter is replaced with quantized data, and a parametrization layer
is registered to manage the quantization state and dequantization process.

Additionally, it registers a state dict post-hook to ensure that the quantization state
is saved correctly when the model's state dict is saved.

It is useful for MoE models or other scenarios where you want to quantize parameters
outside of nn.Linear layers without changing the model's architecture.

<Tip warning={true}>This feature is experimental and may change in future releases.</Tip>

Args:
    module (`nn.Module`):
        The PyTorch module containing the parameter to be quantized.
    param_name (`str`):
        The name of the parameter within the module to quantize.
    compress_statistics (`bool`, *optional*, defaults to `False`):
        Whether to compress quantization statistics to reduce memory usage.
    quant_type (`Literal["nf4", "fp4"]`, *optional*, defaults to `"nf4"`):
        The quantization format to use.
    blocksize (`int`, *optional*, defaults to `None`):
        The block size for quantization. If None, uses the default block size.
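
Example:

    A minimal, illustrative sketch (the module, shapes, and device are assumptions;
    4-bit quantization requires a bitsandbytes-supported device such as CUDA):

    ```python
    import torch
    import torch.nn as nn

    from bitsandbytes.nn.parametrize import replace_parameter_4bit

    emb = nn.Embedding(1024, 64).cuda()
    replace_parameter_4bit(emb, "weight", quant_type="nf4")

    # Storage stays packed 4-bit; accessing emb.weight dequantizes on the fly.
    out = emb(torch.tensor([1, 2, 3], device="cuda"))

    # With torch >= 2.5, the state dict also contains the packed quantization
    # state, which can later be restored with replace_parameter_4bit_prequantized().
    sd = emb.state_dict()
    ```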

Raises:
    AttributeError: If the module does not have the specified parameter.
    TypeError: If the specified attribute is not an instance of nn.Parameter.
    """
    if not hasattr(module, param_name):
        raise AttributeError(f"Module does not have parameter '{param_name}'")

    original_param = getattr(module, param_name)
    if not isinstance(original_param, nn.Parameter):
        raise TypeError(f"Parameter '{param_name}' is not an instance of nn.Parameter")

    # Quantize the existing parameter data to packed 4-bit storage.
    quantized_data, quant_state = F.quantize_4bit(
        original_param.data,
        blocksize=blocksize,
        compress_statistics=compress_statistics,
        quant_type=quant_type,
    )

    # Swap the parameter for its packed representation; gradients are not
    # supported for the quantized storage.
    setattr(module, param_name, nn.Parameter(quantized_data, requires_grad=False))
    del original_param

    # unsafe=True skips PyTorch's shape/dtype consistency check, which the packed
    # 4-bit storage would otherwise fail.
    P.register_parametrization(module, param_name, Bnb4bitParametrization(quant_state), unsafe=True)

    _register_parametrization_hooks(module, param_name)


def _disable_parametrization_cache(module: nn.Module, inputs: tuple[Any, ...], output: Any):
    """Forward post-hook: release the parametrization cache after the forward pass."""
    P._cache_enabled -= 1
    if not P._cache_enabled:
        P._cache = {}


def _enable_parametrization_cache(module: nn.Module, inputs: tuple[Any, ...]):
    """Forward pre-hook: enable the parametrization cache for the forward pass."""
    P._cache_enabled += 1


def _register_parametrization_hooks(module: nn.Module, param_name: str):
    # register_state_dict_post_hook is available in torch >= 2.5; without it the
    # state dict keeps the raw parametrization keys.
    if torch.__version__ >= (2, 5):
        module.register_state_dict_post_hook(partial(_parametrized_state_dict_post_hook, param_name=param_name))

    # Cache dequantized values for the duration of a forward pass, so repeated
    # accesses of the parameter do not trigger repeated dequantization.
    module.register_forward_pre_hook(_enable_parametrization_cache)
    module.register_forward_hook(_disable_parametrization_cache)


def _parametrized_state_dict_post_hook(
    module: nn.Module,
    state_dict: dict[str, Any],
    prefix: str,
    local_metadata: dict[str, Any],
    *,
    param_name: str = "weight",
    **kwargs: Any,
) -> None:
    """
Hook to modify the state dict to include the quantization state.
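
For a parametrized parameter, PyTorch serializes the packed tensor under
`{prefix}parametrizations.{param_name}.original`. This hook renames that entry back to the
plain `{prefix}{param_name}` key and stores the packed quantization state alongside it as
`{prefix}{param_name}.<component>` entries (component names come from
`QuantState.as_dict(packed=True)`, e.g. `absmax` and `quant_state.bitsandbytes__nf4`),
mirroring the serialization format used for 4-bit linear layers.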
zparametrizations.z	.originalc                 "    [        U [        5      $ r   )r6   r
   )xs    r   <lambda>4_parametrized_state_dict_post_hook.<locals>.<lambda>   s    Z+ABr   Nz,Parametrization not found for the parameter.T)packed.)	popr;   is_parametrizednextfilterparametrizationsr   as_dictitems)r(   r_   r`   ra   r)   rb   original_key	clean_keyparametrizationr   kvs               r   r[   r[      s     X.zl)DL! hzl+	 *| <
  4444 37BFD[D[\fDghjn3
 *Z,ZZ*%11 "#++4+8>>@:;fXj\1#67 A #' "r   )FrB   N)	functoolsr   typingr   r   r   r#   torch.nnr7   torch.nn.utils.parametrizeutilsparametrizer;    r   r   Moduler
   strdictr+   r?   boolintrK   tuplerS   rU   r=   r[    r   r   <module>r      sv    ) )   & & DRYY D>8II8#&815c3h8IN8. !&(-#@8II@8@8 @8 %	@8
 }@8F299 eCHo WZ ")) U38_ ABII A3 A. $<II$<S#X$< $< 	$< $< 38n$< 
$<r   