
    ;i&              '       B   S SK Jr  S SKJr  S SKrSSKJrJrJr  \	" \S5      (       a$  \R                  R                  5       R                  OSr\" \\\R                  5      rS\R"                  S	\R"                  S
\S\\R"                  \R"                  4   4S jrS\R"                  S\R"                  S	\R"                  S
\S\R*                  S\R"                  4S jrS\R"                  S\R"                  S	\R"                  S
\S\R*                  S\R"                  SS4S jrS\R"                  S
\S\S\R*                  S\\R"                  \R"                  4   4
S jrS\R"                  S\R"                  S
\S\S\\   S\R*                  S\R"                  4S jrS\R"                  S\R"                  S
\S\S\\   S\R*                  S\R"                  SS4S jrS\R"                  S\R"                  S\\   S\R"                  S	\R"                  S
\S\R"                  4S jr\R:                  r   S2S\S\R"                  S\R"                  S\R"                  S\\R"                     S \S!\S"\S#\S$\S%\S&\S'\R"                  S(\\R"                     S)\R"                  S*\\R"                     S+\S,\SS4&S- jjr S3S\S\R"                  S\R"                  S\R"                  S\\R"                     S.\\R"                     S/\S0\S \S!\S"\S#\S$\S+\S%\S&\S,\SS4$S1 jjr g)4    )Sequence)OptionalN   )kernels_4bitkernels_8bit_quantkernels_optimacceleratorcudaAcode	blocksizereturnc                     [         R                  " U5        [        R                  U R                  5         [        R
                  " XU5      u  p4X4R                  5       4sS S S 5        $ ! , (       d  f       g = f)N)torch_check_is_sizetorch_accelerator_moduledevicer   quantize_blockwise_tritonfloat)r   r   r   outabsmaxs        p/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/backends/triton/ops.pyquantize_blockwiser      sN    	#	!	(	(	2(BB1IVLLN" 
3	2	2s   *A**
A8r   dtypec           	      D  ^  [         R                  " U5        [         R                  " T R                  [         R                  :H  U 4S j5        [
        R                  T R                  5         [        R                  " T UUUUS9nS S S 5        U$ ! , (       d  f       W$ = f)Nc                  "   > ST R                    3$ NzA must be uint8, got r   r   s   r   <lambda>&dequantize_blockwise.<locals>.<lambda>       3H	1R    r   )	r   r   _checkr   uint8r   r   r   dequant_8bit_blockwiser   r   r   r   r   r   s   `     r   dequantize_blockwiser(      s|     
#	LLEKK')RS	!	(	(	2 77
 
3 J 
3	2 Js   -B
Br   c           
      d  ^ ^^ [         R                  " U5        [         R                  " T R                  [         R                  :H  U 4S j5        [         R                  " TR
                  T R
                  :H  U U4S j5        [         R                  " TR                  T R                  :H  U U4S j5        [         R                  " TR                  T:H  UU4S j5        [        R                  T R                  5         [        R                  " T UUUTTS9  S S S 5        g ! , (       d  f       g = f)Nc                  "   > ST R                    3$ r   r   r   s   r   r    .dequantize_blockwise_inplace.<locals>.<lambda>3   r"   r#   c                  <   > ST R                    STR                    3$ NzExpected out.shape == , got shaper   r   s   r   r    r+   4   s     1GyPVWZW`W`Va/br#   c                  <   > ST R                    STR                    3$ )NzExpected out.device == r.   )r   r1   s   r   r    r+   5   s     3J188*TZ[^[e[eZf1gr#   c                  (   > ST  STR                    3$ NzExpected out.dtype == r.   r   r   r   s   r   r    r+   6       /EeWFSVS\S\R]-^r#   r5   )
r   r   r$   r   r%   r0   r   r   r   r&   r'   s   `   ``r   dequantize_blockwise_inplacer7   *   s     
#	LLEKK')RS	LLagg%'bc	LLqxx')gh	LLe#%^_	!	(	(	211	
 
3	2	2s   >D!!
D/
quant_typequant_storagec                   ^  [         R                  " U5        [         R                  " T R                  [         R                  [         R
                  [         R                  4;   U 4S j5        T R                  5       nXAS-  * -  * n[         R                  " US-  4T R                  T R                  S9n[         R                  " US-  S4T R                  [         R                  S9n[        R                  T R                  5         [        R                  " T XXVXGS9  S S S 5        UnU[         R                  :w  a.  UR                  5       R                  U5      R!                  S5      nXR#                  5       4$ ! , (       d  f       Nc= f)Nc                  "   > ST R                    3$ )NzDBlockwise 4bit quantization only supports 16/32-bit floats, but got r   r   s   r   r    quantize_4bit.<locals>.<lambda>J   s    VWXW^W^V_`r#      )r   r   r   )num_elementsquantized_out)r   r   r$   r   bfloat16float16float32numelemptyr   r%   r   r   quantize_4bit_blockwise_tritonsqueezeview	unsqueezer   )	r   r   r8   r9   nblocksr   r   packeds	   `        r   quantize_4bitrL   C   s    
#	LL	ENNEMM5==AA`
 	
	A
 !m$$%F[[&1*qxxqwwGF
++qAvqk!((%++
FC	!	(	(	233yf1	
 
3 F###M2<<Q?<<>!! 
3	2s   E;;
F	r0   c           
      `  ^ [         R                  " U5        [         R                  " T[         R                  [         R                  [         R
                  4;   U4S j5        U R                  [         R                  :w  a<  U R                  5       R                  [         R                  5      R                  S5      n [         R                  " UTU R                  S9n[        R                  U R                  5         [        R                  " XX#TUS9  S S S 5        U$ ! , (       d  f       U$ = f)Nc                     > ST  3$ )NzFBlockwise 4bit dequantization only supports 16/32-bit floats, but got  r   s   r   r    !dequantize_4bit.<locals>.<lambda>o   s    XY^X_`r#   r   r   r   r   )r   r   r$   r@   rA   rB   r   r%   rF   rG   rH   rD   r   r   r   dequantize_4bit_implr   r   r   r8   r0   r   r   s        ` r   dequantize_4bitrU   c   s     
#	LL%..%--??` 	ww%++IIKU[[)33A6
++e5
:C	!	(	(	2))!YEWZ[ 
3 J 
3	2 Js   <D
D-c           
      N  ^^^ [         R                  " TR                  T:H  UU4S j5        [         R                  " TR                  T:H  UU4S j5        [        R                  U R
                  5         [        R                  " XX#TTS9  S S S 5        g ! , (       d  f       g = f)Nc                  (   > ST ST R                    3$ r-   r/   )r   r0   s   r   r    )dequantize_4bit_inplace.<locals>.<lambda>   r6   r#   c                  (   > ST  STR                    3$ r4   r   r5   s   r   r    rX      r6   r#   rR   )r   r$   r0   r   r   r   r   rS   rT   s       ```r   dequantize_4bit_inplacerZ      sh     
LLe#%^_	LLe#%^_	!	(	(	2))!YEWZ[ 
3	2	2s   5B
B$BshapeBc           
         UR                   [        R                  :w  a<  UR                  5       R	                  [        R                  5      R                  S5      n[        R                  " X R                   U R                  S9n[        R                  U R                  5         [        R                  " UUUUU R                   US9  [        R                  R                  R                  U US S9sS S S 5        $ ! , (       d  f       g = f)Nr   rQ   r5   )bias)r   r   r%   rF   rG   rH   rD   r   r   r   !dequantize_4bit_impl_passing_codenn
functionallinear)r   r[   r\   r   r   r   B_dq_tritons          r   	gemv_4bitrd      s     	ww%++IIKU[[)33A6++fGGAHHEK	!	(	(	266''	
 xx"")) * 
 
3	2	2s   #AC99
Doptimizer_namegpstate1state2beta1beta2beta3alphaepssteplrqmap1qmap2absmax1absmax2weight_decaygnorm_scalec                     [         R                  UR                  5         [        S0 SU _SU_SU_SU_SU_SU_SU_SU_S	U_S
U	_SU
_SU_SU_SU_SU_SU_SU_SU_SU_6  S S S 5        g ! , (       d  f       g = f)Nre   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   
skip_zerosrO   )r   r   $optimizer_update_8bit_blockwise_impl)re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rx   s                      r   optimizer_update_8bit_blockwiserz      s    j 
"	(	(	7, 	
)	
	
 	
 		

 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
  !	
" &#	
$ $%	
& "'	
 
8	7	7s   AA,,
A:	unorm_vec	max_unorm
param_normc                 
   [         R                  UR                  5         [        R                  " S0 SU _SU_SU_SU_SU_SU_SU_SU_S	U_S
U	_SU
_SU_SU_SU_SU_SU_SU_SU_6  S S S 5        g ! , (       d  f       g = f)Nre   rf   rg   rh   ri   r{   r|   r}   rj   rk   rl   rm   rn   ru   ro   rp   rv   rx   rO   )r   r   r   optimizer_update_32bit_impl)re   rf   rg   rh   ri   r{   r|   r}   rj   rk   rl   rm   rn   ru   ro   rp   rv   rx   s                     r   optimizer_update_32bitr     s    ( 
"	(	(	711 	
)	
	
 	
 		

 	
  	
  	
 "	
 	
 	
 	
 	
 	
 &	
 	
  !	
" $#	
$ "%	
 
8	7	7s   AA44
B)g        g      ?F)F)!collections.abcr   typingr   r    r   r   r   hasattrr	   current_acceleratortypedevice_typegetattrr
   r   Tensorinttupler   r   r(   r7   strrL   rU   rZ   rd   ry   r   rz   r   rO   r#   r   <module>r      s   $   = = ?Fe]>[>[e335::ag"5+uzzB #%,, #ell #s #uUZUaUachcocoUoOp #||"\\16JMV[VaVa
\\"
||
LL
 ,,
 	

 ;;
 

 

2"||" #"14"EJ[["
5<<%&"@||LL  	
 C= ;; \\:\||\LL\ \ 	\
 C=\ ;;\ 
\ 
\
||
||
 SM
 LL	

 ,,
 
 \\
D (5'Y'Y $( 'J
J
||J
 ||J
 LL	J

 U\\"J
 J
 J
 J
 J
 
J
 J
 	J
 <<J
 ELL!J
 \\J
  ell#!J
" #J
$ %J
( 
)J
~ %(
(
||(
 ||(
 LL	(

 U\\"(
 %(
 (
 (
 (
 (
 (
 (
 
(
 (
 (
  	!(
" #(
& 
'(
r#   