
    ;iQ              	       4   S SK r S SKJr  \" 5       (       d  S rS\ R                  4S jrgS SKrS SKJr	  \R                  " \R                  " SS0SS	9\R                  " SS
0SS9/S/S9\R                  S\	R                  4S j5       5       rS\ R                  4S jr\R                  " \R                  " SSSS.SS	9\R                  " SSSS.SS	9/SS/S9\R                  S\	R                  S\	R                  S\	R                  4S j5       5       rS rg)    N)is_triton_availablec                     g N )inputs    s/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/triton/quantize_global.pyquantize_global_transposer	              xc                     g r   r   )r   s    r   quantize_globalr   
   r
   r   
BLOCK_SIZEi      )	num_warpsi      )
num_stages
n_elements)configskeyc                 @   [         R                  " SS9nXT-  nU[         R                  " SU5      -   nXs:  n[         R                  " X-   US9n	[         R                  " U5      n
[         R                  R                  SX-  -  5      n[         R                  " X'-   XS9  g )Nr   )axismask     _@)tl
program_idarangeload	libdevicellrintstore)x_ptrabsmax_inv_ptr
output_ptrr   r   pidblock_startoffsetsr   r   
absmax_invoutputs               r   _quantize_globalr+      s     mm#&		!Z 88#GGEO$/WW^,
$$Uan%=>
%v9r   c                 b  ^ U R                  5       R                  5       R                  S5      nSU-  n[        R                  " U R
                  S[        R                  S.6nU R                  (       a  UR                  (       d   eUR                  5       mU4S jn[        U   " XUT5        X14$ )Nr         ?cudadevicedtypec                 :   > [         R                  " TU S   5      4$ )Nr   tritoncdiv)metar   s    r   <lambda>!quantize_global.<locals>.<lambda>/   s    V[[T,5GHJr   )
absmax	unsqueezetorchemptyshapeint8is_cudanumelr+   )r   absmaxr)   r*   gridr   s        @r   r   r   )   s~    ((+6\
aggfEJJGyyV^^++\\^
JqfjA~r         )BLOCK_MBLOCK_NGROUP_MMNrF   rG   rH   c                 8   [         R                  " S5      nXy-   S-
  U	-  nX-   S-
  U
-  nX-  nX-  n[        UUU-  -
  U5      nUU-  UU-  -   nX-  U-  nUU	-  [         R                  " SU	5      -   nUU
-  [         R                  " SU
5      -   nU US S 2S 4   U-  US S S 24   U-  -   -   n UU:  S S 2S 4   UU:  S S S 24   -  n[         R                  " U US9n[         R                  " U5      nUU	-  [         R                  " SU	5      -   nUU
-  [         R                  " SU
5      -   nUUS S 2S 4   U-  US S S 24   U-  -   -   nUU:  S S 2S 4   UU:  S S S 24   -  n[         R
                  R                  SUU-  -  5      n[         R                  " UUUS9  g )Nr   r   r   r   )r   r   minr   r   r    r!   r"   )Ar$   B	stride_am	stride_an	stride_bn	stride_bmrI   rJ   rF   rG   rH   r&   grid_mgrid_nwidthgroup_id
group_sizepid_mpid_nrmrnr   ar)   r*   s                             r   _quantize_global_transposer]   4   s   . mmA+/g-+/g- <(W"44g>
7"cJ&67+W_ryyG44W_ryyG44AtGy(2dAg;+BBCQ4 BFD!G#44GGAD!WW^,
 W_ryyG44W_ryyG44AtGy(2dAg;+BBCQ4 BFD!G#44$$Ua*n%=>
F&r   c                   ^^ U R                  5       R                  5       R                  S5      nSU-  nU R                  u  mm[        R
                  " TTS[        R                  S9nUR                  S5      T:X  a  UR                  S5      T:X  d   eU R                  S5      S:X  d  U R                  S5      S:X  d   eUR                  S5      S:X  d  UR                  S5      S:X  d   eUU4S jn[        U   " U UUU R                  S5      U R                  S5      UR                  S5      UR                  S5      TT5	        X14$ )Nr   r-   r.   r/   r   c                 p   > [         R                  " TU S   5      [         R                  " TU S   5      -  4$ )NrF   rG   r3   )METArI   rJ   s    r   r7   +quantize_global_transpose.<locals>.<lambda>p   s,    V[[DO<v{{1dS\o?^^`r   )
r9   r:   r;   r>   r<   r=   r?   sizestrider]   )r   rB   r)   outrC   rI   rJ   s        @@r   r	   r	   f   s   ",,Q/6\
{{1kk!QvUZZ@xx{aCHHQK1$444||A!#u||A!';;;zz!}!SZZ]a%777`"4(LLOLLOJJqMJJqM
	
 {r   )r<    bitsandbytes.triton.triton_utilsr   r	   Tensorr   r4   triton.languagelanguager   autotuneConfigjit	constexprr+   r]   r   r   r   <module>rm      sA    @5<<    __MM<.!<MM<.1=
 N ZZ:
 LL: : 5<<  __MMccaHTUVMMccaHTUV

 #J ZZ'' '' '' '' ''Rr   