
    o9i	                        d dl Z d dlZd dlZd dlmZ  e       sdej
                  fdZyd dlZd dlm	Z
 d dlmZmZ  ej                   ej                  i dd       ej                  i d	d       ej                  i d
d       ej                  i dd       ej                  i d       ej                  i d	       ej                  i d
       ej                  i d       ej                  i d       ej                  i d	       ej                  i d
       ej                  i d      gdg      ej                   de
j"                  de
j"                  fd              Zdej
                  fdZy)    N)is_triton_availablexc                      y )N )r   s    n/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/bitsandbytes/triton/quantize_rowwise.pyquantize_rowwiser      s    $    )early_config_pruneestimate_matmul_time      )
num_stages	num_warps      )r   )r   
n_elements)configskey
BLOCK_SIZEP2c                    t        j                  d      }||z  }t        j                  d|      }||z   }	||k  }
t        j                  | |	z   |
      }t        j                  |      }t        j
                  t        j                  |
|d      d      }t         j                  j                  d||z  z        }t        j                  ||	z   ||
       t        j                  ||z   |       y )Nr   )axis)maskg     _@)
tl
program_idarangeloadabsmaxwhere	libdevicellrintstore)x_ptr
output_ptroutput_maxsr   r   r   pidblock_startr   offsetsrow_maskr   abs_xmax_valoutputs                  r   _quantize_rowwiser.      s    4 mm#J&1b!&J&GGEGO(3q	&&(E15A>$$TQ[%9:
g%vH=
s"G,r	   c           	          t        j                   j                   j                  t         j                  d}t        j                   j                  d    j                  t         j
                        }t        dt        j                  t        j                   j                  d               z        } j                  r|j                  sJ |j                         } fd}t        |    ||| j                  d   |       ||fS )N)devicedtyper   r   r   c                 $    j                   d   fS )Nr   )shape)metar   s    r   <lambda>z"quantize_rowwise.<locals>.<lambda>A   s    QWWQZM r	   )r   r   )torchemptyr3   r0   int8float16intmathceillog2is_cudanumelr.   )r   r-   r&   r   r   grids   `     r   r   r   9   s    aggahhejjIkk!''!*QXXU]]Styy1771:!6789yyV^^++\\^
)$6;
qwwWXz^`a{""r	   )r;   r6   time bitsandbytes.triton.triton_utilsr   Tensorr   tritontriton.languagelanguager   triton.ops.matmul_perf_modelr
   r   autotuneConfigjit	constexprr.   r   r	   r   <module>rL      s[      @6ELL6  U
 V__bQ!<bQ!<bQ!<bQ!<bQ/bQ/bQ/bQ/bA.bA.bA.bA. " ZZ-
 LL- LL- #$-*
#ELL 
#r	   