
    ;i@                     f   S SK Jr  S SKJr  S SKJrJr  S SKrS SKJr  S SK	r	S SK
Jr  S SKJr    " S S5      r\" S	\S
9S\\	R"                  /\	R"                  4   S\\\4   4S j5       r\	R*                  R,                  r\" S	\S
9S\	R"                  S\	R0                  S\	R"                  4S j5       r\ " S S5      5       r " S S\	R6                  R8                  5      r " S S\	R6                  R8                  5      r " S S\	R6                  R8                  5      r    S"S\	R"                  S\	R"                  S\\	R"                     S\\   S\\	R"                     4
S jjr   S#S\	R"                  S\	R"                  S \RB                  S\\	R"                     S\\	R"                     4
S! jjr"g)$    )	dataclass)prod)CallableOptionalN)warn)
deprecatedc                   @    \ rS rSrSrS rS r\S 5       rS r	S r
Srg)	GlobalOutlierPooler   Nc                     [        S5      e)NzCall get_instance() instead)RuntimeErrorselfs    p/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/autograd/_functions.py__init__GlobalOutlierPooler.__init__   s    899    c                 0    [        5       U l        S U l        g N)setoutliers	model_dimr   s    r   
initializeGlobalOutlierPooler.initialize   s    r   c                     U R                   c0  U R                  U 5      U l         U R                   R                  5         U R                   $ r   )	_instance__new__r   )clss    r   get_instance GlobalOutlierPooler.get_instance!   s6    == KK,CMMM$$&}}r   c                     U R                   c  X l         X R                   :w  a  g U R                  R                  UR                  5       5        g r   )r   r   updatetolist)r   outlier_idxfeature_dims      r   add_outliers GlobalOutlierPooler.add_outliers(   s9    >>!(N..([//12r   c                     [         R                  " [        U R                  5      5      R	                  [         R
                  5      $ r   )torchTensorlistr   toint64r   s    r   get_current_outlier_idx+GlobalOutlierPooler.get_current_outlier_idx0   s)    ||D/033EKK@@r   )r   r   )__name__
__module____qualname____firstlineno__r   r   r   classmethodr   r&   r.   __static_attributes__ r   r   r
   r
      s0    I:  3Ar   r
   zDThis function is deprecated and will be removed in a future release.)categorytransform_tile	tile_sizec                 n   Uu  p#SX#-  s=:  a  S:  d   e   e[         R                  " X#-  [         R                  S9R                  X#5      n[         R                  " U5      n[        S5       H  n[         R                  " USU-  SS9S-  nUS-
  R                  [         R                  5      R                  5       n[         R                  " UR                  5       S-   U:H  5      (       d   S	5       eU " U5      n	U	R                  UR                  5      S-   n
XZSU-  -  -  nX#-  SU-  :  d  M    U$    U$ )
a  
Compute a permutation of indices that invert the specified (tiled) matrix transformation

:param transform_tile: a function that applies forward transform to a tensor of shape [dim1, dim2]
:param tile_size: higher-level tile dimensions, i.e. (8, 32) for Turing and (32, 32) for Ampere
:note: we assume that tile_transform applies to a cpu-based int8 tensor of shape tile_size
:example: transform_tile function for the turing layout (bitsandbytes.functional as F)
:returns: indices
r   l            dtype      trunc)rounding_mode   zint overflow)r)   aranger-   view
zeros_likerangedivr,   int8
contiguousallintr<   )r8   r9   d1d2tile_indicespermuted_tile_indicesiith_dim_indicessample_tile_ipermuted_tile_iith_permuted_indicess              r   get_inverse_transform_indicesrT   4   s(   " FBrw<<u{{;@@HL!,,\:1X))L#q&PSVV(3.225::>IIKyy**,s2oEFFVVF(7.11,2D2DEKa!@@7S!V    ! r   permuted_tensorrM   returnc                    U R                   UR                   su  p#u  pEX$-  X5-  s=:X  a  S:X  d   S5       e   S5       eU R                  SUR                  5       5      R                  5       n[        R
                  " U5      nXgUR                  5       '   UR                  XEX5-  X$-  5      nUR                  SSSS5      nUR                  X#5      R                  5       $ )z
Undo a tiled permutation such as turing or ampere layout

:param permuted_tensor: torch tensor in a permuted layout
:param tile_indices: reverse transformation indices, from get_inverse_transform_indices
:return: contiguous row-major tensor
r   z+tensor must contain a whole number of tiles         )	shapereshapenumeltr)   
empty_likeflattenpermuterH   )rU   rM   rowscols	tile_rows	tile_colstensoroutputss           r   undo_layoutri   Z   s     ,;+@+@,BTBT(LT(9t/414c6cc4c6cc4$$R););)=>@@BFv&G&,L  "#ooiD4EtGXYGooaAq)G??4&1133r   c                      \ rS rSr% Sr\\R                     \S'   Sr	\
\S'   Sr\\R                     \S'   Sr\\R                     \S'   Sr\\R                     \S'   Sr\\R                     \S	'   Sr\\R                     \S
'   Sr\\R                     \S'   Sr\\R                     \S'   Sr\\R                     \S'   Sr\\   \S'   SrSrSr\\R                     \S'   SrSrSrSrS r\S 5       rSrg)MatmulLtStatep   N_tile_indicesFforce_no_igemmltCBCxBSBSCBCxBtSBtCBtsubBoutlier_pool        idxTrowc                 f    S U l         S U l        S U l        S U l        S U l        S U l        S U l        g r   )ro   rp   rq   rr   rs   rt   ru   r   s    r   reset_gradsMatmulLtState.reset_grads   s3    	r   c                     [        S5      e)Nz$tile_indices is no longer supported.)
ValueErrorr   s    r   rM   MatmulLtState.tile_indices   s    ?@@r   )ro   ru   rp   rs   rq   rt   rr   ) r0   r1   r2   r3   rm   r   r)   r*   __annotations__rn   boolro   rp   rq   rr   rs   rt   ru   rv   rw   r
   has_accumulated_gradients	thresholdry   is_traininghas_fp16_weightsuse_poolformatBr|   propertyrM   r5   r6   r   r   rk   rk   p   s   ,0M8ELL)0"d"!%B%"&C%,,	&!%B%"&C%,,	&#'D(5<<
 '"&C%,,	&"&C%,,	&#'D(5<<
 '26L(./6 %I"&C%,,	&KHG A Ar   rk   c                   T   \ rS rSr\   SS\R                  R                  R                  S\R                  S\R                  S\
\R                     S\
\R                     S\
\   4S	 jj5       r\S\R                  R                  R                  S
\R                  4S j5       rSrg)MatMul8bitLt   NctxABoutbiasstatec           	      ^	   U=(       d
    [        5       nSU l        [        UR                  5      S:X  a  SU l        Xl        X l        X@l        UR                  S   UR                  S   :X  aG  [        R                  " UR                  S S UR                  SS  -   UR                  UR                  S9$ [        R                  " UR                  S S UR                  S S -   UR                  UR                  S9$ UR                  nUR                  [        R                  :w  a3  [        5       (       d$  [        R                  " SUR                   S35        [        UR                  5      S	:X  a  UR!                  SUR                  S   5      nU R"                  S   (       aA  [$        R&                  " UR)                  [        R                  5      UR*                  S
9u  pxpnOB[$        R,                  " UR)                  [        R                  5      UR*                  S
9u  pynS =pSnUR.                  (       d  UR0                  c  [3        USS 5      S LnUR5                  5       (       + =(       a!    UR                  S   UR7                  S5      :H  nU(       a  UR9                  5       nUR:                  (       a  U(       a  UR0                  b  UR<                  cQ  UR?                  5         [$        R,                  " UR)                  [        R                  5      5      u  Ul        Ul        nUR*                  S:  aM  Xl         [        RB                  RD                  RG                  UUUR0                  U	UR<                  UU5      u  nnOS[        RB                  RD                  RH                  RK                  XuR0                  XR<                  XAR                  S9nS nXPl&        X`l'        UR                  U l(        Uc  S OUR                  U l)        [U        U R"                  S S 5      (       a  UUU4U l+        XR@                  4U l,        O"/ SQU l+        SU l,        U R[                  S S 5        / US S QUR0                  R                  S   P7n[        U5      S	:X  a  UR!                  U5      $ U$ )NFr   TrX   r[   r<   devicez'MatMul8bitLt: inputs will be cast from z to float16 during quantizationrY   )r   gradrx   )r   r<   rZ   NNNNN).rk   is_emptyr   r\   r   r   r   r)   emptyr<   r   float16_is_compilingwarningsr   lenr]   needs_input_gradFint8_double_quantr,   r   int8_vectorwise_quantr   ro   getattris_contiguousstriderH   r   rr   r|   ry   opsbitsandbytesint8_mixed_scaled_mmint8_scaled_mmdefaultr   
grad_shapedtype_A
dtype_biasanytensorstensor_statessave_for_backward)r   r   r   r   r   r   input_shapeCACAtSCASCAtoutlier_colshas_gradis_transposed_outputsubAoutput_shapes                     r   forwardMatMul8bitLt.forward   s    ( =ACLEEHwwr{aggaj({{1773B<!''!"+#=QWWUVU]U]^^{{1773B<!''"1+#=QWWUVU]U]^^gg 77emm#MOOMMCAGG9Lklmqww<1		"aggbk*A "/0/B/B144CVbgbqbq/r,BS %&$;$;ADD<O[`[j[j$k!B\C!!UXX%5q&$/t;H ! 11OaggajAHHQK6OMLLN!!(uxx7G599K\!!# *+)@)@emmAT)U&%)Q ??S $I !9911FF		LFD YY++::BBHHc994ww C F D 	$gg!%4::s##BQ'((a.CK!%yy 1C,CK ,C!!$-=Sb)=588>>!+<={q >>,//r   grad_outputc                    U R                   (       ar  U R                  c  S O[        R                  " U R                  5      n[        R                  " U R                  5      [        R                  " U R
                  5      S US 4$ U R                  u  p4pVnU R                  u  pxn	U R                  u  pU R                  nS =n=pU(       a  UR                  SU R                  S9n[        UR                  5      S:X  a-  UR                  SUR                  S   5      R                  5       nU(       Ga  [         R"                  " UR%                  [        R&                  5      5      u  n  nnn[        R(                  R*                  R,                  R/                  UR1                  5       R                  5       UR1                  5       UU
[        R&                  S9nUR2                  S:  aK  UbH  UR5                  5       S:  a4  US S 2U4==   [        R6                  " UR1                  5       U5      -  ss'   U(       a  UR8                  b  UR8                  R%                  U R:                  SS9R=                  UR>                  RA                  S5      RC                  S	5      5      n[        R6                  " UR%                  U R:                  5      U5      RE                  U RF                  5      nO[I        S
5      eXS US 4$ )Nr   r;   rY   rX   rx   Tcopyr[   @ ?)State must contain CB matrix for backward)%r   r   r)   rD   r   r   r   r   r   r   sumr   r   r\   r]   rH   r   r   r,   r   r   r   r   r   r_   r   r^   matmulro   r   mul_rr   	unsqueezemulrC   r   	Exception)r   r   	bias_grad	req_gradA	req_gradBr   req_gradBiasr   r   r   r   ry   r   grad_Agrad_B	grad_biasCgradSCgradtro   s                      r   backwardMatMul8bitLt.backward   s?   << # 0e6F6Fsxx6PI##CEE*E,<,<SUU,CT9VZZZ363G3G0	aq{{1%%	"yy&***#@I {  !Q&%--b+2C2CB2GHSSUK&'&9&9+..:W&X#E1a!YY++::BB	$$&mm C F $)9djjlQ>Nq#v%,,{}}"EExx#XX[[4[8==eii>Q>QRS>T>X>XYd>efknnS[[&A2FKKCNN[ KLLtY44r   r6   r   )r0   r1   r2   r3   staticmethodr)   autogradfunctionFunctionCtxr*   r   rk   r   r   r5   r6   r   r   r   r      s    
 '+'+)-`^^$$00`<<` <<` ell#	`
 u||$` &` `D (5enn--99 (5 (5 (5r   r   c                   >    \ rS rSr\SS\4S j5       r\S 5       rSrg)MatMul8bitFpi)  Nc                    UR                   (       d  UR                  c  [        USS 5      S LnUR                  5       (       + =(       a!    UR                  S   UR                  S5      :H  nU(       a  UR                  5       nUR                  (       a  U(       a  UR                  b  UR                  c]  UR                  5         [        R                  " UR                  [        R                  5      5      u  Ul        Ul        nUR                  nUR                  R                  R                  UR                   5      R#                  UR                  R%                  S5      R'                  S5      5      n	[        R(                  R*                  R-                  XU5      n
XPl        UR                   U l        UR                  U l        Xl        Uc	  S U l        U
$ UR                   U l        U
$ )Nr   r   r[   r   )r   ro   r   r   r\   r   rH   r   rr   r|   r   r   r,   r)   r   datar<   r   r   r   nn
functionallinearr   r   r   r   r   )r   r   r   r   r   r   r   r   r   ro   r   s              r   r   MatMul8bitFp.forward/  sW   !!UXX%5q&$/t;H ! 11OaggajAHHQK6OMLLN!!(uxx7G599K\!!#)*)@)@emmAT)U&%)QHHXX]]agg&++EII,?,?,B,F,F{,ST$$++A48	gg!% 48::r   c                 2   U R                   u  p#pEnU R                  nU R                  nS =n=pU(       a  UR                  SU R                  S9n
[        UR                  5      S:X  a-  UR                  SUR                  S   5      R                  5       nU(       a3  [        R                  " UR                  5       U5      R                  5       n	U(       a  UR                  b  UR                  R                  U R                  SS9R                  UR                   R#                  S5      R%                  S5      5      n[        R                  " UR                  U R                  5      U5      R'                  U R(                  5      nO[+        S	5      eXS U
S 4$ )
Nr   r;   rY   rX   Tr   r[   r   r   )r   r   r   r   r   r   r\   r]   rH   r)   r   r_   ro   r,   r   r   rr   r   r   rC   r   r   )r   r   r   r   r   r   r   r   r   r   r   ro   s               r   r   MatMul8bitFp.backwardE  s>   363G3G0	aqEE		&***#@I {  !Q&%--b+2C2CB2GHSSUK\\!##%5779Fxx#XX[[4[8==eii>Q>QRS>T>X>XYd>efknnS[[&A2FKKCNN[ KLLtY44r   r6   )	r0   r1   r2   r3   r   rk   r   r   r5   r6   r   r   r   r   )  s1     #$m  * 5 5r   r   c                   Z    \ rS rSr\SS\\R                     4S jj5       r\S 5       r	Sr
g)
MatMul4Biti`  Nquant_statec                 X   SU l         [        UR                  5      S:X  a  SU l         Xl        X l        X@l        UR                  nUR                  S   US   :X  a=  [        R                  " UR                  S S USS  -   UR                  UR                  S9$ [        R                  " UR                  S S US S -   UR                  UR                  S9$ [        R                  R                  R                  U[        R                  " X%5      R                  UR                  5      R!                  5       U5      nXPl        UR                  UR                  Uc  S OUR                  sU l        U l        U l        [+        U R,                  S S 5      (       a  S U4U l        U$ SU l        U$ )	NFr   TrX   r[   r   rZ   r   )r   r   r\   r   r   r   r)   r   r<   r   r   r   r   r   dequantize_4bitr,   r_   r   r   dtype_Br   r   r   r   )r   r   r   r   r   r   B_shaper   s           r   r   MatMul4Bit.forwardd  sg    =ACLEEH!''Gwwr{gaj({{1773B<'!"+#=QWWUVU]U]^^{{1773B<'"1+#=QWWUVU]U]^^ $$++Aq/@/@/P/S/STUT[T[/\/^/^/`bfg  	3477AGGT\T_c_i_i0S[#.s##BQ'(()CK  'CKr   c                 d   U R                   (       ar  U R                  c  S O[        R                  " U R                  5      n[        R                  " U R                  5      [        R                  " U R
                  5      S US 4$ U R                  u  n  pEnU R                  u  pFSu  pxn	U(       a  UR                  SU R                  S9n	U(       a\  [        R                  " U[        R                  " X`R                  5      R                  UR                  5      R!                  5       5      nXxS U	S 4$ )Nr   r   r;   )r   r   r)   rD   r   r   r   r   r   r   r   r   r   r   r,   r<   r_   )
r   r   r   r   r   r   r   r   r   r   s
             r   r   MatMul4Bit.backward  s    << # 0e6F6Fsxx6PI##CEE*E,<,<SUU,CT9VZZZ+.+?+?(	1aq{{$4!	#@I \\+q/@/@II/N/Q/QR]RcRc/d/f/f/hiFtY44r   r6   r   )r0   r1   r2   r3   r   r   r   
QuantStater   r   r5   r6   r   r   r   r   `  s:     Xall=S  : 5 5r   r   r   r   r   r   r   c                     U=(       d
    [        5       nUS:  a  XCl        UR                  (       a1  U R                  R                  S;   a  [
        R                  XX%U5      $ [        R                  XX%U5      $ )Nrx   )cpuxpu)rk   r   r   r   typer   applyr   )r   r   r   r   r   r   s         r   r   r     s_     $]_E3#88==N*%%aCu==aCu55r   r   c                    Uc   eU R                  5       U R                  S   :X  a  U R                  S:X  a  U R                  R                  S:w  a  U R                  S   UR
                  -  S:w  a<  [        SUR
                   SU R                   35        [        R                  XX4U5      $ [        R                  " XR                  5       X2S9nUb  X4-  nU$ [        R                  XX4U5      $ )NrX   Fhpur   z4Some matrices hidden dimension is not a multiple of z^ and efficient inference kernels are not supported for these (slow). Matrix input size found: )r   )r^   r\   requires_gradr   r   	blocksizer   r   r   r   	gemv_4bitr_   )r   r   r   r   r   s        r   matmul_4bitr     s     """wwyAGGBKAOOu$<RWAW772;...!3F{G\G\F]  ^|  }~  }D  }D  |E  F ##A#[AA++a?CJc==r   )NNrx   Nr   )#dataclassesr   mathr   typingr   r   r   r   r)   typing_extensionsr   bitsandbytes.functionalr   r   r
   FutureWarningr*   tuplerJ   rT   compileris_compilingr   
LongTensorri   rk   r   Functionr   r   r   r   r   r   r6   r   r   <module>r     s   !  %    ( #A A: J!ell^U\\9:!S#X!	!> ++ J4 4U=M=M 4RWR^R^ 4	4$ %A %A %APM55>>** M5`455>>** 45n65(( 65x #'%)#'6||6||6 
%,,	6 M"	6 5<<
 6, #'#'>||>||> > 
%,,		>
 5<<
 >r   