
    ;iF              &       
   S SK Jr  S SKJrJr  S SKJr  S SKrSSKJ	r	  SSK
Jr  \	" S	S
5        SOS\R                  S\R                  S\R                  S\\R                     S\\R                     S\R                  4S jj5       r\	" SS
5        SOS\R                  S\R                  S\R                  S\R                  S\R                  S\\R                     S\\R                     S\\R                  \\R                     4   4S jj5       r\	" SS
5        SOS\R                  S\R                  S\R                  S\R                  S\\R                     S\\R                     S\R                  4S jj5       r\	" SS
5      S\R                  S\R                  4S j5       r\	" SS
5      S\R                  S\R                  S\R                  4S  j5       rSPS\R                  S\R                  S\\R                     4S! jjr\	" S"S
5      SQS\R                  4S# jj5       r\	" S$S
5      S\R                  S%\R                  S&\S\\R                  \R                  4   4S' j5       r\	" S(S
5      S\R                  S)\R                  S%\R                  S&\S\R                  S\R                  4S* j5       r\	" S+S
5      S\R                  S&\S,\S-\R                  S\\R                  \R                  4   4
S. j5       r\	" S/S
5      S\R                  S)\R                  S&\S,\S0\\   S\R                  S\R                  4S1 j5       r\	" S2S
5      S\R                  S\R                  S3\\   S)\R                  S%\R                  S&\S\R                  4S4 j5       rS rS5rSrSrS6rS7r\\\\\\S8.r\R4                  S9\R                  S:\R                  S;\R                  S<\\R                     S=\R                  S>\S?\S@\SA\SB\SC\SD\SE\4SF j5       r\R4                  S9\R                  S:\R                  S;\R                  S<\\R                     S=\\R                     SG\SH\S>\S?\SI\SJ\S@\SA\SB\SC\SD\SE\4"SK j5       r\	" SLS
5        SRSM\S9\R                  S:\R                  S;\R                  S<\\R                     S=\\R                     SG\SH\S>\S?\SI\SJ\S@\SA\SB\SC\SD\SS4$SN jj5       rg)S    )Sequence)prodsqrt)OptionalN   )register_kernel   )CODEzbitsandbytes::int8_mm_dequantdefaultA	row_stats	col_statsdtypebiasreturnc                 ~  ^ ^^ [         R                  " T R                  [         R                  :H  U 4S j5        [         R                  " TR                  [         R                  :H  U4S j5        [         R                  " TR                  [         R                  :H  U4S j5        T R                  ST R                  S   5      nTR                  S5      R                  S5      mTR                  S5      R                  S5      mUTT-  -  S-  nUb  Xd-  nUR                  U=(       d    [         R                  5      $ )Nc                  "   > ST R                    3$ )NzA must be int32, got r   r   s   q/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/bitsandbytes/backends/default/ops.py<lambda>_.<locals>.<lambda>       3H	1R    c                  "   > ST R                    3$ )Nzrow_stats must be float32, got r   )r   s   r   r   r          =\]f]l]l\m;nr   c                  "   > ST R                    3$ )Nzcol_stats must be float32, got r   )r   s   r   r   r      r   r   r   g D1@?)torch_checkr   int32float32viewshapereshape	unsqueezetofloat16)r   r   r   r   r   A_calcouts   ```    r   _r+      s     
LLEKK')RS	LLEMM13no	LLEMM13noVVB$F!!"%//3I!!"%//2I
I	)
*\
9C66%(5==))r   z"bitsandbytes::int8_mixed_scaled_mmCACBSCASCBoutlier_colsc           	      B   S nUb  UR                  5       (       a  U S S 2U4   R                  5       n[        R                  R                  R
                  R                  US S 2U4   R                  5       U5      R                  U R                  5      R                  5       nO*[        R                  " SU R                  U R                  S9n[        R                  R                  R                  R                  XX4X`R                  S9n	Ub  U	R                  X5      n	X4$ )Nr   devicer   )r   r   )numel
contiguousr   opsbitsandbytesint8_vectorwise_dequantr   r'   r   temptyr3   int8_scaled_mmaddmm)
r   r,   r-   r.   r/   r0   r   subBsubAoutputs
             r   r+   r+   "   s     DL$6$6$8$8L!,,. II""::BB2aoCVCaCaCcehiR[QS 	 {{1QXXQWW= YY##22::23RV^e^e:fFd)<r   zbitsandbytes::int8_scaled_mmBc                     [         R                  R                  R                  R	                  X5      n[         R                  R                  R
                  R	                  UUUU=(       d    [         R                  US9$ )N)r   r   )r   r6   r7   int8_linear_matmulr   int8_mm_dequantr(   )r   r@   r   r   r   r   out_i32s          r   r+   r+   I   sd     ii$$77??EG99!!1199$u}} :  r   z bitsandbytes::int8_linear_matmulc                     [        X5      $ N)_int8_linear_matmul_impl)r   r@   s     r   r+   r+   \   s    #A))r   z$bitsandbytes::int8_linear_matmul.outr*   c                 ~    [         R                  " UR                  [         R                  :H  5        [	        XU5        g rF   )r   r    r   r!   rG   )r   r@   r*   s      r   r+   r+   a   s%    	LLekk)*Q3'r   c                     [         R                  " U R                  5       UR                  5       R                  5       5      R	                  [         R
                  5      nUb  UR                  U5      nU$ rF   )r   matmulfloatr9   r'   r!   copy_)r   r@   r*   results       r   rG   rG   g   sK    \\!'')QWWY[[]366u{{CF
6"Mr   z#bitsandbytes::int8_vectorwise_quantc                    [        U R                  S S 5      nS nS nUS:  a  U R                  5       U:  nUR                  5       (       aI  [        R
                  " UR                  SS95      R                  S5      nX   R                  5       nSX'   O.[        R                  " SU R                  [        R                  S9n[        R                  " U R                  5       SS9R                  R                  5       n[        R                  " U SUR                  S5      -  -  5      R!                  [        R"                  5      nUS:  a  Ub	  SUS S 2U4'   Ub  X@W'   XvU4$ )Nr           r   dimr2      g     _@)r   r$   absanyr   argwherer#   cloner:   r3   int64maxvaluesrK   roundr&   r'   int8)r   	thresholdrowsr0   outlier_restoreoutliersr   out_rows           r   r+   r+   o   s,   DLO3557i'<<>> !>>(,,1,*=>CCBGLk//1OAK !;;qML 		!%%'q)00668I kk!uy':':2'>>?@CCEJJOG axL,#$<  "%(|++r   z bitsandbytes::quantize_blockwisecode	blocksizec                 0   [         R                  " U5        U R                  5       nX2-  nUS:  nX2-  U-   n[         R                  " U4U R                  [         R
                  S9nU R                  U5      nUS X4-
   n	U	R                  X2-  U5      n
[         R                  " U
5      R                  SS9S   US Xe-
  & [         R                  " U
SUS Xe-
   R                  SS5      -  -  SS5      nUR                  S5      nU(       ah  [         R                  " XU-
  S  5      R                  5       US'   [         R                  " XU-
  S  SUS   -  -  SS5      n[         R                  " X/SS9n[         R                  " UR                  S5      UR                  UR                  5      -
  5      n[         R                  " USS9R                  [         R                  5      R                  UR                  5      R                  U R                   5      nX4$ )Nr   r2   r   rP   rR   )r   _check_is_sizer4   zerosr3   r"   r%   rS   rX   clampr#   catr&   r'   argminuint8r$   )r   ra   rb   nremhas_remblocksabsmax
A_reshapedA_comA_com_reshapedscaled_Ascaled_A_remdiffr*   s                  r   r+   r+      s   	#		A
-CAgG^g%F[[&1885==IF1Jy!E]]1>9=N!&>!:!>!>2!>!Fq!IFV{{>Q8J&:J1K1P1PQSUV1W-WXZ\^_`H#HYYzc')4599;r
{{:#gi#8Ar
N#KRQRS99h51=99X''+dgghoo.FFGD
,,t
$
'
'
4
7
7
H
P
PQRQXQX
YC;r   z"bitsandbytes::dequantize_blockwisern   c                 d  ^  [         R                  " U5        [         R                  " T R                  [         R                  :H  U 4S j5        UT R                  S5      R                  5          nUR                  S   U-  nUR                  S   U-  nUS:w  a.  [         R                  R                  R                  USX7-
  4SSS9nUR                  SU5      UR                  SS5      -  R                  U5      R                  S5      nUS Xc-  U-    nUR                  T R                  5      nU$ )Nc                  "   > ST R                    3$ )NzA must be uint8, got r   r   s   r   r   r      r   r   r   r   constant)modevaluerR   )r   rd   r    r   ri   r%   intr$   nn
functionalpadr#   r'   )r   rn   ra   rb   r   r*   rm   ress   `       r   r+   r+      s    	#	LLEKK')RS
qyy}  "
#CYYr]i'F
))B-)
#C
axhh!!%%cAy+?jXY%Z88B	"V[[Q%77
;
;E
B
J
J2
NC
("S(
)C
++agg
CJr   zbitsandbytes::quantize_4bit
quant_typequant_storagec                   ^ ^ [         R                  " U5        [         R                  " TS;   U4S j5        [         R                  " T R                  [         R                  [         R
                  [         R                  4;   U 4S j5        T R                  5       nXA-  nXA-  nU(       a  US-   OUn[         R                  " U4T R                  [         R                  S9nT R                  U5      n	U	S XF-
   R                  XA-  U5      n
[         R                  " U
5      R                  SS9S   US U& [         R                  " U
SUS U R                  SS5      -  -  SS5      R                  S5      nU(       ac  X* S  n[         R                  " U5      R                  5       US'   [         R                  " USUS   -  -  SS5      n[         R                  " X/SS9n[         T   R#                  UR                  5      R#                  UR                  5      n[         R$                  " [         R                  " UR                  SS5      U-
  5      SS	S
9R#                  [         R&                  5      nUS S S2   S-  USS S2   -  nU[         R&                  :w  a.  UR)                  5       R                  U5      R+                  S5      nUUR-                  5       4$ )Nnf4fp4c                     > ST  3$ Nz#quant_type must be nf4 or fp4, got  r   s   r   r   r          9\]g\h7ir   c                  "   > ST R                    3$ )NzDBlockwise 4bit quantization only supports 16/32-bit floats, but got r   r   s   r   r   r      s    VWXW^W^V_`r   rR   r2   r   rP   r   T)rQ   keepdimr	      )r   rd   r    r   bfloat16r(   r"   r4   re   r3   r%   rS   rX   rf   r#   rg   r
   r'   rh   ri   squeezer&   rK   )r   rb   r   r   rj   full_blocksrk   rm   rn   A_flattenedA_full_blocksscaledA_rem
scaled_remra   	quantizedpackeds   ` `              r   r+   r+      sS    
#	LL~-/ij	LL	ENNEMM5==AA`
 	
	A.K
-C #[1_F[[&1885==IF))A,K  	!'*221>9MM 99]377B7?BF<K[[!f\k.B.G.GA.N*NOQSUVW__`bcF DE"YYu%))+r
[[!fRj.!92qA
F/Q7 
v}}-00>DUYYv{{2q'9D'@ArSWX[[\a\g\ghI ss^q 9QTT?2F#!&&}5??B6<<>!!r   zbitsandbytes::dequantize_4bitr$   c                 J  ^^ [         R                  " U5        [         R                  " TS;   U4S j5        [         R                  " T[         R                  [         R                  [         R
                  4;   U4S j5        U R                  [         R                  :w  a  U R                  [         R                  5      n U R                  S5      n [         R                  " U R                  S5      S-  [         R                  U R                  S9nUR                  5       nU S-  US	S S2'   U S
-	  US S S2'   [        T   R!                  T5      R!                  U R                  5      nX   nUR                  5       U:w  a2  UR                  5       US	-   :X  d   e[         R"                  " USSU5      nXr-  n	XU-  S:  a  S	OS-  n	Xr-  n
U
S:  n[         R                  " UTU R                  S9R                  S5      nU(       aV  US Xz-
   R                  SU5      US X-
   R                  SS	5      -  R                  S5      US Xz-
  & XgU
-
  S  US   -  XU
-
  S & O%UR                  SU5      UR                  SS	5      -  nUR                  " S/US	S  Q76 R!                  T5      nU$ )Nr   c                     > ST  3$ r   r   r   s   r   r   r      r   r   c                     > ST  3$ )NzFBlockwise 4bit dequantization only supports 16/32-bit floats, but got r   r   s   r   r   r      s    XY^X_`r   r   r   r	   )r   r3      rR   r   )r   rd   r    r   r(   r"   r   ri   r#   r%   r:   sizer!   r3   r4   r
   r'   narrow)r   rn   rb   r   r$   r   out_dqrj   ra   rm   rk   rl   r*   s      ` `       r   r+   r+      sD    
#	LL~-/ij	LL%..%--??` 	ww%++FF5;;			"A[[Qekk!((KFAs7F14a4Lq&F3Q3K
u%((2D\F ||~||~Q&&&faA.^F
y=1$a!+F
-CAgG
++e5
:
B
B2
FC 17+00Y?&I[6K[B\BaBabdfgBhhqqrtuIagC	*VBZ7GIkk"i(6;;r1+==
++b
%59
%
(
(
/CJr   zbitsandbytes::gemv_4bitshapeBc                     US   S:  a  SOSn[         R                  R                  R                  R	                  XXVX R
                  5      n[         R                  R                  R                  U US S9$ )NrR   r   r   r   )r   )	r   r6   r7   dequantize_4bitr   r   r{   r|   linear)r   r@   r   rn   ra   rb   r   B_dqs           r   r+   r+     sk     q'A+5J99!!1199!Y\bdkdklD88%%	 &  r   rR   r      )momentumrmspropadagradadamlionademamixgpstate1state2	unorm_vecbeta1beta2epsweight_decaysteplrgnorm_scaleoptimizer_idc                 b   X-  nUS:X  a]  SSXY-  -
  -  nSSXi-  -
  -  nX%-  SU-
  U-  -   nX6-  SU-
  U-  U-  -   nUU-  nUU-  nU[         R                  " U5      U-   -  nUU-  nOUS:X  a  UnOUS:X  a  U	S:X  a  UnOX%-  U-   nUU-  nO|US:X  a  X&-  SU-
  U-  -   nUnOfUS:X  a2  X%-  SU-
  U-  U-  -   nU[         R                  " U5      U-   -  nUU-  nO.US:X  a(  X-U-  -   nU[         R                  " U5      U-   -  nUU-  n[         R                  " W5      nUR                  U5        g)	z.Preprocessing optimizer, computing update normr         ?r   r   rR   r   r	   N)r   r   sumadd_)r   r   r   r   r   r   r   r   r   r   r   r   r   g_valscorrection1correction2s1_valss2_valsupdate_valsupdate_norm
total_norms                        r   _optimizer_precondition_32bitr   D  s   $ _FqS5;./S5;./.C%K6#99.C%K6#9F#BBK'K'G!4s!:;!K/				19Gnv-G'		.C%K6#99		.C%K6#9F#BB

7 3c 9:!K/		F?*

7 3c 9:!K/;'JNN:r   	max_unorm
param_normbeta3alphac                 B   UR                  5       nX-  R                  5       nUS;   a  US:  a  UUU-  -   nSnUS:  aA  [        R                  " U5      nUS;   a  UXV-  U-   :  a
  XV-  U-   U-  nOUXV-  :  a  XV-  U-  nUS:X  a  X'-  SU-
  U-  -   nX8-  SU-
  U-  U-  -   nSX}-  -
  n[        SX-  -
  5      nU* U-  U-  nUS:  a
  USX-  -
  -  nUU-  U[        R                  " U5      UU-  -   -  -  nUU-   nUR                  U5        UR                  U5        GOUS:X  a  US   nUS   nUnUU-  SU-
  U-  -   nUU	-  SU	-
  U-  -   nUU-  SU-
  U-  U-  -   nSX}-  -
  n[        SX-  -
  5      nUS:  a
  USX-  -
  -  nUU-  U
U-  -   n[        R                  " U5      U-  U-   n UUUU -  -  -
  nUS   R                  U5        US   R                  U5        UR                  U5        GOUS:X  a0  US:X  a  UnOX'-  U-   nUU* U-  -  nUU-   nUR                  U5        OUS:X  aM  X'-  SU-
  U-  -   n!UU-  [        R                  " U!5      -  nUU-
  nX(-  SU-
  U-  -   nUR                  U5        OUS:X  aI  X'-  SU-
  U-  U-  -   nUU-  U-  [        R                  " U5      U-   -  nUU-
  nUR                  U5        OCUS	:X  a=  UUU-  -   nUU-  [        R                  " U5      U-   -  nUU-
  nUR                  U5        UR                  U5        g
)zUnified optimizer update kernel)r   rR   r	   r   rO   r   r   r   r   rR   r   r	   N)rK   r   r   rL   sign)"r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   p_valsr   update_scalecurrent_unormr   r   r   r   	step_size
update_vals3_valsm1m2numixed_momentumadaptive_termmomentum_updates"                                     r   _optimizer_update_32bitr     s   , WWYFo$$&F|#s(:&<//L3

9-<'y5;; ) 6 <My55 ) 6-Gq.C%K6#99.C%K6#9F#BBEK'3,-C+%3	#sR%667F!I-EJJw<ORUXcRc<c1de
*$WW		))u_ev55u_ev55u_ev5>>EK'3,-#sR%667F{*urz:B+5<" >??q	q	R		19Gnv-G!bS7]3
*$W		 .C%K6+AA!B&O)DD
*$.C%K6#99W		.C%K6#9F#BB!B&/5::g3F3LM
*$W		6F?*&[EJJw$7#$=>
*$WGGFOr   z$bitsandbytes::optimizer_update_32bitoptimizer_namec                 V   U(       a  [        S5      e[        U    nU S:X  aE  [        UUUUUUUUU	U
UUUUUUU5        US:  a#  UR                  5         [	        XX4XXXXUUU5        ggUS:  a"  UR                  5         [	        XX4XXXXUUU5        [        UUUUUUUUU	U
UUUUUUU5        g)z=
32-bit optimizer implemented by PyTorch with @torch.compile
zskip_zeros is not supported yetr   rO   N)NotImplementedErrorname2optimizer_idr   zero_r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   
skip_zerosr   s                      r   r+   r+     s    0 !"CDD$^4L#	
( s?OO)fiLXZ\giu  s?OO)fiLXZ\giu 	 #	
r   )NNrF   )rO   )r   F)collections.abcr   mathr   r   typingr   r   _opsr   utilsr
   Tensorr   r+   tuplerG   rz   strMOMENTUMRMSPROPADAGRADADAMLIONADEMAMIXr   compilerK   r   r   r   r   r   <module>r      sw   $    #  0)<
 $(#'*||*||* ||* EKK 	*
 5<<
 * \\* =*, 5yA ,0#'#||## 	# 
	#
 
# 5<<(# 5<<
 # 5<<%,,//0# B#L /; $(#'|||| || ||	
 5<<
  EKK  \\ <$ 3Y?* *%,, * @* 7C( (%,, (U\\ ( D(
  HU\\DZ  6	B", ", C",J 3Y? U\\ c eELLRWR^R^D^>_  @2 5yA u|| 5<< C X]XcXc hmhtht  B  .	:'"||'" #'"14'"EJ[['"
5<<%&'" ;'"T 0)</||/LL/ / 	/
 C=/ ;;/ \\/ =/d *I6|||| SM LL	
 ,,  \\ 7& 

   9||9||9 LL9 U\\"	9
 ||9 9 9 
9 9 9 	9 9 9 9x m||m||m LLm U\\"	m
 %m m m m m m m 
m m m 	m  !m" #m m` 7C$ %O
O
||O
 ||O
 LL	O

 U\\"O
 %O
 O
 O
 O
 O
 O
 O
 
O
 O
 O
  	!O
" #O
& 
'O
 DO
r   