
    iH                        S SK JrJrJr  \" 5       (       a
  SSKrSSKJr  \" 5       (       a  SSKJr  SSKrSSK	J
r
  \R                  " \5      r/ SQr\
S 5       rS	 rS
 r\R$                  SS.S\R&                  S\S\R*                  4S jjr " S S\R.                  5      rS rS rS rS rS r     SS jr    SS jrg)   )is_accelerate_availableis_torch_availablelogging    N)nn)init_empty_weights)contextmanager)g        g      ?g      ?g      ?g       @g      @g      @g      @g       g      g      g      g       g      g      g      c              #     #    [        5       (       a  SS Kn[        XR                  5      (       a  U R                  n O'[        U [
        5      (       a  UR                  " U 5      n [        U SS 5      nUS:X  a*  UR                  R	                  U 5         S v    S S S 5        g US:X  a;  [        US5      (       a*  UR                  R	                  U 5         S v    S S S 5        g S v   g ! , (       d  f       NT= f! , (       d  f       N$= f7f)Nr   typecudaxpu)
r   torch
isinstanceTensordevicestrgetattrr   hasattrr   )devr   dev_types      o/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/transformers/integrations/mxfp4.py	on_devicer   3   s     c<<((**CS!!,,s#C3-v""3' (' u!6!6!!#& '& 
 (' '&s6   BD	C';D	C8D	'
C51D	8
DD	c                     UR                   R                  R                  nU" U R                  [        R
                  5      [        R                  SS9u  pX4$ )N   )axis)numerics_detailsmxfpdowncast_to_mxfp_torchtor   bfloat16uint8)wtriton_kernels_hubr   w_scales       r   quantize_to_mxfp4r%   J   sD    /@@EE\\'U^^(<ekkPQRJA:    c                 R   UR                   R                  UR                   R                  UR                   R                  pTnUR                  R
                  nUR                  R
                  R                  nUR                  SS9u  pU" U" XS9U40 U	D6n U" U" U5      U5      nX4$ )z=
Changes the layout of the tensors depending on the hardware
r   )mx_axisdtype)tensorFP4convert_layoutwrap_torch_tensortensor_detailslayoutStridedLayout"make_default_matmul_mxfp4_w_layout)
r"   r$   r#   r,   r-   r.   r0   r1   value_layoutvalue_layout_optss
             r   swizzle_mxfp4r5   P   s    
 	!!%%!!00!!33 +C
  ..55F&55<<JJM&,&O&OXY&O&Z#L(6ZHYZA.w7GG:r&   i   )r*   rows_per_chunkr*   r6   returnc                   SSK nU R                  (       dC  [        R                  R	                  5       (       a   U R                  5       n UR                  5       nUR                  [        R                  5      S-
  nU R                  SS UR                  :X  d&   SU R                  SS < SUR                  < 35       e[        R                  " [        X R                  S9nU R                  Gt pgnUR                  U5      U-  n	U R                  X5      n UR                  U	S5      n[        R                  " XS	-  X R                  S9n
[        SX5       H  n[        X-   U	5      nXU nXU nUS
-  R                  [        R                   5      nUS-	  R                  [        R                   5      nXU nX_   USS2SSS	24'   UU   USS2SSS	24'   [        R"                  " UUUS9  AAAAAM     U
R                  " / UQUPUS	-  P76 R$                  " / UQXx-  S	-  P76 n
A AAU
R'                  SS	5      R)                  5       $ )zk
Convert the mxfp4 weights again, dequantizing and makes them compatible with the forward
pass of GPT_OSS.
r   N   zblocks.shape[:-1]=z does not match scales.shape=)r*   r   r   r         )out)mathis_cudar   r   is_availabler   int32shaper+   
FP4_VALUESr   prodreshapeemptyrangeminlongldexpview	transpose
contiguous)blocksscalesr*   r6   r>   lutprefix_shapeGB
rows_totalr=   r0r1blkexpidx_loidx_hisubs                     r   convert_moe_packed_tensorsr\   d   s
     >>ejj5577YYu{{#c)F<<,d1Ccr1B0DDbU[UaUaTc.dd,
,,z}}
EC ,,\a<(1,J^^J*F^^J*F
++ja%u]]
KCAz2$j1mm *,(uzz*Rj{Aqt!tG6{Aqt!tGC#&FCc 3" ++
.|
.Q
.A
.
3
3
M\
M1519
MC==A))++r&   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )Mxfp4GptOssExperts   c           
      B  > [         TU ]  5         UR                  U l        UR                  U l        UR
                  U l        [        R                  " [        R                  " U R                  SU R                  -  U R
                  S-  S[        R                  S9SS9U l        [        R                  " [        R                  " U R                  SU R                  -  U R
                  S-  [        R                  S9SS9U l        [        R                  " [        R                  " U R                  SU R                  -  [        R                  S9SS9U l        [        R                  " [        R                  " U R                  U R
                  U R                  S-  S4[        R                  S9SS9U l        [        R                  " [        R                  " U R                  U R
                  U R                  S-  [        R                  S9SS9U l        [        R                  " [        R                  " U R                  U R
                  [        R                  S9SS9U l        SU l        ['        USS	5      U l        S U l        S U l        ['        USS	5      U l        g )
Nr          r)   Frequires_gradgZd;?swiglu_limitg      @)super__init__num_local_expertsnum_expertsintermediate_sizehidden_sizer   	Parameterr   zerosr!   gate_up_proj_blocksgate_up_proj_scalesfloat32gate_up_proj_biasdown_proj_blocksdown_proj_scalesdown_proj_biasalphar   limitgate_up_proj_precision_configdown_proj_precision_config)selfconfig	__class__s     r   rg   Mxfp4GptOssExperts.__init__   s   !33!'!9!9!--#%<<KK((!d.D.D*DdFVFVZ\F\^`hmhshst$
  $&<<KK((!d.D.D*DdFVFVZ\F\didodop$
  "$KK((!d.D.D*DEMMZjo"
 !#KK))4+;+;T=S=SWY=Y[]^fkfqfqr!
 !#KK(($*:*:D<R<RVX<X`e`k`kl!
 !llKK(($*:*:%--P`e
 
V^S9
-1**.'V^S9
r&   hidden_statesr7   c                    [         R                  R                  [         R                  R                  [         R                  R                  pvn[         R                  R
                  n[        UR                  5         U" U" SUS5      U R                  U R                  4S5      n	U" UU R                  U R                  R                  [        R                  5      UUU R                  S U	S9n
U" U
U R                   U R"                  R                  [        R                  5      UUU R$                  UR&                  S9nS S S 5        U$ ! , (       d  f       W$ = f)Nswiglu)ru   rv   r   )gather_indxprecision_configgammasfused_activation)scatter_indxr   r   )r#   
matmul_ogsFnSpecsFusedActivationr   	swiglu_fnr   r   ru   rv   gate_up_projrq   r   r   rp   rw   	down_projrt   rx   	gate_scal)ry   r}   routing_data
gather_idxscatter_idxr   r   r   r   actintermediate_cache1intermediate_cache3s               r   forwardMxfp4GptOssExperts.forward   s,   ))11))99))44 #-
 '--77	}++,!'(I?Q"RUYU_U_aeakakTlnopC",!!&&))%--8&!%!C!C!$	# #-###&&u}}5(!%!@!@#--# -. #"/ -,. #"s   =CE		
E)ru   rt   rr   rx   rs   rq   rn   rw   ro   rk   rj   rv   ri   )
__name__
__module____qualname____firstlineno__rg   r   r   r   __static_attributes____classcell__)r{   s   @r   r^   r^      s,    ":H#U\\ #]b]i]i # #r&   r^   c                 ,   SS K n[        R                  R                  [        R                  R                  [        R                  R
                  [        R                  R                  4u  p4pV[        U R                  5         [        R                  R                  5       n[        UR                  R                  SS5      5      nSn	U R                  S   n
U R                  S   nX-  nX-  nUS-   U-  nX-  nS nU" X5      u  nn[        R                   " USS9n[        R"                  " USS9u  nn[        R$                  " USU5      nUR'                  S5      n[        R(                  " UXS-
  S9X nUR+                  S5      R-                  [        R.                  5      nS	n[        R0                  " UU:  UU5      n[        R2                  " US
S9R-                  [        R.                  5      n[        R2                  " U5      R-                  [        R.                  5      n[        R0                  " UU:  UU	5      n[        R0                  " UU:*  UU	5      n[        R0                  " UU	:H  U	U5      nUU   n[        R0                  " UU   U	:H  U	U5      nU" UR                  5       UR                  5       S9nU" UR                  5       UR                  5       S9nU" UX5      nUnS S S 5        U" WWWWW5      WW4$ ! , (       d  f       N= f)Nr   
LOCAL_RANK0r:   r   c                     [         R                  " U * SSS9S S 2S U24   nUR                  5       n[         R                  " XSS9nX2R	                  5       4$ )Nr   T)dimstabler   )r   argsortrI   take_along_dimint)valsktk_indxtk_vals       r   topk routing_torch_dist.<locals>.topk   sO    mmTEq>q"1"uEGllnG))$Q?F;;=((r&   r   )binsmaxi  T)r   )src_indxdst_indx)osr#   routing
GatherIndxRoutingDataScatterIndxcompute_expt_data_torchr   r   r   distributedget_world_sizer   environgetrB   softmaxsortgatherrE   histcrK   r   rA   wherer   )logitsn_expts_actr   r   r   r   r   
world_sizerankreplace_valuen_tokensn_expts_totn_local_expertslocal_expert_startlocal_expert_endn_gates_padr   	expt_scal	expt_indxsort_indiceshistvar	topk_indx	gate_indxr   r   r   	expt_datahit_expertss                                r   routing_torch_distr      s     	""--""..""..""::	EAJ[ 
6==	!&&557
2::>>,45<<?ll1o%3!3 1H7,	)  $F8	9MM)4	"'**YA">	<LLA|<	 %%b)	{{9;!OLM_qNN2&))%++6	 KK	,> >YO	MM)D9<<U[[I	MM),//<	KK	,< <iW	KK 2i ?MZ	KK	] :M9U	i(	KK	) 4 E}V_`	 !)--/IMMOT"IMMOimmoV+D/O	!g 
"h y$iPR]_kkki 
"	!s   I+L
Lc                    SS K Jn  UR                  5       (       a-  UR                  5       (       a  [	        U S5      (       a  [
        nO[        R                  R                  nUR                  S   nUR                  SU R                  R                  5      n[        R                  R                  XR                  R                  U R                  R                   5      n[#        UR$                  5         U" XPR                  R&                  5      u  pgnS S S 5        U R)                  UWWW5      n	U	R                  USU R                  R                  5      n	X4$ ! , (       d  f       NL= f)Nr   
_is_hookedr:   )torch.distributedr   r@   is_initializedr   r   r#   r   rB   rE   router
hidden_dimr   
functionallinearweightbiasr   r   top_kexperts)
ry   r}   distr   
batch_sizerouter_logitsr   r   r   
routed_outs
             r   mlp_forwardr   '  s   $t2244|9T9T$$,,44$$Q'J!))"dkk.D.DEMMM((8J8JDKKL\L\]M	=''	(07{{GXGX0Y-+ 
) m\:{SJ##JDKK4J4JKJ$$ 
)	(s   5 E
E)c                 ^   ^ SR                  U 5      m[        U4S jU 5       5      (       d  gg)N.c              3      >#    U  H>  n[         R                  " U S 3T5      =(       d    [         R                  " U T5      v   M@     g7f)z\.N)rematch).0keycurrent_key_name_strs     r   	<genexpr>(should_convert_module.<locals>.<genexpr>=  s>      ksdgC523_rxx3%J^7__kss   AA	TF)joinany)current_key_namepatternsr   s     @r   should_convert_moduler   ;  s5    88$45 ks   r&   c                 $   SSK Jn  UR                  S5      nUR                  S5      nUR                  S5      n	UR                  S5      n
UR                  S5      nUR                  S5      nS	 GH  nX;   d  M  Ub  U" UUUUU	U
UU5      nU S
3nU S3n[        XR	                  SS5      S   U5        [        X5      (       d  MX  [        X5      (       d  Mj  [        [        X5      [        X5      5      nUS:X  aA  [        R                  R                  5       (       a  [        R                  R                  5         [        X[        R                  R                  UR                  U5      5      5        [        X5        [        X5        GM      g )Nr   shard_and_distribute_modulemodelempty_paramcasting_dtypeto_contiguousr   device_mesh)r   r   _blocks_scalesr   r   cpu)integrations.tensor_parallelr   r   setattrrsplitr   r\   r   r   r   r@   empty_cacher   rl   r   delattr)module
param_nameparam_valuetarget_devicedq_param_namekwargsr   r   r   r   r   r   r   projblocks_attrscales_attrdequantizeds                    r   
dequantizer  D  sP   JJJwE**]+KJJ/MJJ/M::fD**]+K-&9!!!	 "F'*K!F'*KF--c15a8+Fv++0L0L89UW^_eWst E)ejj.E.E.G.GJJ**,ehh&8&89V&WX,,- .r&   c                    UR                   R                  UR                   R                  UR                   R                  pnSSKJn	  UR                  S5      n
UR                  S5      nUR                  S5      nUR                  S5      nUR                  S5      nUR                  S5      nS	U;   a&  UR                  S
5      S   R                  S5      S   nSU;   a&  UR                  S
5      S   R                  S5      S   nUb  U	" XXXX5        O;[        XR                  S
S5      S   [        R                  R                  USS95        W S3nU S3n[        U U5      n[        U U5      nUR                  R                  S:w  Ga  UR                  R                  S:w  Ga  UR!                  S5      nUS:X  a!  UR#                  UU R$                  S-  S5      nO UR#                  USU R$                  S-  5      n[        USU5      S:X  a  SnUR'                  U5      R)                  5       nUR'                  U5      R)                  5       n[+        U5         [-        UR/                  SS5      UR/                  SS5      U5      u  nnSSS5        US:X  a6  [        R0                  " UU R2                  U R$                  S-  /5      Wl        O2[        R0                  " UU R$                  U R2                  /5      Wl        [        U UU5        [        U U S3U" WU" U" 5       S9S95        [7        U U5        [7        U U5        Aggg! , (       d  f       N= f)zi
This transforms the weights obtained using `convert_gpt_oss.py` to load them into `Mxfp4GptOssExperts`.
r   r   r   r   r   r   r   r   rN   r   r:   r   r   rO   r   Nr   Frc   metar   r   r   r   _precision_config)rhs_data)weight_scaleflex_ctx)r   PrecisionConfigFlexCtx
InFlexDatar   r   r   splitr   r   r   r   rl   r   r   r   sizerE   rj   r   rM   r   r5   rL   Sizerk   rB   r  )r  r  r  r  r#   r  r  r  r  r   r   r   r   r   r   r   r  r	  r
  rN   rO   local_expertstriton_weight_tensorr  s                           r   load_and_swizzle_mxfp4r  g  s   
 	%%55%%--%%00 )O
 KJJwE**]+KJJ/MJJ/M::fD**]+K:$R(..y9!<:$R(..y9!<#W[	
 	))#q1!4ehh6H6Hdi6H6jkF'"KF'"KV[)FV[)F}}V#(:(:f(DA>!^^M63K3Ka3OQSTF^^M2v7O7OST7TUF=&-8EA"M=)446=)446}%1>  R(&*:*:2r*BDV2. , & >!).]FDVDVX^XpXpstXt4u)v &).]FD\D\^d^p^p4q)r & 	23f%&Q[Q]@^_	
 	$$A )E# &%s   1M


Mc           
         Uc  / nU R                  5        GH  u  pgUR                  U5        [        X!5      (       d  UR                  S5        M:  UR                  R
                  S:X  a>  UR                  (       d-  [        5          [        U5      U R                  U'   SnS S S 5        UR                  R
                  S:X  a)  UR                  (       d  SSK
Jn  U" [        U5      Ul        [        [        UR!                  5       5      5      S:  a  [#        UUUUUUS9u  pUR                  S5        GM     X4$ ! , (       d  f       N= f)Nr:   GptOssExpertsT	GptOssMLPr   )
MethodType)has_been_replacedrz   )named_childrenappendr   popr{   r   r  r   r^   _modulestypesr   r   r   lenlistchildren_replace_with_mxfp4_linear)
r   modules_to_not_convertr   quantization_configr!  rz   namer  r   _s
             r   r*  r*    s    ,,.%$%5NN  $$$7@S@^@^#%'9&'At$$(! & $$3<O<Z<Z('V<FNtFOO%&'!+#=& #"3$ A 	R - /. ### &%s   D::
E	c                 &   UR                   (       a  U $ SSKJn  U" S5      qUc  S/OUnUR                  b  UR                  UR                  5        [        [        U5      5      n[        U UUUUS9u  pU(       d  [        R                  S5        U $ )Nr   )
get_kernelz kernels-community/triton_kernelslm_head)rz   zYou are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r  kernelsr0  r#   r+  extendr(  setr*  loggerwarning)r   r+  r   r,  rz   r0  r!  s          r   replace_with_mxfp4_linearr7    s     %%& ((JK,B,Ji[Pf11=%%&9&P&PQ!#&<"=>9 E 	
 Lr&   )NNNFN)NNNN) utilsr   r   r   r   r   
accelerater   r   
contextlibr	   
get_loggerr   r5  rC   r   r%   r5   r    r*   r   r   r\   Moduler^   r   r   r   r  r  r*  r7   r&   r   <module>r>     s    I H - 	 % 
		H	%
( 
 
,0 &3, ;;	3,
 3, \\3,lD# D#RAlH%( -F@J  "$N  "r&   