
    7h                        S SK r S SKJr  S SKrS SKJs  Jr  S SKJ	r	  SSK
JrJr  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJrJrJrJrJrJrJr  SSKJrJrJr  SSKJ r J!r!  SSK"J#r#J$r$J%r%  S\RL                  S\R&                  S\R&                  S\R&                  S\R&                  S\'\(\R&                  \\R&                     4   4S jr)S\(S\$S\$S\\$   S\\$   S\\$   S\\$   S\$4S jr*    S#S\S\+\   S\+\   4S  jjr,S!\,l-        S" r.g)$    N)Optional)mm_args   )configir)CppGemmTemplate)CppGroupedGemmTemplatecreate_epilogue_with_attr)	TensorBox)addadd_needs_realized_inputsatenpermuteregister_loweringto_dtypeview)autotune_select_algorithmChoiceCallerExternKernelChoice)use_aten_gemm_kernelsuse_cpp_gemm_template)opsOpsValueVW_tensorpacked_weightx_scalex_zpw_scalereturnc                 F   SnS nS n[        S X#U4 5       5      (       Ga  Sn[        R                  R                  UR	                  5          [        R                  R                  UR	                  5          -  n[        R                  R                  UUR	                  5       S-   S9n[        R                  " U R                  [        R                  5      SS9n	[        R                  R                  UR	                  5          n
X-  U
-  n	[        R                  R                  U	UR	                  5       S-   S9nOa[        R                  " U R                  [        R                  5      SS9n	[        R                  R                  U	UR	                  5       S-   S9nUUU4$ )	NFc              3   ^  #    U  H  n[        U[        R                  5      =(       a}    UR                  5       [        R
                  R                  ;   =(       aK    [        UR                  S 5      =(       a.    [        UR                  R                  [        R                  5      v   M     g7f)dataN)

isinstancer   r   get_namer   graph	constantshasattrr$   ConstantBuffer).0items     Z/var/www/fran/franai/venv/lib/python3.13/site-packages/torch/_inductor/mkldnn_lowerings.py	<genexpr>+create_int8_compensation.<locals>.<genexpr>*   sz      
 -D	 	4& 	:MMOqww000	:DIIv&	: tyy~~r'8'89	: -s   B+B-T_x_w_compensnamer   )dim_BMatrixCompens)
allr   r'   r(   r&   add_tensor_constanttorchsumtofloat)r   r   r   r   r    use_int8_fast_compensation_pathweight_compens	x_w_scalex_w_scale_tensorweight_compens_tensorx_zp_tensors              r-   create_int8_compensationrA       s    ',#NI
 
 G,   +/'GGg..01gg 0 0 234 	 GG//'')N: 0 
	 !&		(++ekk*B Jgg''8 5 H; V44!''),== 5 

 !&		(++ekk*B J44!''),== 5 

 	(     r;   input_weight_compo_x_scale_x_zp_w_scale
_x_w_scalec                 t   U (       a.  [         R                  " [         R                  " UU5      U5      nU$ [         R                  " [         R                  " UU5      U5      n[         R                  " U[         R                  " [         R                  " [         R                  " UU5      U5      U5      5      nU$ N)r   submul)r;   rC   rD   rE   rF   rG   rH   temps           r-   'codegen_int8_gemm_template_compensationrN   N   s     'wwGG 
H K9 wwGG 
 wwGGGG    	
 KrB   xwbc           
         U R                  5       n[        U5      S:  a  [        U SUS   /5      n [        U5      n[        R                  (       d  [        R
                  (       d   eU V	s/ s H&  oc  U	O[        R                  R                  U	5      PM(     nn	/ n
[        U [        US   SS/5      US9Gt pp[        U V	s/ s H  oS LPM     sn	SS [        R                  [        U5      U 5      S9nU /UQnUR                  U V	s/ s H	  oc  M  U	PM     sn	5        [        R                   " U
UU40 UD6  [        U
5      S:w  d   e[#        SU
UU5      nUR$                  R$                  n[        U5       Vs/ s H!  n[        R&                  " Xo[(        U4/5      PM#     nn[        R*                  " US   R-                  5       S	9Ul        UUl        [        U5       Vs/ s H%  n[        R2                  R5                  UU   5      PM'     nn[        U5      S:  a@  [        U5       H1  n[        UU   / US S QUU   R                  5       S   P75      UU'   M3     U$ s  sn	f s  sn	f s  sn	f s  snf s  snf )
N   r   r   layoutT)has_biastrans_wepilogue_creatoract_mappinggrouped_gemm)device)get_sizelenr   r   max_autotunemax_autotune_gemmr   ExternKernelrealize_inputr   r   dictfromkeysrangeextendr	   add_choicesr   r$   MultiOutputlistMultiOutputLayout
get_devicerV   outputsr   create)rO   rP   rQ   attrscalars	algorithmrV   x_sizenum_gemmbiaschoices_kwargsinput_nodesresulttemplate_bufgemm_idxreturn_bufsreturn_tensorss                      r-   grouped_gemm_loweringr}      sr    ZZ\F
6{QR$%1vH&":":::STUST42??#@#@#F	FSTAU"$Gq'!A$A"7GQ/01qtd"q1MM%/15	F 'q'K??@&& 	 w<1&	F ;;##L h'H 	vtX.>-?@'   ..k!n6O6O6QRL&LCH?CRxK12?   6{QhH'+x(G&"+G~h7@@B2FG(N8$ (
 [ 	V 2 @"s$   ,-I"I'	I,I,;(I1#,I6Tc            !        ^^^^^^ [         R                  R                  (       GaZ  SSKJm  [        [         R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  SSTR                  R                  S9m[        [         R                  R                  R                  R                  SSTR                   R                  S9m[         R                  R                  R"                  [         R                  R                  R$                  [         R                  R                  R&                  [         R                  R                  R                  [(        R*                  R,                  [         R                  R                  R.                  /n [1        [         R                  R                  R"                  5      S[2        S[2        S	[2        4U4S
 jj5       n[1        [         R                  R                  R"                  R                  5      S[2        S[2        S[2        S	[2        4U4S jj5       n[1        [         R                  R                  R$                  R                  5      S[2        S[2        S[2        S	[2        4U4S jj5       n[1        [         R                  R                  R                  5       S3S[2        S[2        S[2        4U4S jjj5       n[1        [         R                  R                  R                  R                  5       S3S[2        S[2        S[2        S[2        4U4S jjj5       n[1        [         R                  R                  R&                  5      S[2        S[2        S	[2        4U4S jj5       n[1        [(        R*                  R,                  5      S[2        S[2        S[2        S[2        S[2        S[2        S[2        S[4        S[6        [8           S[8        S[8        S[8        S[4        S [4        S![4        S"[4        4 U4S# jj5       n[1        [         R                  R                  R.                  S S$9S[2        S%[2        S&[2        S'[2        S	[2        4
U4S( jj5       n[1        [         R                  R                  R:                  R                  S S$9[1        [         R                  R                  R:                  R<                  S S$9S[2        S%[2        S&[2        S'[2        S)[2        S	[2        4U4S* jj5       5       n	[1        [         R                  R                  R                  S S$9 S3S[2        S%[2        S&[2        S'[2        S	[2        4
U4S+ jjj5       n
[1        [         R                  R                  R                  R                  S S$9[1        [         R                  R                  R                  R<                  S S$9 S3S[2        S%[2        S&[2        S'[2        S,[2        S	[2        4U4S- jjj5       5       n[         R                  R>                  (       a  [        [         R                  R@                  RB                  S.STRD                  R                  S9mU RG                  [         R                  R@                  RB                  5        [1        [         R                  R@                  RB                  5      S S/.S[2        S0[2        S1[2        S[H        [2           4U4S2 jjj5       n[K        U 5        g g )4Nr   )	mkldnn_irzmkldnn::_linear_pointwiseF)has_out_variantkernel_creatorzonednn::qlinear_pointwiserO   weightrs   c
                 t   > [         R                  " T
R                  R                  U UUUUUUUUU	5
      5      $ rJ   )r   rm   ConvolutionUnary)rO   r   rs   paddingstridedilationgroupsrn   ro   rp   r   s             r-   convolution_unary5register_onednn_fusion_ops.<locals>.convolution_unary   sJ     ##**11 rB   otherc                 z   > [         R                  " TR                  R                  U UUUUUUUUU	U
UU5      5      $ rJ   )r   rm   ConvolutionBinaryrO   r   r   rs   r   r   r   r   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmr   s                r-   convolution_binary6register_onednn_fusion_ops.<locals>.convolution_binary  sS      ##++22 !# rB   c                 z   > [         R                  " TR                  R                  U UUUUUUUUU	U
UU5      5      $ rJ   )r   rm   ConvolutionBinaryInplacer   s                r-   convolution_binary_inplace>register_onednn_fusion_ops.<locals>.convolution_binary_inplace%  sS      ##2299 !# rB   rP   rQ   c                   >^^^ U R                  5       n[        U5      S:  a  [        U SUS   /5      n Ub  [        R                  R                  U5      n/ n[        R                  (       d  [        R                  (       at  [        USS/5      n	[        X	US9Gt pp	[        X`U	5      (       aH  UUU4S jn[        US LSTS:X  a  S OUS	9nUb  / S
QUS'   [        R                  " UUUc  X/OXU/40 UD6  [        U5      S:X  d  [        5       (       a>  [        TTTS9nUc  S US'   UR!                  TR"                  " Uc  X/OXU/U40 UD65        UR%                  5       [&        R(                  R*                  ;   d   eSS 0n[-        SUUc  X/OXU/UUS9n[        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )NrS   rT   r   r   rU   c                    > [        U TTTS9$ )Nro   rp   r
   )bufrp   rn   ro   s    r-   rY   Jregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.epilogue_creator]  s    8w)  rB   TnonerW   rX   rY   )rS   r   r   input_indices)rn   ro   rp   Bc                 X    [         R                  R                  U R                  5          $ rJ   r   r'   r(   r&   rO   s    r-   <lambda>Bregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.<lambda>|      QWW..qzz|<rB   linear_unaryinput_gen_fnsr]   r^   r   r   ra   rb   r   r_   r`   r   r   r   rc   r   rg   r   appendbindr&   r   r'   r(   r   )rO   rP   rQ   rn   ro   rp   rV   rq   rt   transposed_wru   rY   rv   r   rx   aten_mkldnn_linear_unarys      ```         r-   r   0register_onednn_fusion_ops.<locals>.linear_unaryG  s    ZZ\F6{QR,-}OO11!4*,G""f&>&>&q1a&1.5af.U+A(LAA
 "!"$ $15EUF
 }2;/#//"#)! !	 7|q $9$;$;4IN9"&F3K,11"#)! ! ::<177#4#4444<M /)!+F 6{Qf&Ks&KV__5Fr5J&KLMrB   yc                   >^^ U R                  5       n[        U5      S:  a  [        U SUS   /5      n TR                  5       n[        U5      S:  a  [        TSUS   /5      mUb  [        R                  R                  U5      n/ n[        R                  (       d  [        R                  (       at  [        USS/5      n	[        X	TUS9Gt pp	m[        XPU	5      (       aF  UU4S jn[        US LSUS9nUc  / S	QO/ S
QUS'   [        R                  " UUUc  U TU/OU TX#/40 UD6  [        U5      S:X  d  [        5       (       a?  [        TS9nUc  S US'   UR!                  TR"                  " Uc  U TU/OU TX#/U40 UD65        UR%                  5       [&        R(                  R*                  ;   d   eSS 0n[-        SUUc  U TU/OU TX#/UUS9n[        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )NrS   rT   r   r   rU   c                    > [        U TTS9$ )N)r   r
   )r   rn   r   s    r-   rY   Kregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.epilogue_creator  s    8d!LLrB   Tr   )r   rS   r   )   r   rS   r   r   )rn   r   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   Cregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.<lambda>  r   rB   linear_binaryr   r   )rO   r   rP   rQ   rn   rV   rq   y_sizert   r   ru   rY   rv   r   rx   aten_mkldnn_linear_binarys    `  `          r-   r   1register_onednn_fusion_ops.<locals>.linear_binary  s$    ZZ\F6{QR,-ZZ\F6{QR,-}OO11!4*,G""f&>&>&q1a&118Qv2.AQ )LAAM "!"$ $)9F
 <=9i,F?+#//%&YAq	Q1L !	 7|q $9$;$;49"&F3K-22%&YAq	Q1L ! ::<177#4#4444<M /YAq	Q1L+F 6{Qf&Ks&KV__5Fr5J&KLMrB   c                 v   > [         R                  " TR                  R                  U UUUUUUUUU	U
5      5      $ rJ   )r   rm   ConvolutionTransposeUnary)rO   r   rs   r   output_paddingr   r   r   rn   ro   rp   r   s              r-   convolution_transpose_unary?register_onednn_fusion_ops.<locals>.convolution_transpose_unary  sM     ##33::" rB   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                    > [         R                  " [        R                  TR                  R                  U UUUUUUUUU	U
UUUUU5      5      $ rJ   )pytreetree_mapr   rm   MkldnnRnnLayer)rO   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                   r-   mkldnn_rnn_layer4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layer  sc    & ??  ((//!! rB   )type_promotion_kindr   r    w_zpc                   > [        U5      [        :X  d   e[        R                  R	                  [
        R                  " U[
        R                  S9SS9n[        U5      [        :X  d   e[        R                  R	                  [
        R                  " U[
        R                  S9SS9n[        R                  " TR                  R                  U UUUUUUUUU	U
UUUUUU5      5      $ )Ndtyper   r1   r   )typer:   r   r'   r6   r7   tensorfloat32intint32r   rm   QConvPointWisePT2E)rO   r   r   r   r    r   rs   r   r   r   r   o_inv_scaleo_zero_pointoutput_dtypern   ro   rp   r   s                    r-   qconvolution_unary6register_onednn_fusion_ops.<locals>.qconvolution_unary  s    * =E)))gg11WEMM: 2 G :$$$77..T5F / D ##,,33!  # rB   accumc                   > [        U5      [        :X  d   e[        R                  R	                  [
        R                  " U[
        R                  S9SS9n[        U5      [        :X  d   e[        R                  R	                  [
        R                  " U[
        R                  S9SS9nUS:X  au  U[
        R                  [
        R                  4;   aQ  UR                  5       [
        R                  [
        R                  4;   a  UR                  5       U:w  a  [        Xn5      n[        R                  " TR                  R                  U UUUUUUUUU	U
UUUUUUUUUUU5      5      $ )Nr   r   r1   r   r8   )r   r:   r   r'   r6   r7   r   r   r   r   bfloat16	get_dtyper   r   rm   QConvPointWiseBinaryPT2E)rO   r   r   r   r    r   r   rs   r   r   r   r   r   r   r   accum_scaleaccum_zpr   alphar   r   unary_algorithmmr   s                         r-   qconvolution_binary7register_onednn_fusion_ops.<locals>.qconvolution_binaryB  sD   > =E)))gg11WEMM: 2 G :$$$77..T5F / D
 u$ U]]ENN$CCOO%%--)HHOO%5 !5##2299!  !$- rB   c                   >^^^^^^^	^
^^^^^^ UR                  5       [        R                  L d   S5       eU R                  5       n[	        U5      S:  a  [        U SUS   /5      n [        T[        R                  5      (       dU  [        T5      [        :X  d   e[        R                  R                  [        R                  " T[        R                  S9SS9mOeTR!                  5         [#        S TR                  5        5       5      (       a  [        T/ 5      m[	        TR                  5       5      S;   d   S	5       eTc?  [        R                  R                  [        R                  " S
[        R$                  S9SS9m[        T[        R                  5      (       dU  [        T5      [&        :X  d   e[        R                  R                  [        R                  " T[        R$                  S9SS9mOTR!                  5         TR)                  5       S:X  d   S5       eUc?  [        R                  R                  [        R                  " S
[        R$                  S9SS9nTR!                  5         UR!                  5         UR                  5       [        R$                  :w  a  [        [        R*                  R-                  U5      [        R.                  5      (       a  [        R                  R0                  UR3                  5          R5                  [        R$                  5      n[        R                  R                  [        R                  " U[        R$                  S9UR3                  5       S9nTc  S OTR                  5       m/ n[6        R8                  (       d  [6        R:                  (       Ga  [=        XUT	S9Gt npn[        [        R*                  R-                  U5      [        R.                  5      (       Ga\  [        R>                  " [        R@                  " [        R                  R0                  UR3                  5          5      [        R                  R0                  UR3                  5          5      (       a  [C        XU5      (       a  [        R                  R0                  UR3                  5          RE                  5       n[G        UUTTT5      u  mmmUU
UUUUU	UUUUUUU4S jnU R                  5       [        RH                  [        R                  4;   d   e[J        RL                  " UUTc  U TTUTU/OU TTUTUT/TS LUTc  / SQO/ SQS9  [	        U5      S
:X  d  [O        5       (       aK  [Q        TTT	T
TTS9nTc  S US'   URS                  TRT                  " Tc  U TTUTU4OU TTUTUT4U40 UD65        UR3                  5       [        R                  R0                  ;   d   eS S S S S.n[        [        R*                  R-                  T5      [        R.                  5      (       a  S US'   [        [        R*                  R-                  T5      [        R.                  5      (       a  S US'   [W        SUTc  U TTUTU/OU TTUTUT/UUS9n[	        U5      S:  a%  [        U/ US S QUR                  5       S   P75      nU$ )Nz2Only int8 weights are supported by oneDNN qlinear.rS   rT   r   r   r1   c              3   *   #    U  H	  oS :H  v   M     g7fr   N r+   r3   s     r-   r.   Dregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<genexpr>       >+=Cax+=   r   r   x_scale must be 0D or 1Dr   r   r   z(x_zp is incompatible with oneDNN qlinearr   rV   	out_dtypec                 d  >^^^^^	^
^^^^ T[         R                  [         R                  [         R                  [         R                  4;   d   eU R                  5       mTR                  5       mS mT(       a  Tc   eTR                  5       mTR                  5       mTR                  5       m
TR                  5       mS mTb  TR                  5       mUUUUUU
UUUU4
S jn[        R                  " U R                  5       [         R                  UU R                  5       S9nTS:w  a  [        UTTTS9nT[         R                  :X  aL  UR                  5       mUU4S jn[        R                  " UR                  5       TUUR                  5       S9nU$ T[         R                  [         R                  4;   aw  SSKJm  UR                  5       m	UUU	4S jn[        R                  " UR                  5       T[        R                  " U[!        T5      [#        T5      S	9UR                  5       S9nU$ )
Nc           	        >
 T" U 5      n[         R                  " U[        R                  5      nU S   4nS nS nS nT(       d  T" S5      nT" S5      nT" U5      nT" U5      nS nT(       a  Tc   eT" U5      n[	        TUUUUUU5      nT
b}  T" U5      n	T[        R                  [        R
                  4;   d   eT[        R
                  :X  a%  [         R                  " U	[        R                  5      n	[         R                  " X5      nU$ NrT   r   r   r   r7   r   rN   r   r   )indexrC   weight_compens_indexrE   rF   rG   rD   rH   rM   _biasrs   
bias_dtypebias_loaderinput_loaderr;   w_scale_loaderweight_compens_loaderx_scale_loaderx_w_scale_loaderx_zp_loaders             r-   inner_fn]register_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn  s	   $0$7E %(LL$FE49"I<0'+H$(E'+H#B+9"+=(3B+9:N+O,ABV,WM)-J>'7'C C'C-=>R-S
#J ? % - ( % ( *$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwt';#'KrB   r\   r   r  rangesr   r   c                 B   > T" U 5      n[         R                  " UT5      $ rJ   r   r   r  rC   output_cast_loaderr   s     r-   inner_fn_cast_output_to_bf16qregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16O      (:5(A'*||E<'H HrB   r   _create_constantsc                   > T" U 5      nT	" SU-  U[         R                  S9u  pB[        R                  " X4-  5      U-   nT
[         R                  :X  a  T	" SS[         R                  S9u  pgOT	" SS[         R                  S9u  pg[        R
                  " [        R                  " XV5      U5      n[        R                  " UT
5      $ Ng      ?r   r      i   r7   r   r   rounduint8minimummaximumr   r  scale
zero_pointrC   	inv_scalevalqminqmaxclampedr  r   requant_input_loaders            r-   inner_fn_requanteregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_requant^  s    (<U(C8I$'%K5==9" 5	 '*ii0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD +.++ckk#6Ld*S'*||G\'J JrB   r$  r%  r7   r   r   r   int8make_loaderr   	Pointwiserk   r]   r   get_device_or_errorloweringr  	functoolspartialr:   r   )input_bufferr  
output_bufr  r,  r  r  r  r  r+  r  r	  r
  r  r  rp   rn   rs   r  o_scaler   r   ro   r;   r    r<   r   r=   r   s        @@@@@@@@@@r-   rY   Kregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator  s   +!MM!NN!KK!JJ	0      (4'?'?'A0>0J0J0L-+/(:#,#88#8/8/D/D/F,)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K'( '(R &(\\#/#:#:#<"'--%-#/#8#8#:	&
  6>)B *D'Y*J
 (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0K" *,'1'E'E'G&2)2):):$4*/./2</@*"
 (2':':'<	*J  *)rB   )r   r   r   rS         )   r   r   r   rS   r;  r<  rW   rY   r   )output_scaleoutput_zero_pointr   post_op_namepost_op_argspost_op_algorithmrs   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   Cregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   rB   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   rE    r   rB   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   rE    r   rB   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   rE    r   rB   )r   r;  r<  r=  c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   rE        QWW->->qzz|-LrB   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   rE    rJ  rB   qlinear_unaryr   ),r   r7   r0  r]   r^   r   r%   r   r   r   r:   r   r'   r6   r   r   realizer5   r   r   	get_numelInputsKernelunwrap_storage_for_inputr*   r(   r&   r9   r   r_   r`   r   equal
zeros_liker   to_denserA   r   r   rg   r   rc   r   r   r   )rO   r   r   r   r    r   rs   r9  r   r   rn   ro   rp   rV   rq   w_zp_tensorrt   ru   r   rY   rv   r   rx   r  r;   r<   r=   aten_mkldnn_qlinear_unarys    `` ` ```````          @@@@r-   rL  1register_onednn_fusion_ops.<locals>.qlinear_unary  s   " !**,

: D: ZZ\F6{QR,-gr||44G}---''55LL>Y 6  !>7+;+;+=>>> #7B/G7++-.&8T:TT8|
 ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  >>#q(T*TT(
 | ww22LL%++6V 3  OOLLN~~5;;.:88>!!4 4
  gg//@CCEKKPww22LLEKK@t}} 3  "&4>>3CJ*,G""f&>&>&>/6V|0,F} @@F))  (():):4==?)KL))$--/:  ,F}EE ww001G1G1IJSSUH 1 %	7&!{* {* {*z ;;=U[[%**,EEEE#//< GT='4H$wdS!%T!1)9< '92 7|q $9$;$;!(&2!-!%!(&/ <%)F6N-22< GT='4H$wdS	
 ! !))+qww/@/@@@@<<<<	M 88A!! 
 $Ma 88>!!  $Ma .< GT='4@$wdK+F 6{Qf&Ks&KV__5Fr5J&KLMrB   x2c                   >^^^^^^^	^
^^^^^^^ ^! U R                  5       nTR                  5       n[        U5      [        U5      :X  d   e[        U5      S:  a(  US:X  a"  [        U SUS   /5      n [        TSUS   /5      m[        T[        R
                  5      (       dU  [        T5      [        :X  d   e[        R                  R                  [        R                  " T[        R                  S9SS9mOeTR                  5         [        S TR                  5        5       5      (       a  [        T/ 5      m[        TR                  5       5      S;   d   S	5       eTc?  [        R                  R                  [        R                  " S
[        R                   S9SS9mUc?  [        R                  R                  [        R                  " S
[        R                   S9SS9n[        T[        R
                  5      (       dU  [        T5      ["        :X  d   e[        R                  R                  [        R                  " T[        R                   S9SS9mOTR                  5         TR                  5         UR                  5         UR%                  5       [        R                   :w  a  [        [        R&                  R)                  U5      [        R*                  5      (       a  [        R                  R,                  UR/                  5          R1                  [        R                   5      n[        R                  R                  [        R                  " U[        R                   S9UR/                  5       S9nUS:X  a  T
[        R                  [        R2                  4;   aS  TR%                  5       [        R                  [        R2                  4;   a!  TR%                  5       T
:w  a  [5        TT
5      mOTR%                  5       T
:X  d   S5       eTR%                  5       m Tb  TR%                  5       OS m/ n[6        R8                  (       d  [6        R:                  (       Ga  US:X  Ga  [=        XTUT
S9Gt nnpm[        [        R&                  R)                  T5      [        R*                  5      (       Ga  [        TR?                  5       R@                  5      S
:X  Gak  [        [        R&                  R)                  U5      [        R*                  5      (       Ga.  [        RB                  " [        RD                  " [        R                  R,                  UR/                  5          5      [        R                  R,                  UR/                  5          5      (       a  [G        UX5      (       a  [        R                  R,                  UR/                  5          nURI                  5       n[K        UUTTT5      u  mmm!UUUU	U
UUUUUUUU UU!U4S jn[L        RN                  " UUTc	  U TTUTUT/O	U TTUTUTT/TS LUTc  / SQO/ SQS9  [        U5      S
:X  d  [Q        5       (       aQ  [S        TT	T
UUUUTTTS9
nTc  S US'   URU                  T"RV                  " Tc	  U TTUTUT4O	U TTUTUTT4U40 UD65        UR/                  5       [        R                  R,                  ;   d   eS S S S.nTb  S US'   [Y        SUTc	  U TTUTUT/O	U TTUTUTT/UUS9n[        U5      S:  a+  US:X  a%  [        U/ US S QUR                  5       S   P75      nU$ )NrS   r   rT   r   r   r1   c              3   *   #    U  H	  oS :H  v   M     g7fr   r   r   s     r-   r.   Eregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<genexpr>  r   r   r   r   r   r   r   r8   zCdtype of accum for qlinear post op sum should be the same as outputr   c                   >^^^^^	^
^^^^^ T[         R                  [         R                  [         R                  [         R                  4;   d   eU R                  5       mTR                  5       mTR                  5       mS mT(       a  Tc   eTR                  5       mTR                  5       mTR                  5       m
TR                  5       mS mTb  TR                  5       mUUUUUU
UUUUUU4S jn[        R                  " U R                  5       [         R                  UU R                  5       S9nTS:w  a  [        UTTTS9nT[         R                  :X  aL  UR                  5       mUU4S jn[        R                  " UR                  5       TUUR                  5       S9nU$ T[         R                  [         R                  4;   a  SSKJm  UR                  5       m	UUU	4S jn[        R                  " UR                  5       [         R                  [        R                  " U[!        T5      [#        T5      S	9UR                  5       S9nU$ )
Nc           	        > T" U 5      nT" U 5      nS nS nS nU S   4nT(       d  T" S5      nT" S5      nT" U5      n[         R                  " U[        R                  5      nT" U5      nS nT(       a  Tc   eT" U5      n[	        TUUUUUU5      n	Tb}  T" U5      n
T[        R                  [        R
                  4;   d   eT[        R
                  :X  a%  [         R                  " U
[        R                  5      n
[         R                  " X5      n	T[        R                  [        R
                  4;   d   eT[        R
                  :X  a%  [         R                  " U[        R                  5      n[         R                  " X5      n	U	$ r   r  )r  rC   _x2rE   rF   rG   r  rD   rH   rM   r  rs   r  r  r  r;   r  r	  x2_dtype	x2_loaderr
  r  r  s              r-   r  ^register_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fnZ  s\   $0$7E"+E"2C'+H$(E'+H49"I<0#B+9"+=(3B+9:N+O$'LL$FE,ABV,WM)-J>'7'C C'C-=>R-S
#J ? % - ( % ( *$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwt'; $,u~~/N#NN#N'5>>9&)ll3&F#&774#5D#'KrB   r  r   r   c                 B   > T" U 5      n[         R                  " UT5      $ rJ   r  r  s     r-   r  rregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16  r  rB   r   r  c                   > T" U 5      nT	" SU-  U[         R                  S9u  pB[        R                  " X4-  5      U-   nT
[         R                  :X  a  T	" SS[         R                  S9u  pgOT	" SS[         R                  S9u  pg[        R
                  " [        R                  " XV5      U5      n[        R                  " U[         R                  5      $ r  r  r#  s            r-   r,  fregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_requant  s    (<U(C8I$'%K5==9" 5	 '*ii0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD +.++ckk#6Ld*S'*||GU[['I IrB   r.  r/  ) r7  r  r8  r  r,  r  r  r  r  r+  r  r	  r_  r
  r  r  rs   r  r9  r   r   r   r   r   r;   r    r<   rW  r^  r   r=   r   s         @@@@@@@@@@@r-   rY   Lregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creatorB  s   +!MM!NN!KK!JJ	0      (4'?'?'A$&NN$4	0>0J0J0L-+/(:#,#88#8/8/D/D/F,)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K-( -( -(^ &(\\#/#:#:#<"'--%-#/#8#8#:	&
 &/)B * *(5*:	*J (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0J" *,'1'E'E'G&+kk)2):):$4*/./2</@*"
 (2':':'<	*J  *)rB   )r   r   r   rS   r;  r<  r=  )   r   r   r   rS   r;  r<  r=  r>  )
r?  r@  r   other_scaleother_zpbinary_post_opr   unary_post_opunary_post_op_argsunary_post_op_algorithmrs   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   Dregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r   rB   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   rn    r   rB   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   rn    r   rB   )r   r;  r<  c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   rn    rJ  rB   rf  qlinear_binaryr   )-r]   r^   r   r%   r   r   r   r:   r   r'   r6   r7   r   r   rM  r5   r   r   r   rO  rP  r*   r(   r&   r9   r   r   r   r_   r`   r   
get_layoutsizerQ  rR  r   rS  rA   r   rg   r   rc   r   r   r   )#rO   r   r   r   r    r   rW  rs   r9  r   r   x2_scalex2_zpr   r   r   r   r   rV   rq   x2_sizerT  rt   ru   r   rY   rv   r   rx   r  r;   r<   r^  r=   aten_mkldnn_qlinear_binarys#    `` ` `````    ```           @@@@@r-   rr  2register_onednn_fusion_ops.<locals>.qlinear_binary  s[   6 ZZ\FkkmGv;#g,...6{Q;%#7R,-"r72;/0gr||44G}---''55LL>Y 6  !>7+;+;+=>>> #7B/G7++-.&8T:TT8|ww22LL%++6V 3  |ww22LL%++6V 3  dBLL11DzS(((ww22LLU[[9 3  
 OOLLN~~5;;.:88>!!4 4  gg//@CCEKKPww22LLEKK@t}} 3  e#MMNN$  lln(GG||~5
 &b,7<<>\9 ]9 ||~H-1-=)4J*,G##v'?'?'?&3:b<40FAb @@F))  DOO-223q8"@@F))  (():):4==?)KL))$--/:  .faGG ww001G1G1IJH'002H
 1 %	7&!F* F* F*P $//< GT='4L$wbRVW!%T!1)9  < '<5 7|q $9$;$;!(&2!- ("#.!&",'4,< <%)F6N.33< GT='4L$wbRVW	
 ! !))+qww/@/@@@@<<<M
 #La . < GT='4D$wb$O+F 6{Q;%#7f&Ks&KV__5Fr5J&KLMrB   zmkl::_mkl_linearrU   packed_worig_wc          	      r  > / n[         R                  (       d  [         R                  (       aH  [        USS/5      n[	        XUS9Gt pp[        XPU5      (       a  [        R                  " UUXU/SSS/S9  [        U5      S:X  d  [        5       (       a#  UR                  TR                  XU4US US95        UR                  5       [        R                  R                  ;   d   eUR                  5       [        R                  R                  ;   d   eS S	 S
.n	[!        SUXU/UU	S9n
Ub  [#        X5      n
U
$ )Nr   r   rU   TrS   )rX   r   )r   
batch_sizec                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   Gregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>3      !2!21::<!@rB   c                 X    [         R                  R                  U R                  5          $ rJ   r   r   s    r-   r   r  4  r  rB   )r   rS   packed_linearr   )r   r_   r`   r   r   r   r   rg   r^   r   r   r   r&   r   r'   r(   r   r   )rO   rz  r{  rQ   r}  rV   rt   r   ru   r   rx   aten_mkl_linears              r-   mkl_packed_linear5register_onednn_fusion_ops.<locals>.mkl_packed_linear  s?    /1&&&*B*B#*6Aq6#:L293/Q -VEE'33#"&1$(+,a& w<1$(=(?(?NN',,&16Tj -   ((*agg.?.????(AGG,=,==== A@! %>#&)"/% = ^FrB   rJ   )&r7   _C_has_mkldnn r   r   r   mkldnn_linear_pointwiseLinearUnaryrm   binaryLinearBinaryonednnqlinear_pointwiseQLinearPointwisePT2EQLinearPointwiseBinaryPT2E_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiser   r   defaultqconv_pointwiser   r   boolri   r   qconv2d_pointwisebinary_tensorhas_mklmkl_mkl_linearMKLPackedLinearr   r   r   )cpu_needs_realized_inputsr   r   r   r   r   r   r   r   r   rL  rr  r  r  r   r   rx  rU  r   s                @@@@@@r-   register_onednn_fusion_opsr     s   xx#5II..'!$0077	$
  %7II..55'!$1188	%
! %7II..'!$99@@	%
! &8II..55'!$??FF	&
" II33II44II==II..!!))II,,%
! 
599++BB	C			 	 
D	6 
599++BBII	J			 	 		 
K	B 
599++CCJJ	K			 	 		 
L	B 
599++==	> ?	?	?	 ?	 
??	B 
599++==DD	EQU;	;	&;	+4;	9B;	 
F;	z 
599++LL	M			 	 
N	: 
40088	9&	&	&	 &	 	&	
 &	 &	 &	 &	 c&	 &	 &	 &	 &	  &	 &	  !&	 
:&	P 
599++;;QU	V1	1	 %	1	
 1	 1	 1	 
W1	f 
II..554

 
II..<<RV

F	F	 %	F	
 F	 F	 F	 F	



F	P 
599++==SW	X j	j	 %	j	
 j	 j	 j	 
Yj	X	 
II..554

 
II..<<RV

, '@	@	 %	@	
 @	 @	 @	 @	



@	D
 880		))" %(88??	O &,,UYY]]-F-FGuyy}}889 00#0 "0 I&	0 :0d 	"";<rB   )NNNN)/r5  typingr   r7   torch.utils._pytreeutils_pytreer    torch._inductor.kernel.mm_commonr   r  r   r   codegen.cpp_gemm_templater   !codegen.cpp_grouped_gemm_templater	   codegen.cpp_utilsr   r   r4  r   r   r   r   r   r   r   select_algorithmr   r   r   r   r   virtualizedr   r   r   Tensortupler  rA   rN   ri   r}   _inductor_lowering_functionr  r   rB   r-   <module>r     sc      $ $ 4  6 E 8    
 @ ) )+ll+<<+ \\+ ,,	+
 \\+ 4x556+\.%).. . x 	.
 H. x . ". .j 
==I= I=@ 59  1ArB   