
    7h                     x   S SK Jr  S SKJrJr  S SKrS SKrS SKJr  S SK	J
r
  SSKJrJrJrJrJrJrJrJrJrJrJrJrJr  SSKJrJrJr  SS	KJr      S4S
SSSSSS\\   S\\   S\\   S\S\ S\\\      S\\!S      S\S   4S jjr"   S5S
SSSSSS\\!S      S\S   S\ 4S jjr#S r$ " S S\5      r% " S S\5      r& " S S\5      r' " S  S!\5      r( " S" S#\5      r) " S$ S%\5      r* " S& S'\5      r+ " S( S)\5      r, " S* S+\5      r- " S, S-\5      r. " S. S/\5      r/ " S0 S1\5      r0 " S2 S3\5      r1g)6    )Sequence)AnyOptionalN)make_channels_last_strides_for
OrderedSet   )ExternKernelAllocFixedLayoutFlexibleLayoutget_device_typeir_node_to_tensor is_contiguous_storage_and_layoutLayoutmay_convert_to_optionalMultiOutputMultiOutputLayoutMutationOutput
NoneLayout	TensorBox)convert_shape_to_inductorpad_listlikeSUPPORTED_MKLDNN_DEVICES)Vxr   weightbiaspaddingstridedilationgroups
transposedoutput_paddingquantize_argsotherc                    S nSS jnS nUR                  5         UR                  5         Ub  UR                  5         [        R                  R                     [	        USS9n[	        USS9n[        UR                  5       5      S-
  nS[        U5      s=:  a  U::  d   e   eS[        U5      s=:  a  U::  d   e   eS[        U5      s=:  a  U::  d   e   e[        UU5      n[        UU5      n[        UU5      nU	c  [        S/U5      n	O'S[        U	5      s=:  a  U::  d   e   e[        U	U5      n	[        U[        [        R                  R                  R                  45      (       d   eU(       a(  U" UU5      nUR                  5       nU" UUUU	UUU5      nO[        UR                  5      n[        UR                  5      n[        U5      [        U5      :w  a1  [        U5      S	:X  a  [        U5      S
:X  d   eUR!                  S5        U" UUUUU5      nS/[        [#        [%        S[        U5      S-   5      5      5      -   n[        U5      /U-   nSSS5        U R'                  UW5      n[)        S W 5       5      (       + nU(       d  [+        U5      S:X  a'  [-        U5      (       a  [.        R0                  " U5      nOH[+        U5      S:X  a.  UR3                  5       S   S:X  a  [.        R0                  " U5      nO[5        U5      n[+        U5      [+        U5      :X  d   e[+        U5      [6        ;   d   eU/nU
bX  U
u  nnnnUR                  5         UR                  5         UR                  5         UR                  5         UUU/-   U/-   UU/-   nOUU/-  nUb/  U R'                  UU5      n[        U[8        5      (       d   eUU/-  n[;        UR=                  5       UR?                  5       [A        U5      [A        U5      5      nXEXg/n U(       a  U RC                  SU	5        Ub  URE                  U5        OU RC                  SU5        UU UUU4$ ! , (       d  f       GN	= f)ae  
This function is a helper function to prepare inputs, layout and constant args
for convolution post-op fusion's create function, including deciding the output
layout (channels first or channels last), realizing inputs and make them etc. The
function only supports the CPU/XPU device since conv post-op fusion kernel is only
supported on CPU/XPU right now.
c                    [        U 5      [        U5      :X  d   S5       e[        U 5      nUS:  d   S5       eSnSn	/ n
U
R                  X   5        U
R                  X   U-  5        [        SU5       HK  nX   S-
  X[S-
     -  S-   nX   S-
  XKS-
     -  X+S-
     S-  -
  U-   X;S-
     -   nU
R                  U5        MM     [        [	        [
        U
5      5      $ )NzExpect input dim == weight dim   zExpect input dim > 2r   r	   )lenappendrangelistmapint)output_sizeweight_sizer   r#   r   r    r!   dim	BATCH_DIMWEIGHT_INPUT_CHANNELS_DIM
input_sizedkernelinput_size_ds                 S/var/www/fran/franai/venv/lib/python3.13/site-packages/torch/_inductor/mkldnn_ir.py_conv_input_size<_prepare_convolution_fusion_create.<locals>._conv_input_size5   s    ;3{#33U5UU3+Qw...w	$%!
+01+@6IJq#A!nq(HUO;a?F!#v!e}4q5>A%' !Q'(  l+  CZ())    Nc                 6   US Ln[        U 5      n/ nUR                  U S   5        UR                  US   5        [        SU5       HO  nU(       a  XHS-
     OSn	XU   S-
  -  S-   n
X   SX(S-
     -  -   U
-
  UUS-
     -  S-   nUR                  U5        MQ     U$ )Nr   r(   r	   )r)   r*   r+   )r4   r0   r   r   r    has_dilationr1   r/   r5   	dilation_r6   output_size_ds               r8   _conv_output_size=_prepare_convolution_fusion_create.<locals>._conv_output_sizeM   s    t+*o:a=);q>*q#A+7QQIa.1"459F']a'a%..@AFJvAP M }-  r;   c                 R  ^ U R                  5       m[        T5      nUS:  d   S5       eUS:  aW  / nUR                  TS   U-  5        UR                  TS   U-  5        UR                  U4S j[	        SU5       5       5        U$ U R                  SS5      R                  5       nU$ )Nr(   zExpect weight dim > 2r	   r   c              3   .   >#    U  H
  nTU   v   M     g 7fN ).0r5   prepacked_weight_sizes     r8   	<genexpr>[_prepare_convolution_fusion_create.<locals>._original_deconv_weight_size.<locals>.<genexpr>k   s     OA4Q7s   )sizer)   r*   extendr+   	transpose)prepacked_weightr!   r1   r0   rG   s       @r8   _original_deconv_weight_sizeH_prepare_convolution_fusion_create.<locals>._original_deconv_weight_size`   s     !1 5 5 7'(Qw///wA:K4Q7&@A4Q7&@AOq#OO  +44Q:??AKr;   T)guard_shaper(   r         r	   c              3   B   #    U  H  n[        U[        5      v   M     g 7frD   )
isinstancer.   )rF   is     r8   rH   5_prepare_convolution_fusion_create.<locals>.<genexpr>   s     G+AZ3//+s   xpurD   )#realizer   graph	fake_moder   r)   rJ   r   rT   r.   sympycorenumbersIntegerr,   shapepopreversedr+   require_stride_orderallr   r   r   contiguous_strides
get_strider   r   r   r   get_device_or_error	get_dtyper   insertr*   )!clsr   r   r   r   r   r    r!   r"   r#   r$   r%   r9   r@   rN   x_fakeweight_fakedimsr0   r4   r/   x_shapeweight_shapereq_stride_orderdynamic_shapesoutput_strideinputsx_scalex_zero_pointw_scalew_zero_pointkernel_layoutconstant_argss!                                    r8   "_prepare_convolution_fusion_createry      s7   .*0&  IIK
NN	
		"1$7'DA6;;=!A%3w<'4'''''3x=(D(((((3v;&$&&&&&w-$/fd+!)1#t4Ns>*2d22222).$?N&3

(:(:(B(B"CDDDD 7{FKKJ*K 6<<(G 1 12L7|s<007|q(S->!-CCC  #+K 3huQFa/H&I!JJ 0125EE_ 
b 	  $45A G+GGGN/!,5
*1
-
-&99+F 
	u	$):a)?&99+F6{C1!88881!9999SF 7D4w7L11VH<?VV6(((0@A%++++5'		!+.!-0	M h7MQ/dQ%=-1A5HHW 
	s   HQ
Q.
binary_sumc           
         UR                  5         UR                  5         Ub  UR                  5         UR                  5       Gt pxUR                  5       u  p[        U5      U	/-   n
[        [        [	        [        UR                  5       5      5      5      5      nU R                  X5      n[        U5      [        U5      :X  d   e[        U5      [        ;   d   eU/nUbV  Uu  pnnUR                  5         UR                  5         UR                  5         UR                  5         XU/-   U/-   UU/-   nOX/-  nUb  U(       a  U R                  X[5      nX/-   n[        R                  " U
5      n[        UR                  5       UR                  5       U
U5      n/ nUb  UR                  U5        OUR                  SU5        UUUX4$ )z
This function is a helper function to prepare inputs, layout and constant args
for linear post-op fusion's create function. The function only supports the CPU device
since linear post-op fusion kernel is only supported on CPU right now.
r   )rX   get_sizer,   ra   r+   r)   rb   r   r   r   rd   r   
get_devicerg   r*   rh   )ri   r   r   r   r$   r%   rz   m_ocr/   ro   rr   rs   rt   ru   rv   rq   rw   rx   s                       r8   _prepare_linear_fusion_creater      s    IIK
NNJJLEQ OOEAq'RD.KHU3qzz|+<%=>?  5A1!88881!9999SF 7D4wL11VH<?VV(,,UEE'!"55kBM			M  "MdQ%=-1AHHr;   c                     [        U R                  5       U / 5      n[        U R                  5       S9U l        U/U l        U$ )Ndevice)r   
get_layoutr   r}   layoutoutputs)packed	output_irs     r8   _create_output_noder   !  sD    
I
 &V->->-@AFM[FNr;   c                      ^  \ rS rSr S SU 4S jjjrU 4S jr\SSSSSSS\\   S	\\   S
\\   S\S\	\\
      4S j5       rSrU =r$ )ConvolutionUnaryi,  c           
         > [        US   5      U l        [        TU ]  UUUS [        R
                  R                  R                  R                  SU R                   S3S9  g )Nr   aoti_torch__mkldnn__convolution_pointwiseop_overloadcpp_kernel_name)	r   device_typesuper__init__torchopsmkldnn_convolution_pointwisedefaultselfr   rr   rx   	__class__s       r8   r   ConvolutionUnary.__init__-  sa     +6!95		((??GG)$*:*:);;YZ 	 	
r;   c                 b   > UR                  SU R                   S35        [        TU ]  U5        g Nz&torch/csrc/inductor/aoti_torch/c/shim_z.hinclude_extra_headerr   r   codegenr   wrapperr   s     r8   r   ConvolutionUnary.codegen=  2    $$4T5E5E4FbI	
 	 r;   r   r   r   r   padding_stride_r>   r!   scalarsc           
      x    [        XX#XEXg5      u  nnnnnUU[        U	5      U
/-   n[        UUUS9n[        U5      $ )Nr   rr   rx   )ry   r   r   r   )ri   r   r   r   r   r   r>   r!   attrr   	algorithmrr   rx   rw   r   r   s                   r8   createConvolutionUnary.createC  sj    ( /F(Y
	
 &#G,)
 

 " '

 #6**r;   )r   rE   returnN__name__
__module____qualname____firstlineno__r   r   classmethodr,   r.   r   r   r   __static_attributes____classcell__r   s   @r8   r   r   ,  s    
 	

 

 
 !  + +  + 	 +
 s) + c + 9 +  + $s)$ +  +r;   r   c                      ^  \ rS rSr  S SU 4S jjjrU 4S jr\SSSSSSSSS	\\   S
\\   S\\   S\S\	S\
\   S\
\	   S\
\\      S\
\	   4S j5       rSrU =r$ )ConvolutionBinaryig  c           
         > [        US   5      U l        [        TU ]  UUUS [        R
                  R                  R                  R                  SU R                   S3S9  X@l	        g )Nr   r   %_mkldnn__convolution_pointwise_binaryr   )
r   r   r   r   r   r   r   r   binarycpp_constant_args)r   r   rr   rx   r   r   s        r8   r   ConvolutionBinary.__init__h  si     +6!95		((??FF)$*:*:);;`a 	 	
 "3r;   c                 b   > UR                  SU R                   S35        [        TU ]  U5        g r   r   r   s     r8   r   ConvolutionBinary.codegenz  r   r;   r   r   r%   r   r   r   r   r>   r!   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmc           
          [        XX4XVXx5      u  nnnnnU R                  UU5      nUR                  SU5        UU	U
U[        U5      U/-   n[	        UUUS9n[        U5      $ )Nr	   r   )ry   rb   rh   r   r   r   )ri   r   r%   r   r   r   r   r>   r!   r   r   r   r   r   rr   rx   rw   ro   r   r   s                       r8   r   ConvolutionBinary.create  s    . /F(Y
	
 ((0@Aa%#M2)
 
 # '

 #6**r;   )r   r   )rE   rE   r   )r   r   r   r   r   r   r   r,   r.   strr   floatr   r   r   r   r   s   @r8   r   r   g  s    
 3 
3 3$! '+'+ '+ 	'+
 '+ s)'+ c'+ 9'+ '+ '+ uo'+ SM'+  S	*'+ "#'+ '+r;   r   c                      ^  \ rS rSr S SU 4S jjjrU 4S jrS\\R                     4S jr	\
SSSSS	SS
SS\\   S\\   S\\   S\S\S\\   S\\   S\\\      S\\   4S j5       rSrU =r$ )ConvolutionBinaryInplacei  r   c           
        > [        US   5      U l        US   US   /USS  -   n[        TU ]  UUUS [        R
                  R                  R                  R                  SU R                   S3S9  [        [        US   R                  5       S9US   U 5      [        [        US   R                  5       S9US   U 5      /U l        g )Nr   r	   r(   r   &_mkldnn__convolution_pointwise_binary_r   r   )r   r   r   r   r   r   r   _convolution_pointwise_r   r   r   r}   mutation_outputs)r   rw   rr   rx   reordered_inputsr   s        r8   r   !ConvolutionBinaryInplace.__init__  s     +6!95"1Ivay1F12J>		((@@GG)$*:*:);;ab 	 	
 :VAY-A-A-CDfQiQUV:VAY-A-A-CDfQiQUV!
r;   c                 b   > UR                  SU R                   S35        [        TU ]  U5        g r   r   r   s     r8   r    ConvolutionBinaryInplace.codegen  r   r;   c                     [        5       $ rD   r   r   s    r8   get_unbacked_symbol_defs1ConvolutionBinaryInplace.get_unbacked_symbol_defs  
    |r;   r   r   r%   r   r   r   r   r>   r!   r   r   r   r   r   c           
          [        XX4XVXx5      u  nnnnnU R                  UU5      nUR                  SU5        UU	U
U[        U5      U/-   n[	        [        US   R                  5       S9UUS9nUR                  S   $ )Nr	   r   )rw   rr   rx   r   )ry   rb   rh   r   r   r   r}   rr   )ri   r   r%   r   r   r   r   r>   r!   r   r   r   r   r   rr   rx   r   ro   r   s                      r8   r   ConvolutionBinaryInplace.create  s    . /F(Y
	
 ((0@Aa%#M2)
 
 *$F1I,@,@,BC'
 }}Qr;   )r   r   r   r   )r   r   r   r   r   r   r   r[   Symbolr   r   r,   r.   r   r   r   r   r   r   r   r   s   @r8   r   r     s    
 	

 

 
0!*U\\*B  * *  *  	* 
 *  s)*  c*  9*  *  *  uo*  SM*   S	**  "#*  * r;   r   c                      ^  \ rS rSr S SU 4S jjjrU 4S jr\SSSSSSS	\\   S
\\   S\\   S\\   S\S\	\\
      4S j5       rSrU =r$ )ConvolutionTransposeUnaryi  rE   c           	         > [         TU ]  UUUS [        R                  R                  R
                  R                  SS9  g )N6aoti_torch_cpu_mkldnn__convolution_transpose_pointwiser   )r   r   r   r   r    _convolution_transpose_pointwiser   r   s       r8   r   "ConvolutionTransposeUnary.__init__  s?     			((IIQQT 	 	
r;   c                 F   > UR                  S5        [        TU ]	  U5        g Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hr   r   r   r   s     r8   r   !ConvolutionTransposeUnary.codegen      $$%RS r;   r   r   r   r   r   output_padding_r   r>   groups_r   c                     Sn[        U UUUUUUUUU5
      u  nnnnnUU	[        U
5      U/-   n[        UUUS9n[        U5      $ )NTr   )ry   r   r   r   )ri   r   r   r   r   r   r   r>   r   r   r   r   r"   rr   rx   rw   r   r   s                     r8   r    ConvolutionTransposeUnary.create  s     
 /
	
 &#G,)
 

 + '

 #6**r;   r   r   r   r   s   @r8   r   r     s    
 	

 

 
! ++++ ++ 	++
 s)++ c++ c++ 9++ ++ $s)$++ ++r;   r   c                      ^  \ rS rSr S SU 4S jjjrU 4S jr\SSSSSSSSS	SS
SSSS\\   S\\   S\\   S\S\	S\4S j5       r
SrU =r$ )QConvPointWisePT2Ei>  c           	         > [        U5      S:H  U l        [        TU ]  UUUS[        R
                  R                  R                  R                  SS9  g)a  
if bias is not None
    - inputs = [x, w, b, weight_scale, weight_zp]
    - const_args is: [stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
      fp32_output, unary_attr, unary_scalars, unary_algorithm]
else
    - inputs = [x, w, weight_scale, weight_zp]
    - const_args is: [bias, stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
      fp32_output, unary_attr, unary_scalars, unary_algorithm]
   N&aoti_torch_cpu__qconv_pointwise_tensorr   )	r)   has_biasr   r   r   r   onednnqconv_pointwiser   r   s       r8   r   QConvPointWisePT2E.__init__?  sO      Fq(		((88@@D 	 	
r;   c                    > UR                  S5        [        TU ]	  U5        [        U R                  [
        5      (       a  U R                  U5        g g r   r   r   r   rT   r   r   codegen_size_assertsr   s     r8   r   QConvPointWisePT2E.codegenY  B    $$%RS dkk6**%%g. +r;   qxr   rs   rt   qwru   rv   r   r   r   r    r!   output_scaleoutput_zero_pointc                 .   SnS n[        U UUUU	UU
UUUX#XV/5      u  nnnnnUc  US   US   sUS'   US'   OUS   US   sUS'   US'   UUUUU[        U5      U/-   nUc   eU[        R                  [        R                  4;   a  UUl        [        UUUS9$ )NFr(   r	   r   r   )ry   r   r   float32bfloat16dtyper   )ri   r   rs   rt   r   ru   rv   r   r   r   r    r!   r   r  output_dtyper   r   r   r"   r#   rr   rx   rw   r   s                           r8   r   QConvPointWisePT2E.create_  s   * 
 /G:
	
 <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.%#G,)
 
 '''EMM5>>:: #/M! '
 	
r;   r   r   r   )r   r   r   r   r   r   r   r,   r.   r   r   r   r   r   s   @r8   r   r   >  s    
 	

 

 
4/ B
B
 B
 "	B

 B
 B
 "B
 B
 S	B
 cB
 s)B
 B
 B
 B
 B
r;   r   c                      ^  \ rS rSr S SU 4S jjjrU 4S jrS\\   4S jrS\	\
R                     4S jr\SSS	SS
SSSSSSSS\\   S\\   S\\   S\SSSS4S j5       rSrU =r$ )QConvPointWiseBinaryPT2Ei  r   c           	         > [        U5      S:H  U l        SU l        [        TU ]  UUUS[
        R                  R                  R                  R                  SS9  g)a  
Needs input/weight/output qparams
if bias is not None
    - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum, b]
    - const_args = [stride, padding, dilation, groups, o_scale, o_zp,
    output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
else
    - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum]
    - const_args [b, stride, padding, dilation, groups, o_scale, o_zp,
     output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
      N/aoti_torch_cpu__qconv2d_pointwise_binary_tensorr   )
r)   r   idx_for_inplace_sumr   r   r   r   r   qconv2d_pointwiser   r   s       r8   r   !QConvPointWiseBinaryPT2E.__init__  sW    " Fq(#$ 		((::AAN 	 	
r;   c                    > UR                  S5        [        TU ]	  U5        [        U R                  [
        5      (       a  U R                  U5        g g r   r   r   s     r8   r    QConvPointWiseBinaryPT2E.codegen  r   r;   c                 R    U R                   U R                     R                  5       /$ rD   )rr   r  get_namer   s    r8   get_mutation_names+QConvPointWiseBinaryPT2E.get_mutation_names  s#    D445>>@AAr;   c                     [        5       $ rD   r   r   s    r8   r   1QConvPointWiseBinaryPT2E.get_unbacked_symbol_defs  r   r;   r   r   rs   rt   r   qaccumr   r   r   r    r!   r   r  c                    SnS n[        U UUUU
U	UUUUX#XV/U5      u  nnnnnUc  US   US   sUS'   US'   OUS   US   sUS'   US'   UUUUUUUUU[        U5      U/
-   nUS:X  d   S5       e[        R                  R	                  UR                  5       5        [        [        UR                  5       S9UUS9nUR                  UR                     $ )	NFr(   r	   r   sumzCFor now, only post op sum is supported in QConvPointWiseBinaryPT2E.r   r   )ry   r   r   rY   mark_buffer_mutatedr  r
  r   r}   rr   r  )ri   r   rs   rt   r   ru   rv   r  r   r   r   r    r!   r   r  r  accum_scaleaccum_zero_pointr   alphar   r   r   r"   r#   rr   rx   _kernel_layoutro   r   s                                 r8   r   QConvPointWiseBinaryPT2E.create  s=   4 
 /G:
	
" <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.%#M2)
 
 e# 	
Q	
# 	
##FOO$56)V%6%6%89'
 }}V7788r;   r   r  r   r   )r   r   r   r   r   r   r   r   r  r   r[   r   r   r   r,   r.   r   r   r   r   s   @r8   r
  r
    s    
 	

 

 
8/BHSM B*U\\*B  O9O9 O9 "	O9
 O9 O9 O9 S	O9 cO9 s)O9 O9 "O9 'O9 O9r;   r
  c                   P   ^  \ rS rSr S SU 4S jjjrU 4S jr\S 5       rSrU =r	$ )MKLPackedLineari!  rE   c                    > [         TU ]  UUUS [        R                  R                  R
                  R                  S9  g N)r   )r   r   r   r   mkl_mkl_linearr   r   s       r8   r   MKLPackedLinear.__init__"  s:     			1199 	 	
r;   c                 F   > UR                  S5        [        TU ]	  U5        g r   r   r   s     r8   r   MKLPackedLinear.codegen0  r   r;   c                    U R                  U R                  U5      5      nU R                  U R                  U5      5      nUR                  5       Gt pgUR                  5       u  p[        U5      U/-   n	[        R
                  " U	5      n
XU/nU/nUb  X/-  nOUR                  SS 5        [        [        UR                  5       UR                  5       X5      UUS9$ )Nr   r   )require_stride1realize_inputr|   r,   r   rd   rh   r%  r   r}   rg   )ri   r   packed_worig_wB
batch_sizer~   r   r   r/   rq   rr   rx   s                r8   r   MKLPackedLinear.create4  s     1 1! 45$$S%6%6v%>?

!1gn&99+Fv&#=cMF  D){ '
 	
r;   r   r   
r   r   r   r   r   r   r   r   r   r   r   s   @r8   r%  r%  !  s5    
 	

 

 
! 
 
r;   r%  c                   V   ^  \ rS rSr S SU 4S jjjrU 4S jr\S 5       rS rSr	U =r
$ )	LinearUnaryiL  rE   c           	         > [         TU ]  UUUS [        R                  R                  R
                  R                  SS9  g )N aoti_torch_cpu__linear_pointwiser   )r   r   r   r   r   _linear_pointwiser   r   s       r8   r   LinearUnary.__init__M  s?     			((::BB> 	 	
r;   c                 F   > UR                  S5        [        TU ]	  U5        g r   r   r   s     r8   r   LinearUnary.codegen\  r   r;   c                    U R                  U R                  U5      5      nU R                  U R                  U5      5      nUR                  5       Gt pxUR                  5       u  p[        U5      U	/-   n
X/nXE(       a  UOS/U/nUb2  U R                  U R                  U5      5      nUR	                  U5        OUR                  SS 5        [        [        UR                  5       UR                  5       U
S9UUS9n[        U5      $ )Nr   r   r  rJ   r   )require_contiguousr/  r|   r,   r*   rh   r7  r   r}   rg   r   )ri   r   wr2  r   r   r   r~   _icr   r/   rr   rx   r   s                 r8   r   LinearUnary.create`  s    ""3#4#4Q#78""3#4#4Q#78**,**,1gn'wtYG=&&s'8'8';<AMM!  D)||~kkm 
 '
 #6**r;   c                     g rD   rE   r   s    r8   apply_constraintLinearUnary.apply_constraint{      r;   r   r   )r   r   r   r   r   r   r   r   rF  r   r   r   s   @r8   r7  r7  L  s?    
 	

 

 
! + +4 r;   r7  c                   Z   ^  \ rS rSrSr S S	U 4S jjjrU 4S jr\S 5       rS r	Sr
U =r$ )
LinearBinaryi  z)torch.ops.mkldnn._linear_pointwise.binaryrE   c           	         > [         TU ]  UUUS [        R                  R                  R
                  R                  SS9  g )N'aoti_torch_cpu__linear_pointwise_binaryr   )r   r   r   r   r   r:  r   r   s       r8   r   LinearBinary.__init__  s?     			((::AAE 	 	
r;   c                 F   > UR                  S5        [        TU ]	  U5        g r   r   r   s     r8   r   LinearBinary.codegen  r   r;   c                 :   U R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U R                  U5      5      nUR                  5       Gt pgUR                  5       u  p[        U5      U/-   n	XU/n
U/nUb2  U R                  U R                  U5      5      nU
R	                  U5        OUR                  SU5        [        [        UR                  5       UR                  5       U	S9U
US9n[        U5      $ )Nr   r@  r   )rA  r/  r|   r,   r*   rh   rJ  r   r}   rg   r   )ri   r   yrB  r2  r   r~   rC  r   r/   rr   rx   r   s                r8   r   LinearBinary.create  s   ""3#4#4Q#78""3#4#4Q#78""3#4#4Q#78**,**,1gn=&&s'8'8';<AMM!  A&||~kkm 
 '
 #6**r;   c                     g rD   rE   r   s    r8   rF  LinearBinary.apply_constraint  rH  r;   r   r   )r   r   r   r   r6   r   r   r   r   rF  r   r   r   s   @r8   rJ  rJ    sD    8F 	

 

 
! + +6 r;   rJ  c                   z   ^  \ rS rSr  S SU 4S jjjrU 4S jr\SSSSSSSSS	SS
SSSS\S\4S j5       r	Sr
U =r$ )QLinearPointwisePT2Ei  c           	         > X@l         [        TU ]	  UUUS[        R                  R
                  R                  R                  SS9  g)ag  
if bias is not None
    - inputs = [x, w, b, weight_scale, weight_zp]
    - const_args is: [x_scale, x_zp, o_scale, o_zp,
      fp32_output, unary_attr, unary_scalars, unary_algorithm]
else
    - inputs = [x, w, weight_scale, weight_zp]
    - const_args is: [bias, x_scale, x_zp, o_scale, o_zp,
      fp32_output, unary_attr, unary_scalars, unary_algorithm]
N(aoti_torch_cpu__qlinear_pointwise_tensorr   )r   r   r   r   r   r   qlinear_pointwisetensorr   r   rr   rx   r   r   s        r8   r   QLinearPointwisePT2E.__init__  sD    " !));;BBG 	 	
r;   c                    > UR                  S5        [        TU ]	  U5        [        U R                  [
        5      (       a  U R                  U5        g g r   r   r   s     r8   r   QLinearPointwisePT2E.codegen  B    $$%RS dkk6**%%g. +r;   r   r   rs   rt   r   ru   rv   r   r   r  c           
          [        U UUUX#XV/5      u  pn  nUUU	U
U[        U5      U/-   nU
c   eU
[        R                  [        R                  4;   a  U
Ul        [        UUUUS LS9$ )Nr   rr   rx   r   )r   r   r   r  r  r  rV  )ri   r   rs   rt   r   ru   rv   r   r   r  r  post_op_namepost_op_argspost_op_algorithmrr   rx   rw   r   s                     r8   r   QLinearPointwisePT2E.create  s    " 8UG:8
4q! &#L1)
 
 '''EMM5>>:: #/M# '$&	
 	
r;   r  rE   Tr   )r   r   r   r   r   r   r   r   r.   r   r   r   r   s   @r8   rV  rV    s    
 
 

 
6/ ,
,
 ,
 "	,

 ,
 ,
 ",
 ,
 ,
 ,
 ,
r;   rV  c                      ^  \ rS rSr  S SU 4S jjjrU 4S jrS\\   4S jr\	SSSSS	SS
SSSSSSSSSS\
S\4S j5       rSrU =r$ )QLinearPointwiseBinaryPT2Ei  r   c           	         > X@l         SU l        [        TU ]  UUUS[        R
                  R                  R                  R                  SS9  g)a  
if bias is not None
    - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2, bias]
    - const_args is: [o_scale, o_zp,
      fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
else
    - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2]
    - const_args is: [bias, o_scale, o_zp,
      fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
r  N/aoti_torch_cpu__qlinear_pointwise_binary_tensorr   )	r   r  r   r   r   r   r   rY  binary_tensorr[  s        r8   r   #QLinearPointwiseBinaryPT2E.__init__	  sL    " !#$ ));;IIM 	 	
r;   c                    > UR                  S5        [        TU ]	  U5        [        U R                  [
        5      (       a  U R                  U5        g g r   r   r   s     r8   r   "QLinearPointwiseBinaryPT2E.codegen%  r   r;   c                     U R                   S   nUS:X  a(  U R                  U R                     R                  5       /$ / $ )Nr  )rx   rr   r  r  )r   binary_post_ops     r8   r  -QLinearPointwiseBinaryPT2E.get_mutation_names+  s@    ++B/U"KK 8 89BBDEEIr;   r   r   rs   rt   r   ru   rv   r%   r   r   r  c                    [        U UUUX#XV/UUS:H  5      u  nnnnnUU	U
UUUUUU[        U5      U/
-   nUS:X  ai  [        R                  R	                  UR                  5       5        [        [        UR                  5       S9UUUS LS9nUR                  UR                     $ Uc   eU[        R                  [        R                  4;   a  UUl        [        UUUUS LS9$ )Nr  r   ra  )r   r   r   rY   r  r  rh  r   r}   rr   r  r   r  r  r  )ri   r   rs   rt   r   ru   rv   r%   r   r   r  r  other_scaleother_zprq  r   unary_post_opunary_post_op_argsunary_post_op_algorithmrr   rx   rw   ro   r   s                           r8   r   !QLinearPointwiseBinaryPT2E.create2  s    8 *G:e#
	
 &#$67#)
 
 U"GG''(89/!)9)9);<+d*	F ==!;!;<<'''EMM5>>:: #/M) '$&	
 	
r;   r#  rf  r   )r   r   r   r   r   r   r   r   r  r   r   r.   r   r   r   r   s   @r8   rh  rh    s    
 
 

 
8/HSM  H
H
 H
 "	H

 H
 H
 "H
 H
 H
 H
 H
 H
r;   rh  c            !          ^  \ rS rSr S SU 4S jjjr\SSSSSSSSS	SS
SSSS\S\\   S\S\S\S\S\S\S\4 S j5       r	U 4S jr
SrU =r$ )MkldnnRnnLayeri~  rE   c                    > [         TU ]  UUUS [        R                  R                  R
                  R                  S9  g r'  )r   r   r   r   atenmkldnn_rnn_layerr   r   s       r8   r   MkldnnRnnLayer.__init__  s:     			77?? 	 	
r;   r   r   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                    U R                  U R                  U5      5      nUR                  5         U R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U R                  U5      5      nU R                  U R                  U5      5      nUR                  5         U R                  U R                  U5      5      nUR                  5         UR                  5       n[	        U5      S:X  d   S5       eUu  nnnUUU/nUR                  5       nUR                  5       nXX4XVU/nUU	U
UUUUUU/	n[        [        UR                  5       S9UUS9nS nUUUS//nU" UU5      [        R                  " U5      [        R                  " U5      S//n[        [        UU5      5       VVVs/ s HE  u  nu  nn[        [        UR                  5       UR                  5       UU5      U[        U4/5      PMG     n nnnU Ul        U $ s  snnnf )NrQ   zExpect lstm input to be 3Dr   )rr   rx   c                 Z    [        U 5      S:X  d   S5       e[        R                  " U 5      $ )NrQ   zExpect output_shape to be 3D)r)   r   rd   )output_shaper  s     r8   get_strides_of_lstm_output9MkldnnRnnLayer.create.<locals>.get_strides_of_lstm_output  s,    |$)I+II)!44\BBr;   r	   )r.  r/  freeze_layoutr|   r)   r{  r   r}   r   rd   	enumeratezipr   r   rg   tupler   )!ri   r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r4   
seq_length
mini_batchr  hy_shapecy_shaperr   rx   r   r  output_sizesoutput_stridesrU   r/   rq   r   s!                                    r8   r   MkldnnRnnLayer.create  sX   (  1 1! 45 	
  !2!22!67  !2!22!67  !2!22!67  !2!22!67  !2!22!67
  !2!22!67
ZZ\
:!#A%AA# .8*
J
"J<;;=;;=,

  Q\\^4'
	C %h1#>&|[A--h7--h7C	
" 4=L.14
4//K LLNKKM!	 	4 	 
 ##
s   ,AIc                 D   > UR                  S5        [        TU ]	  U5      $ r   r   r   s     r8   r   MkldnnRnnLayer.codegen  s!    $$%RSww''r;   r   r   )r   r   r   r   r   r   boolr,   r.   r   r   r   r   r   s   @r8   r{  r{  ~  s    
 	

 

 
 [[ [ 	[
 [ [ [ [ [ #Y[ [ [ [ [ [  ![" #[ [z( (r;   r{  c                   d   ^  \ rS rSr S SU 4S jjjrU 4S jr\        SS j5       rSrU =r	$ )	WeightInt4PackMatmuli  rE   c           	         > [        U5      S:X  d   e[        U5      S:X  d   e[        TU ]	  UUUS[        R                  R
                  R                  R                  SS9  g)zA
inputs = [x, w, qGroupSize, qScalesAndZeros]
constant_args = ()
rR   r   N-aoti_torch_cpu__weight_int4pack_mm_cpu_tensorr   )r)   r   r   r   r   	quantizedint4mm_packed_weight_cpur   r   s       r8   r   WeightInt4PackMatmul.__init__  sd     6{a=!Q&&&,,EEMML 	 	
r;   c                    > UR                  S5        [        TU ]	  U5        [        U R                  [
        5      (       a  U R                  U5        g g r   r   r   s     r8   r   WeightInt4PackMatmul.codegen  r_  r;   c                    XX4/nUR                  5       Gt pgUR                  5       u  p[        U5      U/-   n	[        R                  " U	5      n
[	        UR                  5       UR                  5       U	U
5      n[        UUS9$ )N)r   rr   )r|   r,   r   rd   r   r}   rg   r  )ri   r   rB  
qGroupSizeqScalesAndZerosrr   r~   r   nr/   rq   rw   s               r8   r   WeightInt4PackMatmul.create  s~     
4

zz|1gm&99+F#LLNKKM	
 $ 
 	
r;   r   r   )r   r   rB  r   r  r   r  r   r5  r   s   @r8   r  r    s[    
 	

 

 
*/ 

 
  	

 %
 
r;   r  )FNNN)NNF)2collections.abcr   typingr   r   r[   r   torch._prims_commonr   torch.utils._ordered_setr   irr
   r   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   r   virtualizedr   r.   r  r,   ry   r   r   r   r   r   r   r   r
  r%  r7  rJ  rV  rh  r{  r  rE   r;   r8   <module>r     s   $     > /    U T  .215#'AIAI AI 	AI
 c]AI SMAI smAI AI AI Xc]+AI D-.AI K AIR 26#'<I<I <I 	<I
 D-.<I K <I <I~8+( 8+vA+) A+HM 0 M `@+ 1 @+Fd
* d
Ny90 y9x(
' (
V0# 0f3$ 3lP
, P
fs
!2 s
lo(& o(f3
, 3
r;   