
    7hH                      % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKJrJrJr  S SKJrJr  S SKJr  S SKJr  S SK
JrJrJrJrJrJrJrJrJr  S SKJrJrJ r   S S	K!J"r"  S SK#r#S S
K#J$r$J%r%J&r&  S SK'J(s  J)s  J*r+  S SK,J-s  J.r/  S SK0r1S SK2r1S SK3J.s  J4r5  S SK6J7r7  S SK8J9r9  S SK:J;r;  S SK<J=r=  S SK>J?r?J@r@JArAJBrBJCrC  S SKDJErE  S SKFJGrGJHrHJIrIJJrJJKrKJLrLJMrMJNrNJOrOJPrP  S SKQJRrR  S SKSJTrTJUrUJVrV  S SKWJXrX  SSKYJZrZJ[r[  SSK\J]r]J^r^J_r_J`r`  SSK[JaraJbrbJcrcJdrdJere  SSKfJgrg  SSKhJiriJjrjJkrkJlrl  SSKmJnrn  SSKoJprpJqrq  SSK.JrrrJsrsJtrtJuruJvrvJwrwJxrxJyryJzrzJ{r{J|r|J}r}J~r~JrJrJrJrJrJrJrJr  SSKJrJrJr  \(       a  S SKJr  S S KJr  SS!KJr  SS"KJr  SS#K.Jr  O\rS$\S%'    S SKr\GR.                  rS&r\" S(5      r\" S)5      r\" S*5      r\\\$4   rS$\S+'   \\\\$4   rS$\S,'   \GRD                  " \5      r\R&                  " \GRJ                  S-S.9r\1GR                  GRL                  r \\S/\\S/4   S0S1\\\\\\S/4   S/S0S14         4   rS$\S2'   GSS3 jr\GRV                  " S&S49 " S5 S65      5       rGSS7 jrGSS8 jrGSS9 jrGSS: jr      GSS; jrGSS< jr/ S=Qr/ S>Qr GS     GSS? jjrGSS@ jr GS     GSSA jjr\GSGSSB jj5       r\GSGSSC jj5       r GS     GS SD jjr    GS!SE jr    GS"SF jrGS#SG jrGS#SH jrGS$SI jr        GS%SJ jr      GS&SK jrGS'SL jrGS(SM jr " SN S15      r\~" S'S49 " SO SP5      5       r\~ " SQ SR\5      5       rGS)SS jr\~ " ST SU\5      5       r\~ " SV SW\5      5       r\" SX5      \" SY5      \" SZ5      \" S[5      \" S\5      \" S]5      S^.rS_\S`'    GS       GS*Sa jjr\~ " Sb Sc\5      5       r\\\$   \\$   /\4   r " Sd Se\5      r " Sf Sg\5      r " Sh Si\5      r\~ " Sj Sk\5      5       r\~ " Sl Sm\5      5       r\~ " Sn So\5      5       rGS+Sp jrGS+Sq jr     GS,             GS-Sr jjr      GS.Ss jrGS/St jr\~ " Su Sv\5      5       r\~ " Sw Sx\5      5       r\~ " Sy Sz\5      5       r\~ " S{ S|\5      5       r\~ " S} S~\5      5       r\~ " S S\5      5       r\~ " S S\5      5       r\~ " S S\5      5       r " S S\5      r\~ " S S\5      5       r\~ " S S\5      5       r\~ " S S\5      5       r      GS0S jrGS1S jr " S S5      r\~ " S S\5      5       r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r\~ " S S\5      5       r " S S\5      r\~" S'S49 " S S\\^5      5       r\~" S'S49 " S S\\5      5       r " S S\5      r " S S\5      r " S S\5      r\~ " S S\5      5       r\~ " S S\5      5       r\~" S'S49 " S S\5      5       r " S S\5      r " S S\5      r\\\\\\\\\\\4      4   r " S S5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r\~" S'S49 " S S\5      5       Gr  " S SG\ 5      Gr " S SG\5      Gr\~" S'S49 " S SG\ 5      5       Gr\~" S'S49 " S SG\5      5       Gr " S SG\5      Gr " S SG\5      Gr " S S\5      Gr " S SG\5      Gr " S SG\5      Gr	 " S SG\5      Gr
 " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr\~" S'S49 " S S5      5       Gr " S SG\5      Gr\~" S'S49 " S SG\5      5       Gr\~ " S S\5      5       Gr " S SG\5      Gr\GRV                   " S S\5      5       Gr " S S/G\5      Gr " S SG\5      Gr\~" S'S49 " S S\5      5       GrGS2S jGr \~" S'S49 " S GS G\5      5       Gr!\~" S'S49 " GS GSG\5      5       Gr"    GS3GS jGr#\~" S'S49 " GS GSG\5      5       Gr$ " GS GSG\5      Gr% " GS GS	\5      Gr&\~ " GS
 GSG\&5      5       Gr'\~ " GS GSG\&5      5       Gr( " GS GSG\5      Gr) " GS GSG\)5      Gr*GS4GS jGr+GS4GS jGr,g! \ a    SrS'r GNf = f(5      )annotationsN)	GeneratorIterableSequence)AbstractContextManagernullcontext)Enum)partial)	AnyCallableClassVarLiteralOptionaloverloadTYPE_CHECKINGTypeVarUnion)assert_neverNever	TypeAlias)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metrics)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)get_schema_info)
&_remove_effect_token_unbacked_bindingscompute_unbacked_bindingsfree_symbolsfree_unbacked_symbolsIterateExprsrebind_unbackedresolve_unbacked_bindingsShapeEnvstatically_known_trueSymTypes
OrderedSet)CleanDivFloorDivModularIndexing)SymT   )configdependencies)BackendFeatureCodegenSymbolget_scheduling_for_deviceindex_prevent_reordering)Depextract_free_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResultReductionType	StoreMode)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningdo_bench_using_profilingdtype_from_sizeget_dtype_sizeget_kernel_metadataGPU_ALIGN_BYTESir_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_substensor_is_aligned)opsOpsValueV)FakeScriptObject)Node)CUDATemplate)GraphLowering)IndentedBufferr   rc   TF_T_U_V_IntLike_NumLikez  prefix	TensorBoxr   IRNode_NodeOrNodesc                .    [        U [        [        45      $ N)
isinstanceintr   xs    L/var/www/fran/franai/venv/lib/python3.13/site-packages/torch/_inductor/ir.py
_is_staticrw      s    a#w((    )frozenc                  R    \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S
\S'   S\S'   Srg)GraphPartitionSignature   OrderedSet[sympy.Symbol]symbol_inputsz5dict[str, Union[IRNode, sympy.Expr, TorchBindObject]]input_nodeslist[IRNode]output_nodeszdict[str, bool]input_deallocationboolskip_cudagraphz	list[str]constant_names N__name__
__module____qualname____firstlineno____annotations____static_attributes__r   rx   rv   r{   r{      s/     ,+ GF (' rx   r{   c                &   ^ SU4S jjmT" U 5        g )Nc                  > U c  g [        U [        [        45      (       a  U  H  nT" U5        M     g [        U [        5      (       a   U R	                  5        H  nT" U5        M     g [        U [
        [        [        [        [        R                  R                  R                  [        [        [        [         4	5      (       d   S[#        U 5       S35       eg )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])rr   listtupledictvalues
ExpandViewDynamicScalarAssertScalarrm   sympylogicboolalgBooleanr   rs   EffectfulKernelShapeAsConstantBuffertype)nodesnode_check_tensorboxs     rv   r   %validate_ir.<locals>._check_tensorbox   s     =e}-- & t$$ & ' ! KK''//#)
   e%jk rx   )r   Optional[_NodeOrNodes]returnNoner   )node_or_nodesr   s    @rv   validate_irr      s    < ]#rx   c                F   ^  [        T [        5      (       d   eSU 4S jjnU$ )Nc                 0   > [        [        T5      " U 0 UD6$ rq   )getattrr^   )argskwargsnames     rv   fnops_wrapper.<locals>.fn  s    sD!42622rx   )r   objectr   r   r   r_   )rr   str)r   r   s   ` rv   ops_wrapperr      s"    dC    3 Irx   c           
     f   ^ [        [        U [        [        U 5      5      5      5      mSU4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf rq   lenrange)indexi	inv_orders     rv   reindex inverse_reorder.<locals>.reindex
  sC    5zS^+++-23u:->?->il#->???   Ar   Sequence[_T]r   r   )r   zipr   r   )orderr   r   s     @rv   inverse_reorderr     s*    Sc%j 123I@ Nrx   c                   ^  SU 4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf rq   r   )r   r   r   s     rv   r   same_reorder.<locals>.reindex  sB    5zSZ''').s5z):;):AeAh):;;;r   r   r   )r   r   s   ` rv   same_reorderr     s    < Nrx   c                   ^ ^ SU U4S jjnU$ )Nc                    > T" T" U 5      5      $ rq   r   )r   reindex1reindex2s    rv   r    fuse_reindexing.<locals>.reindex  s    ((rx   )r   r   r   zSequence[_V]r   )r   r   r   s   `` rv   fuse_reindexingr     s    ) ) Nrx   c                <    U(       a  [        U 5      $ [        U 5      $ rq   )r(   r'   )ru   unbacked_onlys     rv   get_free_symbolsr   #  s    $Q''Arx   )   r      r5   )   r   r   r   r5   c                <    Uc  [        U 5      nU$ [        X5      nU$ )z)
Convert strides to fill order (argsort)
)rI   rJ   )seq	shape_env
sorted_idxs      rv   get_fill_orderr   .  s-     $+CL
  !0
rx   c                    [        U 5       VVs0 s H  u  pX!_M	     nnn[        [        U 5      5       Vs/ s H  oCU   PM	     nnU$ s  snnf s  snf )zx
Convert stride order to fill order
For channel last format,

stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
)	enumerater   r   )r   idxposlookupr   
fill_orders         rv   stride_order2fill_orderr   <  sR     (1'78'783ch'7F8%*3u:%67%6)%6J7 97s
   AAc                    [        X5      n[        [        U 5      5       Vs/ s H  nSPM     nn[        U5       H	  u  pVXTU'   M     U$ s  snf )z!
Convert strides to stride order
r   )r   r   r   r   )r   r   r   _outr   elems          rv   get_stride_orderr   H  sL     !/s >JCHo
&o1oC
&Z(D	 )J 's   A
c                    g rq   r   ru   guard_shapes     rv   ir_node_to_tensorr   U  s    KNrx   c                    g rq   r   r   s     rv   r   r   Y  s    LOrx   c                   U c  g U(       d%  [         R                  R                  R                  nO[        nU R                  5        Vs/ s H
  o2" U5      PM     nn[        U 5      (       a0  U R                  5       R                   Vs/ s H
  o2" U5      PM     nnO[        R                  U5      nU R                  5       nU R                  5       n[        U5      n[        U5      n[         R                  R                  R                  R                  5          [         R"                  " XEXgS9R%                  5       nS S S 5        U$ s  snf s  snf ! , (       d  f       W$ = f)N)sizestridedtypedevice)r`   graphsizevars	size_hintr   get_sizeis_storage_and_layout
get_layoutr   FlexibleLayoutcontiguous_strides	get_dtype
get_devicerN   r   suppress_guardstorchempty_stridedzero_)	ru   r   shape_fnsr   r   r   r   ts	            rv   r   r   ]  s    	y 77##--!".AHQKD.Q'(||~'<'<='<!(1+'<=2248KKME\\^F"4(D$V,F	
			#	#	3	3	5E

%' 	
 
6 H / > 
6	5 Hs   	EE$E
E c                D    [        U [        5      (       a
  U (       d  S /$ U $ rq   )rr   r   values    rv   may_convert_to_optionalr  y  s!     %u vLrx   c                @   [        U [        5      (       d  U c  U $ [        U [        R                  5      (       a  U R                  $ [        U [
        [        45      (       a  [        U R                  5       5      $ [        SU  S[	        U 5      R                   S35        g )Nzget_device_type(: ))rr   r   r   r   r   rn   
OutputSpecget_device_typer   r   r   rt   s    rv   r  r    sz     !SQY	Au||	$	$vv	A
+	,	,q||~..#A3ba)9)9(:!<=rx   c                    [        U 5      nUS;   a  [        [        U S35      S:X  a  ggUb  [        U5      =nc  gSSKJn  [        U[        5      (       d   e[        X#5      $ )N)cpucuda_backendtritonTFr5   )TritonScheduling)	r  r   r6   r:   codegen.tritonr  rr   r   
issubclass)ru   r   device_schedulingr  s       rv   	is_tritonr    sn    QF  6fXX./8;!:6!BBK0'....'::rx   c                    [        U 5      S:H  $ )Nr  )r  rt   s    rv   is_cpur    s    1&&rx   c           	       ^ ^ [        T [        5      (       a  T R                  5       c  g[        UU 4S j[	        [        T R                  5       5      S-
  5       5       5      n[        R                  R                  R                  T R                  5       S   5      S:H  =(       d=    [        R                  R                  R                  T R                  5       S   5      S:*  nU=(       a    U$ )NFc              3     >#    U  HE  n[         R                  R                  R                  TR	                  5       U   5      T-  S :H  v   MG     g7f)r   N)r`   r   r   size_hint_or_throw
get_stride).0r   	alignmentru   s     rv   	<genexpr>-is_aligned_realized_tensor.<locals>.<genexpr>  sC      /A 
			,	,Q\\^A->	?)	KPQQ/s   AAr5   )rr   rn   maybe_get_strideallr   r   r  r`   r   r   r  r   )ru   r  aligned_stridesaligned_last_dims   ``  rv   is_aligned_realized_tensorr     s    a  A$6$6$8$@ s1<<>*Q./ O 	
++ALLN2,>?1D 	F77..qzz|B/?@AE  //rx   c                   [        U5      [        U 5      :X  a  [        U 5      [        U5      :X  d   e[        X U5       H  u  p4n[        R                  R                  R                  US5      (       a  M7  [        R                  R                  R                  XE5      (       a  Mg  [        R                  R                  R                  U5      [        R                  R                  R                  U5      :X  a  M    g   g)zH
Returns true if the strides are equal, ignoring dimensions of size 1 .
r5   FT)r   r   r`   r   r   statically_known_leqstatically_known_equalssymbolic_hint)strides1strides2shapedims1s2s         rv   significant_strides_equalr+    s     u:X&3x=CM+III5H57700a88ww77
 
''""0048H8H8V8V9
 
  6 rx   c                ^   [        U 5      (       d  U $ [        S [        XR                  5       5       5       5      (       a  U $ [	        XR                  5       U R                  5       5      (       d  U $ [        U 5      u  p#/ UR                  Qn[        U R                  5       5       H<  u  pV[        R                  R                  R                  US5      (       d  M6  X   XE'   M>     [        UR                  UR                  UR                   UUR"                  5      n[%        ['        X'S95      $ )a  
Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
dimensions - size 0 or 1 - will be updated.

If there are real stride differences (NHWC vs NCHW), or the tensor is not realized, then the input will be returned
c              3  x   #    U  H0  u  p[         R                  R                  R                  X5      v   M2     g 7frq   r`   r   r   r#  r  r)  r*  s      rv   r  2try_match_insignificant_strides.<locals>.<genexpr>  s1      7FB 	
00887   8:r5   datalayout)r   r  r   r  r+  r   as_storage_and_layoutr   r   r`   r   r   r"  FixedLayoutr   r   r   offsetrm   ReinterpretView)tensorstridesstorage
old_layout
new_strider   r   
new_layouts           rv   try_match_insignificant_stridesr?    s    !((
 '#4#4#67   $W.?.?.A6??CTUU/7G%:$$%J&//+,7700A66#JJM - J _'EFFrx   c                    U R                   R                  SS9S   n[        UR                  5       VVs/ s H  u  p#UPM	     snnUR                  S'   SSKJn  U" U 5        g s  snnf )Noutput)opr   user_visible_output_idxs)record_original_output_strides)r   
find_nodesr   r   metatorch._inductor.compile_fxrD  )gmoutput_noder   r   rD  s        rv   gm_original_output_stridesrJ    sd    ((%%%215K#K$4$454554K/0 J"2&4s   A#c                    [        5       nU  H9  nU[        UR                  5       SS9-  nU[        UR                  5       SS9-  nM;     [	        U5      $ )NFr   )r0   r   r   r  r   )inputssym_varsinps      rv   get_symbolic_inputsrP    sP    !+H$S\\^5II$S^^%5UKK  >rx   c                  x   \ rS rSr% \" 5       rS\S'   \R                  " SS9r	S\S'   \R                  " SS9r
S\S	'   \R                  " SS9rS
\S'   \\R                  SFS j5       5       rSGS jrSHS jrSIS jrSJS jrSKS jrSLS jrSMSNS jjr SO       SPS jjrSQS jrSRS jrSSS jrSTS jrSUS jrSVS jrSWS jrSXS jrSYS jr \!SZS j5       r"S[S jr#SWS  jr$S\S! jr%S]S^S# jjr&S_S$ jr'S`S% jr(SWS& jr)SaS' jr*SbS( jr+ScS) jr,SYS* jr-SdS+ jr.S\S, jr/SWS- jr0S]SeS. jjr1SfS/ jr2SHS0 jr3SgS1 jr4SHS2 jr5 Sh     SiS3 jjr6SjS4 jr7SkS5 jr8 Sh     SlS6 jjr9SmS7 jr:SnS8 jr;SoS9 jr<SpS: jr= Sh   SqS; jjr>S\S< jr?SrS= jr@SWS> jrASWS? jrBSsS@ jrCStSA jrDSdSB jrEStSC jrF\G(       a  \!SQSD j5       rHSErIg"SErIg")urn   i  zClassVar[OrderedSet[Any]]_current_originsF)initOrderedSet[Any]originsOptional[list[str]]	tracebackOptional[torch.fx.Node]origin_nodec              #     #    [         R                  nX-  [         l         S v   U[         l        g ! U[         l        f = f7frq   )rn   rR  )rU  olds     rv   current_originsIRNode.current_origins  s4      %%"%-	*&)F#cF#s   A1 A>Ac                0    [         R                  XU5        g rq   )r   __setattr__)selfattrr   s      rv   _post_init_setattrIRNode._post_init_setattr  s     	4u-rx   c                    U R                  S[        U R                  5      5        U R                  S[        R                  (       a  [
        R                  " 5       OS 5        U R                  SS 5        g )NrU  rW  rY  )rb  r0   rR  r6   debug_ir_tracebackrW  format_stackr`  s    rv   __post_init__IRNode.__post_init__  sV    	:d6K6K+LMV5N5N//1TX	
 	t4rx   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7frq   r   r  deps     rv   r  (IRNode.get_read_names.<locals>.<genexpr>'       ?.>s((.>   r0   	get_readsrg  s    rv   get_read_namesIRNode.get_read_names&      ?dnn.>???rx   c                    U R                   $ rq   )rW  rg  s    rv   get_tracebackIRNode.get_traceback)  s    ~~rx   c                    U R                   $ rq   rY  rg  s    rv   get_origin_nodeIRNode.get_origin_node,      rx   c                    g rq   r   rg  s    rv   get_defining_opIRNode.get_defining_op/      rx   c                d    S[        U SS5       3nU(       a  [        U5      S:  a  US S  S3nU/$ )Nzorigins=rU   @   =   z...)r   r   )r`  shortenrU  s      rv   common_reprIRNode.common_repr2  s@    WT9b9:;s7|b( "c*Gyrx   c                .   [        U5      [        U R                  U5      5      -   n[        [        [        U5      5      nU(       a5  [	        SR                  U5      5      n[        U 5      R                   SU S3$ [        U 5      R                   SU S3$ )Nz,
z(
z
)(r  )r   r  mapr   indentjoinr   r   )r`  linesr  	multiline	new_liness        rv   
str_helperIRNode.str_helper9  s     Ud4#3#3G#<==Se_%uzz%01I4j))*#i[<<4j))*!E7!44rx   c                    U R                   $ rq   r   rg  s    rv   r   IRNode.get_dtypeD      zzrx   c                D     U R                  5       $ ! [         a     g f = frq   )r   NotImplementedErrorrg  s    rv   maybe_get_dtypeIRNode.maybe_get_dtypeG  s&    	>>##" 		    
c                2    [        S[        U 5       S35      e)Nz#get_layout() is not implemented by !r  r   rg  s    rv   r   IRNode.get_layoutM  s    !$GT
|ST"UVVrx   c                D     U R                  5       $ ! [         a     g f = frq   )r   r  rg  s    rv   maybe_get_layoutIRNode.maybe_get_layoutP  &    	??$$" 		r  c                "    U R                  5       $ rq   )r   rg  s    rv   get_output_specIRNode.get_output_specV  s      rx   c                D     U R                  5       $ ! [         a     g f = frq   )r  r  rg  s    rv   maybe_get_output_specIRNode.maybe_get_output_specY  s(    	''))" 		r  c                >    [        U R                  5       [        5      $ )z4True for single tensor output (excludes MultiOutput))rr   r  Layoutrg  s    rv   has_tensor_outputIRNode.has_tensor_output_  s    $446??rx   c                2    [        S[        U 5       S35      e)Nz!get_size() is not implemented by r  r  rg  s    rv   r   IRNode.get_sizec  s    !$Ed4j\QR"STTrx   c                D     U R                  5       $ ! [         a     g f = frq   )r   r  rg  s    rv   maybe_get_sizeIRNode.maybe_get_sizef  %    	==?"" 		r  c                "    U R                  5       $ rq   r   rg  s    rv   r'  IRNode.shapel  s    }}rx   c                4    [        U R                  5       5      $ rq   )r[   r   rg  s    rv   	get_numelIRNode.get_numelp  s    T]]_--rx   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ Nr   r`   r   r   r-   r   Eqr  rg  s    rv   is_zero_elementsIRNode.is_zero_elementss  0    ww55ehht~~?OQR6STTrx   c                0    [        S[        U 5       35      e)a  
If the IRNode refers to data which has not been materialized (e.g.,
it is a Pointwise/Reduction that could potentially have more
compute fused into it), realize the IRNode into physical memory,
ending the possibility of fusing into it, but allowing, e.g., multiple
users to access the data without having to recompute.

Check StorageBox.realize for a particularly notable implementation.

TODO(ezyang): I think, in principle, every IRNode should have an
implementation of this, and most of the time no-op is OK, but you
really do have to audit each IRNode for this, so for now, raise
an error if it's not implemented.  Note that some code in graph.py
will catch this thrown error and suppress it with a warning.
zrealize NYI on r  rg  s    rv   realizeIRNode.realizev  s      "ODJ<"@AArx   Nc                0    [        S[        U 5       35      e)Nzcodegen_reference NYI on r  r`  writers     rv   codegen_referenceIRNode.codegen_reference  s    !$=d4j\"JKKrx   c                    g rq   r   rg  s    rv   r   IRNode.get_device  r  rx   c                0    U R                  5       nUc   eU$ rq   )r   r`  r   s     rv   get_device_or_errorIRNode.get_device_or_error  s    "!!!rx   c                    gNFr   rg  s    rv   has_exceeded_max_readsIRNode.has_exceeded_max_reads      rx   c                >    [        [        U 5      R                  5      erq   r  r   r   rg  s    rv   make_loaderIRNode.make_loader      !$t*"5"566rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   make_indexerIRNode.make_indexer  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   r  IRNode.get_stride  r  rx   c                D     U R                  5       $ ! [         a     g f = frq   )r  r  rg  s    rv   r  IRNode.maybe_get_stride  r  r  c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   get_nameIRNode.get_name  r  rx   c                D     U R                  5       $ ! [         a     g f = frq   )r  r  rg  s    rv   maybe_get_nameIRNode.maybe_get_name  r  r  c                z     U R                  5       [        R                  R                  ;   $ ! [         a     gf = fr  )r  r`   r   graph_inputsr  rg  s    rv   is_input_bufferIRNode.is_input_buffer  s4    	==?agg&:&:::" 		s   *- 
::c                    gr  r   r`  	thresholds     rv   has_large_inner_fnIRNode.has_large_inner_fn  r  rx   c                    g rq   r   r`  userss     rv   
mark_reuseIRNode.mark_reuse      rx   c                    g rq   r   rg  s    rv   realize_hintIRNode.realize_hint  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   unwrap_viewIRNode.unwrap_view  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   freeze_layoutIRNode.freeze_layout  r  rx   c                >    [        [        U 5      R                  5      erq   r  r`  r   allow_paddings      rv   freeze_layout_with_stride_order&IRNode.freeze_layout_with_stride_order       "$t*"5"566rx   c                >    [        [        U 5      R                  5      erq   r  r`  r   s     rv   freeze_layout_with_fill_order$IRNode.freeze_layout_with_fill_order  r  rx   c                >    [        [        U 5      R                  5      erq   r  r`  r   s     rv   freeze_layout_with_same_order$IRNode.freeze_layout_with_same_order  r  rx   c                >    [        [        U 5      R                  5      erq   r  r`  exact_stridesr  s      rv    freeze_layout_with_exact_strides'IRNode.freeze_layout_with_exact_strides  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   get_read_writesIRNode.get_read_writes  r  rx   c                6    U R                  5       R                  $ rq   r  readsrg  s    rv   rs  IRNode.get_reads      ##%+++rx   c                4    [        U R                  5       5      $ rq   )r   rs  rg  s    rv   	num_readsIRNode.num_reads  s    4>>#$$rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   get_storage_numelIRNode.get_storage_numel  r  rx   c                >    [        [        U 5      R                  5      erq   r  r`  r   s     rv   get_free_symbol_usesIRNode.get_free_symbol_uses  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   get_reduction_typeIRNode.get_reduction_type  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   get_reduction_sizeIRNode.get_reduction_size  r  rx   c                    gr  r   rg  s    rv   	is_externIRNode.is_extern  r  rx   c                    gr  r   rg  s    rv   is_no_opIRNode.is_no_op  r  rx   c                >    [        [        U 5      R                  5      erq   r  r  s     rv   constant_to_deviceIRNode.constant_to_device  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   get_mutation_namesIRNode.get_mutation_names  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   get_operation_nameIRNode.get_operation_name  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   get_inputs_that_alias_output#IRNode.get_inputs_that_alias_output  r  rx   c                    g rq   r   rg  s    rv   r   IRNode.dtype  s    (+rx   r   )rU  zOrderedSet[Node]r   zGenerator[None, None, None])ra  r   r   r   r   r   r   r   r   OrderedSet[str])r   rV  r   rX  r   zOptional[Operation]T)r  r   r   Sequence[str])TT)r  zSequence[object]r  r   r  r   r   r   r   torch.dtype)r   zOptional[torch.dtype]r   r  )r   zOptional[Layout]r   r  )r   zOptional[OutputSpec]r   r   r   Sequence[Expr])r   Optional[Sequence[_IntLike]])r   z.Union[_IntLike, sympy.Rel, Sequence[_IntLike]]r   r   r   Optional[str]rq   r  zOptional[IndentedBuffer]r   r   r   Optional[torch.device]r   torch.devicer   $Callable[[Sequence[Expr]], OpsValue]r    Callable[[Sequence[Expr]], Expr]r   Sequence[_IntLike]r   r   r  Optional[int]r   r   r  rs   r   r   r   rn   Fr   	list[int]r  r   r   r   r   rb  r   r   r   list[_IntLike]r   r   r  re  r  r   r   r   r   zdependencies.ReadWritesr   zOrderedSet[Dep]r   rs   r   ri   r   r   r   r}   r   Sequence[sympy.Expr]r   rT  r   rn   r   rD  )Jr   r   r   r   r0   rR  r   dataclassesfieldrU  rW  rY  staticmethod
contextlibcontextmanagerr\  rb  rh  rt  rx  r|  r  r  r  r   r  r   r  r  r  r  r   r  propertyr'  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rs  r  r  r"  r%  r(  r+  r.  r1  r4  r7  r:  r   r   r   r   rx   rv   rn   rn     s'   2<,/>  +00e<G_<%0%6%6E%BI"B+6+<+<%+HK(H*  *.5@  PT	5%	504	5HL	5		5W!@U  .UB$L
777777 7<77/37	7
77 DI7+7<@7	7
7,%7 %*7!7	!7
777777 	+ 
+ rx   c                      \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrSS jrSS jr S   S S jjrS!S jrSrg)"	Operationi  c                    S U l         g rq   operation_namerg  s    rv   rh  Operation.__post_init__  s
    -1rx   c                    [         erq   r  rg  s    rv   r   Operation.get_device      !!rx   c                @    [        U S5      (       d   eU R                  $ NrY  )hasattrrY  rg  s    rv   r|  Operation.get_origin_node
  s!    t]++++rx   c                @    [        U S5      (       d   eU R                  $ )NrU  )r  rU  rg  s    rv   get_originsOperation.get_origins  s    tY''''||rx   c                8    U R                   c   eU R                   $ rq   ry  rg  s    rv   r7  Operation.get_operation_name  s     ""..."""rx   c                    gr  r   rg  s    rv   r+  Operation.is_extern  r  rx   c                    gr  r   rg  s    rv   r.  Operation.is_no_op  r  rx   c                    [         erq   r}  rg  s    rv   r  Operation.get_read_writes  r  rx   c                &    XR                  5       ;   $ rq   )rt  )r`  r   s     rv   
is_user_ofOperation.is_user_of  s    **,,,rx   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7frq   rl  rm  s     rv   r  +Operation.get_read_names.<locals>.<genexpr>#  rp  rq  rr  rg  s    rv   rt  Operation.get_read_names"  rv  rx   c                6    U R                  5       R                  $ rq   r  rg  s    rv   rs  Operation.get_reads%  r  rx   c                    [         erq   r}  rg  s    rv   get_outputsOperation.get_outputs(  r  rx   c                    [        5       $ rq   r/   rg  s    rv   get_unbacked_symbol_defs"Operation.get_unbacked_symbol_defs+  
    |rx   c                    [        5       $ )a  
When unbacked_only=True:
Returns the unbacked symbols which are required to be in scope in
order to successfully perform codegen for this buffer.  For example,
a buffer that corresponds to an extern kernel call that takes i0 as
an argument would return {i0} here.  This is used to generate necessary
dependencies that ensure we actually bind i0 in codegen before you
try to use it.

Note that this is NOT transitive; in particular, if this buffer takes
in as input another buffer with dynamic shape (e.g., (i0,)), we will
not report it here, because you will already have a dependency
on that buffer, which will eventually have a dependency on i0 if
necessary.

When unbacked_only=False:
Similar to `unbacked_only=True` but including all free symbols
instead of only free unbacked symbols.
r/   r!  s     rv   r"  Operation.get_free_symbol_uses.  s    , |rx   c                    g)z
Gets extra global memory size needed by this buffer.
Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
r   r   rg  s    rv   get_workspace_sizeOperation.get_workspace_sizeF  s    
 rx   ry  Nr>  rQ  rA  )r   rT  r[  rI  rg  )r   r   r   r   r?  rh  r   list[Buffer]r   r}   r`  rk  ri  )r   r   r   r   rh  r   r|  r  r7  r+  r.  r  r  rt  rs  r  r  r"  r  r   r   rx   rv   rw  rw    sc    2" #"-@," %*!	!0rx   rw  c                  x  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S	'    S    S!S
 jjrS"S jrS#U 4S jjrS$S jr\r	S%S jr
S&S jrS'S jrS'S jr\S(S j5       r\\R$                  4S)S jj5       r\S*S j5       rS+S jr\S$S j5       rS,S-S jjrS S.S jjrS/S jrS0S jrS1S jrS2S jrS3S jrS4S jrSr U =r!$ )5LoopsiN  rT  r   rF  r   Callable[..., Any]inner_fnrZ  rangesc                   ^ [        5       R                  " / U4S jU R                   5       QU R                  T5      P76 $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7frq   r   r  er   s     rv   r  -Loops.get_free_symbol_uses.<locals>.<genexpr>Y  s     F+Qq-00+   )r0   unionr  inner_fn_free_symbolsr!  s    `rv   r"  Loops.get_free_symbol_usesU  s>     |!! 
F$++F
&&}5
 	
rx   c                   U R                  SU R                  R                   S3[        U R                  5      U R                  5       /U Vs/ s H  o" S[        X5       3PM     sn-   SU R                  < 3/-   5      $ s  snf )N'=origin_node=)r  r   r   r   r   inner_fn_strr   rY  )r`  namesr   s      rv   _to_strLoops._to_str]  s    DKK$$%Q'DJJ!!#
 <AA54q,-.5AB d..1234
 	
 Bs   B
c                "   > [         TU ]  5         g rq   )superrh  r`  	__class__s    rv   rh  Loops.__post_init__h  s    rx   c                $    U R                  S5      $ )Nr  r  rg  s    rv   __str__Loops.__str__k  s    ||K((rx   c                    U R                   $ rq   r   rg  s    rv   r   Loops.get_devicep      {{rx   c                    U R                   $ rq   r{  rg  s    rv   r|  Loops.get_origin_nodes  r~  rx   c                    U R                   $ rq   r  rg  s    rv   r   Loops.get_sizev  r  rx   c                    U R                   $ rq   r  rg  s    rv   get_pointwise_sizeLoops.get_pointwise_sizey  r  rx   c                    UR                  SS 5      nUR                  SS 5      nU " U0 UD6nUR                  SU5        UR                  SU=(       d    UR                  5        [        R	                  U5      $ )NrY  rW  )poprb  rW  rm   create)clsr   r   rY  tbrs         rv   r  Loops.create|  sm    jj5ZZT*   	
]K8	["*;<""rx   c                    [        U 5       VVs/ s H0  u  p#US:X  a  [        R                  R                  O
[	        X5      PM2     snn$ s  snnf Nr5   )r   r   SZerorZ   )r  rl   nr   s       rv   _indexLoops._index  sI     "&)
) FEGGLL(Fv(QQ)
 	
 
s   7A
c                |   [        [        R                  " 5       5      n[        R                  " U5         [        R
                  " [        SS5         U R                  " U R                  5       6   UR                  5       sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        g ! , (       d  f       g = fNallow_indexingT)
rB   r`   MockHandlerset_ops_handlerr   r   r   r  inner_fn_argsgetvalue)r`  	opcounters     rv   inner_fn_opcountLoops.inner_fn_opcount  sy     1	i(LL)94@MM4--/0%%' A@ )(@@@ )((s#   B--B?	B-
B 	B--
B;c                :    U R                  U R                  5      4$ rq   )r  r  rg  s    rv   r  Loops.inner_fn_args  s    DKK(**rx   c                t    [         R                  R                  " U R                  /U R	                  5       Q76 $ rq   )r`   KernelFormatterHandlerir_to_stringr  r  rg  s    rv   r  Loops.inner_fn_str  s3    ''44MM
 ..0
 	
rx   c                z    Uc  Sn[        U[        R                  5      nU R                  5       R                  U:  $ r  )maxr6   realize_opcount_thresholdr  num_opsr  s     rv   r  Loops.has_large_inner_fn  s9    I	6#C#CD	$$&..::rx   c                `    U R                  U R                  5      n[        U R                  X!S9$ NrL  )r  r  r=   r  )r`  r   r   s      rv   r  Loops.inner_fn_free_symbols  s%    DKK(#DMM5VVrx   c                   [         R                  " [        SS5         U R                  5       (       aJ  [	        U R                  5       U R                  5       U R                  5       5      R                  sS S S 5        $ [	        U R                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr  )	r   r   r   r%  r?   r  r   r(  r  rg  s    rv   rs  Loops.get_reads  s    \\.*:DA&&((*$$&MMO++- % BA +$$&MMO % BAAs   AB8=1B88
Cc                H    [        U R                  5       R                  5      $ rq   )r0   r  read_buffersrg  s    rv   rt  Loops.get_read_names  s    $//1>>??rx   c                H    [        U R                  5       R                  5      $ rq   )r   r  r  rg  s    rv   r  Loops.num_reads  s    4((*7788rx   c                2    [        S[        U 5       S35      e)Nz+get_reduction_size() is not implemented by r  r  rg  s    rv   r(  Loops.get_reduction_size      !9$t*QG
 	
rx   c                2    [        S[        U 5       S35      e)Nz+get_reduction_type() is not implemented by r  r  rg  s    rv   r%  Loops.get_reduction_type  r  rx   c                2    [        S[        U 5       S35      e)Nz+constant_to_device() is not implemented by r  r  r  s     rv   r1  Loops.constant_to_device  r  rx   r   r`  rk  )r  rD  r   r   r>  r[  rQ  rA  rJ  )r   r   r   r   r   rm   )r  rZ  rl   r4   r   rK  )r   rC   r   zSequence[Sequence[_IntLike]]rq   r\  r   r   r   OrderedSet[Symbol]rh  r?  ri  rl  rN  rn  )"r   r   r   r   r   r"  r  rh  r  __repr__r   r|  r   r  classmethodr  rr  r4   INDEXr  rK   r  r  r  r  r  rs  rt  r  r(  r%  r1  r   __classcell__r  s   @rv   r  r  N  s       %*
!
	!
	
 ) H  	# 	# :>** 
 
 ( (+ 
 

;W@9




 
rx   r  c                   UR                   (       a   [        R                  " [        S5      U5      $ [        R                  " SU5      $ )Nnanr   )is_floating_pointr^   constantfloat)r   r   s     rv   nop_loader_fnr    s1    ||E%L%00||Au%%rx   c                  V    \ rS rSrS	S jrS
S jrSS jr        SS jrSS jrSr	g)	Pointwisei  c                t    U R                  5       (       a  [        [        U R                  S9$ U R                  $ Nr  )r  r
   r  r   r  rg  s    rv   r  Pointwise.make_loader  s,      ""=

;;}}rx   c                    / $ rq   r   rg  s    rv   r(  Pointwise.get_reduction_size  s    	rx   c                    g rq   r   rg  s    rv   r%  Pointwise.get_reduction_type  r  rx   c                |    U R                  5       n[        R                  " U=(       d    SU" U5      U" U5      5      $ Nunnamed)r  r^   storer`  output_nameindexervarsloaders        rv   store_outputPointwise.store_output  s2     !!#yy1	74=&,OOrx   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        XR
                  X R                  S9$ FMove this to a given device. Requires that all reads are to constants.override_devicer   r   r  r  )r  r   r   ConstantBufferr  r   r  r`  r   r&  s      rv   r1  Pointwise.constant_to_device  sB    !!#n.?HPf[[
 	
rx   r   NrU  rl  rN  r#  rO  r$  !Callable[[Sequence[Expr]], Never]r%  rK  r   r   rn  )
r   r   r   r   r  r(  r%  r'  r1  r   r   rx   rv   r  r    sF    P"P 3P 	P
 
P
rx   r  c                  R    \ rS rSr% S\S'   SrS\S'   S
S jr        SS jrS	rg)Scatteri  rX  output_indexerNrE   scatter_modec           	         U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  S9$ )r+  r,  )r   r   r  r  r5  r6  )	r  r   r   r.  r4  r   r  r5  r6  r/  s      rv   r1  Scatter.constant_to_device  s[    !!#n.?HP**;;..**
 	
rx   c                    U R                  5       nUc  Sn[        R                  " UU" U R                  U5      5      U" U5      U R                  S9$ )Nr   )mode)r  r^   r!  r5  r6  r"  s        rv   r'  Scatter.store_output
  sT     !!##KyyD''-.4L""	
 	
rx   r   rn  r1  )	r   r   r   r   r   r6  r1  r'  r   r   rx   rv   r4  r4    sB    44"L)"

"
 3
 	

 

rx   r4  
logical_ormaximumminimummuladdbitwise_xor)anyr  minprodsumxor_sumz"dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                   ^ ^^ T [         ;   a	  [         T    $ T S;   a        SUUU 4S jjnU$ T S:X  a        SS jnU$ [        ST  35      e)Nargmaxargminc                  > U u  p#Uu  pETS:X  a  [         R                  " X$5      nO[         R                  " X$5      n[         R                  " X$5      n[	        T5      (       a  [         R
                  " X"5      n[         R
                  " XD5      n	[         R                  " U[         R                  " X5      5      n[         R                  " U[         R                  " X5      5      nT(       a  [         R                  " X55      O[         R                  " X55      n
[         R                  " U[         R                  " Xz5      5      n[         R                  " XbU5      [         R                  " XcU5      4$ )NrK  )	r^   ltgteqr!   ner<  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr   reduction_types              rv   argmax_combine_fn3get_reduction_combine_fn.<locals>.argmax_combine_fn-  s     !G G)vvg/vvg/FF7,Ee$$&&2&&2~~dCFF7,DEucoog.OP ' w(VVG- 
 >>$(CDD		$1		$1 rx   welford_combinec                \    U u  p#nUu  pVnXR-
  nXG-   n	Xy-  n
X(U
-  -   X6-   X-  U-  U
-  -   U	4$ rq   r   )rS  rT  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              rv   welford_combine_fn4get_reduction_combine_fn.<locals>.welford_combine_fnN  s]     &'"F(%&"F(OE!,J -I**emh6BB rx   zunknown reduction_type=)rS  tuple[object, object]rT  ro  r   tuple[OpsValue, OpsValue])rS  #tuple[OpsValue, OpsValue, OpsValue]rT  rq  r   rq  )rG  r  )r_  r   r^  r`  rm  s   ```  rv   get_reduction_combine_fnrr  %  s     --#N33	/	/	$	)>	&	 	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMrx   c                  4  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S!S
 jr\rS"S#U 4S jjjrS$S jrS%S jr	          S&S jr
S'S jrS(S jrS"S#S jjrS)S jr\ S*                   S+S jj5       r\          S,S j5       r\\R(                  S4                   S-S jj5       r\      S.S j5       r\      S.S j5       r\        S/S j5       r\      S0S j5       r\ S*               S1S jj5       r\            S2S j5       r\                        S3S j5       r\ S*                     S4S jj5       r\                      S5S j5       rS rU =r $ )6	Reductionid  rZ  reduction_rangesrD   r_  rF  	src_dtyperH   reduction_hintc                $    U R                  S5      $ )N)r  ru  r_  r  rg  s    rv   r  Reduction.__str__l  s    ||LMMrx   c                |   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7frq   r  r  s     rv   r  1Reduction.get_free_symbol_uses.<locals>.<genexpr>s  s     P:OQq-00:Or  )r  r"  r0   r  ru  r`  r   r  s    `rv   r"  Reduction.get_free_symbol_usesq  s7    w+M:Z\=O=OP$:O:OP>
 
 	
rx   c                    U R                   $ rq   )ru  rg  s    rv   r(  Reduction.get_reduction_sizev  s    $$$rx   c                    U R                   $ rq   )r_  rg  s    rv   r%  Reduction.get_reduction_typey      """rx   c           	         [         R                  " U R                  U R                  U R                  U R                  X45      5      n[         R                  " U=(       d    SU" U5      U5      $ r  )r^   	reductionr   rv  r_  r  store_reduction)r`  r#  r$  r%  reduction_varsr   s         rv   r  Reduction.store_reduction|  sU     JJNNMM$/	
 "";#;)WT]ERRrx   c                X    [        U R                  5      [        U R                  5      -   $ rq   )r   r  ru  rg  s    rv   index_lengthReduction.index_length  s!    4;;#d&;&;"<<<rx   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nX4$ rq   )r  r  ru  r4   R0_INDEX)r`  r   rindexs      rv   r  Reduction.inner_fn_args  s6    DKK(T22DMMBrx   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      n[        U R                  X#US9$ r  )r  r  ru  r4   r  r=   r  )r`  r   r   r  s       rv   r  Reduction.inner_fn_free_symbols  sF    DKK(T22DMMB#MM5
 	
rx   c                   U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  U R                  [        R                  S9$ )r+  r,  r   r   r  r  ru  r_  rv  rw  )r  r   r   r.  rt  r   r  ru  r_  rv  rH   DEFAULTr/  s      rv   r1  Reduction.constant_to_device  sk    !!#n.?HP**;;!22..nn(00	
 		
rx   Nc	                >   [         R                  R                  R                  U5      n	[         R                  R                  R                  [	        U5      5      n
US:H  =(       dV    [         R                  R                  U [        R                  5      (       + =(       a    US;  =(       a    [        R                  n[        U	5      (       a  [        U
5      (       d  [        R                  S4$ [        R                  " U 5      nUR                  nSnU(       a]  [         R"                  " [         R$                  R&                  U SS9n[         R"                  " [         R$                  R&                  U SS9nO      SS jnUnU
S:X  a  U" X5      nUS:X  a  [        R(                  U4$ Ub  [+        U[,        5      (       a  [.        R0                  " [2        S	S5         [5        U5      u  nnS S S 5        Wbj  Wbg  [         R                  R                  R                  [	        UU-   5      5      nU	U:X  a,  [6        R9                  S
UUUUU5        [        R(                  S4$ [        R(                  U4$ X::  d  XS-  S-  :  a  [        R                  S4$ [;        U UUUUUS:w  a  UOSU[        R                  S9nSS jnU" U5      u  nnU(       a  U" U5      u  nn[=        U5      S:X  a  [        R                  S4$ [>        R@                  " URC                  5       URE                  5       5      u  u  nnnSnSnU H  n[         R                  R                  RG                  UU5      n[         R                  R                  RI                  UU[K        URM                  5       5      5      n [O        S U  5       5      n!U!(       a  US-  nM  US-  nM     UU:  a  [        R(                  U" X5      4$ [        RP                  U" X5      4$ ! , (       d  f       GN&= f)NscanrI  r5       T)inner_reductionFc                    gr  r   )reduction_numel_hint
numel_hints     rv   inner_reduction_splits4Reduction.num_splits.<locals>.inner_reduction_splits  s     rx   r  zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr  r   rE  r  c           	     R  ^ [        S [        U R                  5       U R                  5       U R	                  5       S9U S9nUR                  5       nUR                  c   eUR                   V s/ s H=  n [        U [        5      (       d  M  [        U [        R                  5      (       a  M;  U PM?     nn / nSn[        UR                  S S9 H  m[        U4S jU 5       5      (       d  M  UR                  TR                  5        TR                   ["        R$                  R&                  ;   d  Md  ["        R$                  R&                  TR                      n[)        UR*                  SS 5      nUR-                  5         [)        UR*                  SS 5      U:w  d  M  SnM     XE4$ s  sn f )	Nr   r   r   r   r4  r3  Fc                    U R                   $ rq   rl  rt   s    rv   <lambda>@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>  s    affrx   keyc              3  T   >#    U  H  oTR                   R                  ;   v   M     g 7frq   )r   r'   )r  r  mds     rv   r  AReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>   s     F:aBHH111:   %(r   T)ComputedBufferr   r   r   r   r  
range_varsrr   r   r   Numbersortedr  r  appendr   r   r`   r   name_to_bufferr   r4  decide_layout)	r  cbread_writesr  indiceschangedbuforiginal_strider  s	           @rv   get_read_indices.Reduction.num_splits.<locals>.get_read_indices	  s]   %<<>++-
 B ,,.K ))555 %///Aa& /9!U\\/J /  
 GG[..4DEF:FFFNN288,ww!''"8"88gg44RWW=*1#**h*M))+"3::x>/Q&*G F ##!s   -F$
F$+F$r   c              3  *   #    U  H	  oS :  v   M     g7fr5   Nr   r  r   s     rv   r  'Reduction.num_splits.<locals>.<genexpr><  s     /w!Aw   )r  rs   r  rs   r   rs   )r  rt  r   ztuple[Sequence[Expr], bool]))r`   r   r   r$  r[   has_featurer8   REDUCE_TO_SINGLE_ELEMENTr6   split_reductionsrw   rH   r  rG   r  multi_processor_count	functoolsr
   choicesreduction_split_factorINNERrr   rm   r   r   r   r>   logdebugrt  r   r7   index_vars_squeezer   r(  simplify_with_rangesstride_hintsr   keysr  OUTER)"r   	dst_dtyperv  r  r  ru  r_  reduction_numel
input_noder  r  should_splitpropsnum_smmin_elements_per_threadr  outer_reduction_splitssplit
new_rangesnew_reduction_rangesextracted_numel_hintr  r  r  r  r   r  ranges1	num_outer	num_innerr   jr:  outers"                                     rv   
num_splitsReduction.num_splits  s     !ww//==oNWW%%33M&4IJ
%/ 
##FN,S,STT (( '' 	 /00Z
5K5K ((!++ ''/,,"$@I@Q@Q		00&$A" AJ@Q@Q		00&%A"
&)  &<" ?*+?LEz$**E11%*Z*K*K\\.2BDI <JG", J
 ).B.N+,77+;+;+I+I%j3G&GH,( ,/CC		G #,!&0	  -22B66 &&-- ;aZ"_, ((!++--;v-E>5(00	
	$B ,A.)!,JGQw<1 ((!++'3'F'FJJL!..0(
$NW 		A  55aAAgg&&33>4#7G /w//EQ	Q	  y  &&(>$)   !&&(>$)  M JIs   P
Pc                R  ^ ^^^^^ T Vs/ s H,  n[         R                  R                  R                  U5      PM.     snm[	        X#5      mSUUU4S jjmUS;   aD  [        SST[        R                  T5      5      R                  5       m      SUU 4S jjmU4S j$ T mT$ s  snf )z1Convert inner_fn from a reduction to an pointwisec                   >^  [         R                  " TU U4S j[        R                  " T Vs/ s H  n[	        U5      PM     sn6  5       5      $ s  snf )Nc              3  6   >#    U  H  nT" TU5      v   M     g 7frq   r   )r  r  r   value_fns     rv   r  =Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>[  s&      # UF++#s   )r  reduce	itertoolsproductr   )r   ru   
combine_fnru  r  s   ` rv   r   *Reduction._unroll_reduction_fn.<locals>.fnX  sN    ##"+"3"3,<=,<q%(,<=# 
 >s   ArK  rJ  Nc                   > U Vs/ s H  n[         R                  " U5      PM     nnT" X5      [        R                  " T" U5      [        R
                  5      4$ s  snf rq   )r   expandr^   
index_exprr   int64)r   r  r   flatten_indexr  s      rv   r  0Reduction._unroll_reduction_fn.<locals>.value_fnl  sO     4::6a%,,q/6:U+NN=#8%++F  ;s    Ac                   > T" U 5      S   $ r  r   )r   r   s    rv   r  0Reduction._unroll_reduction_fn.<locals>.<lambda>u  s    E1rx   )r   rZ  r   r   )r   rZ  r  rZ  r   rp  )	r`   r   r   evaluate_static_shaperr  r6  r   r   r  )	r  ru  r_  rv  ru   r  r  r   r  s	   ``   @@@@rv   _unroll_reduction_fnReduction._unroll_reduction_fnJ  s     @P
?O!AGG2215?O
 .nH
		 		 11' 112BC	
 ln )3E*  .-HIM
s   3B$c
                  ^^^^^^ [         R                  R                  R                  [	        T5      5      mTS:X  an  SU4S jjn
U
" S5      U
" S5      U
" S5      U
" S5      S.mTTR                  5       ;   d
   T S35       eSUUU4S jjn[        R                  UUU[        U5      S9$ TS:X  a-  TS;   a	  SU4S	 jjnO	SUU4S
 jjn[        R                  UTXS9$ [        T[        5      (       a  [         R                  R                  R                  T5      [        R                  :  aQ  [	        U5      S:w  d  [        UR                  5      (       a(  [        R                  UTU R!                  TTTU5      US9$ U R#                  UTUTUTTTU	5	      u  pSU4S jjnU" U5      nU[$        R&                  :X  a  UnUS:X  a7  U	c   e[)        U	5      u  nnUc   eUc   eU R+                  UTUTUTUUTU5
      $ US:  a  U R-                  UTUTUTTUUU	5
      $ [.        R                  [1        UTTUTTUUS95      $ )Nr   c                  > T[         R                  :X  a  [        U 5      $ TR                  (       a,  [        U [        R
                  5      (       d   e[        U 5      $ [        U [        R                  5      (       d   e[        U 5      $ rq   )	r   r   r  rr   typingSupportsFloatr  SupportsIntrs   )valr  s    rv   py_cnst!Reduction.create.<locals>.py_cnst  sg    

*9$00%c6+?+?@@@@ :%%c6+=+=>>>>s8Orx   r5   )rE  rF  rD  rB  z* not supported for zero-dimension tensors!c                8   > [         R                  " TT   T5      $ rq   r^   r  )r   r  r_  rtypes_to_initss    rv   const_fn"Reduction.create.<locals>.const_fn  s    ||ON$CYOOrx   r-  r  c                2   > [         R                  " ST5      $ r  r   )r   r  s    rv   r   Reduction.create.<locals>.fn  s    <<955rx   c                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf rq   r   r  r  )r   r   reduction_indexr  ru  s      rv   r   r    s2    =M&N=Muww||=MO&N#E;; 'O   $4c                l   > [        T5      (       a  U $ U S:  a  [        U [        R                  5      $ U $ r  )rw   r  r6   min_num_split)r  r  s    rv   _maybe_increase_split/Reduction.create.<locals>._maybe_increase_split  s2    /**qy5&"6"677rx   r  r  )r  r   r   zUnion[bool, float, int])r   rs   r   r_   )r  rs   r   rs   )r`   r   r   simplifyr[   r  r  r  r   rr   r   r  r6   unroll_reductions_thresholdrW   r   r  r  rH   r  r>   !create_multilayer_existing_rangescreate_multilayerrm   rt  )r  r   r  rv  r  r  ru  r_  rw  r  r  r  r   hintr  r  r  r  r  r  s     ` ` ``          @@rv   r  Reduction.createz  s    ''**33MBR4STa$ qz"1:
qz	O "_%9%9%;; !""LM;P P ##!F|	 $   a!556 6
< < ##Y $  
 00  33OD001v&!+vfkk/B/B ##11.	  $   nn

	 &e,
 ]222!NB;)))/R0,J, )))'33388 $  QY((   !!1-#-	
 	
rx   c           	        U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R
                  $ U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R                  $ [        U5      (       a  SOSn[        U5      (       a  SOSnUUUUX"U4X"U4[        S5      U4S	.U    $ )
N)r  rJ  z-infF)rC  rK  infTr   r5   )rE  rD  rF  rB  welford_reducerb  online_softmax_reduce)r!   r  r    r   iinforC  r  )r_  r   zeroones       rv   default_accumulatorReduction.default_accumulator  s     ..e$$V}$!%(({{5)---..e$$U|#!%(({{5)---(//uQ&u--d1#40 $D1&+FmT%:
  	rx   c                :    U S:X  a  g[         R                  X5      $ )Nr  r   )rt  r  r_  r   s     rv   default_valueReduction.default_value:  s!     --,,^CCrx   c                    U S:X  a  U$ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U$ )Nr     i      )rH   r  
OUTER_TINY)r  r  rw  s      rv   _multilayer_second_step_hint&Reduction._multilayer_second_step_hintB  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++rx   c                   Uc  g[         R                  R                  R                  UR	                  5       U5      (       d  gUR                  5          [        U5        UR                  5       n[        USS 5       H8  u  pE[         R                  R                  R                  US5      (       d  M6  Us  $    g! [         a     gf = f)z
If we are reducing over the full tensor, and it is non-dense in the last dimension,
reindex so we reduce over the dense dimension. initially just handle complete
reduction case
Nr  r5   )
r`   r   r   r#  r  r  r5  r  r  r   )r  r  r  r:  r   r   s         rv   $check_for_split_dense_dim_reindexing.Reduction.check_for_split_dense_dim_reindexingS  s     ww77  "O
 
 	!*- '')gcrl+DAww771== ,  # 		s   B: :
CCc                  ^^^^^
^ U R                  TU5      n[        R                  UT/U5      m[        R                  R
                  R                  [        R                  " TU-  S5      5      (       + m
      SUUUU
UU4S jjn	U	$ )Nr   c                "  >^^ Uu  nU Gt mnTU-  U-   mSUU
UU4S jjnT(       ac  [        T5      n[        R                  " [        R                  " TU5      [        R                  " TU5      5      n[        R                  " XST	5      $ U" 5       $ )Nc                 $   > T" TT" T /5      5      $ rq   r   )r  r&  	new_indexr   s   rv   bodyCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body  s    i');<<rx   )r   r_   )rQ   r^   rM  r  masked)r   r  reduction_blockr.  index_dtyperY  r  r-  
block_sizedefaultr&  	need_maskr  r   s         @@rv   
wrapper_fn5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn  s     "1_*/'Y ?2_DG= = -o>vvNN7K8NN?K@ zz$g66vrx   )r   Sequence[Symbol]r  r8  r   r_   )	r(  Viewdynamic_reshape_indexerr`   r   r   r-   r   r  )r  r&  ru  r  r  r3  r4  r  dense_indexr6  r5  r   s    ` ` ``   @@rv   _multilayer_wrap_loader!Reduction._multilayer_wrap_loaders  s     >>Z
 ../
 ((>>HH_u,a0
 
		#	6F		 	( rx   c                   ^^^ [        S T 5       5      (       d   ST< 35       e[        R                  U[        U5      [        U5      -   5      m      SUUU4S jjnU$ )Nc              3  *   #    U  H	  oS :H  v   M     g7fr  r   r  r  s     rv   r  DReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>  s     3?a6?r  z8Only enabled for numel_hint == 1, found original_ranges=c           	        > U S [        T5       nU [        T5      S  nT" UT" [        U5      [        U5      -   5      5      $ rq   )r   r   )merged_indexnew_reduction_indexoriginal_idxr-  r&  original_rangesr   s       rv   r6  EReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn  sQ     ((>#o*>?L$S%9%;<Ii(51D+EEF rx   )rC  rm  rD  rm  r   r_   )r  r9  r:  r   )r  r&  rF  original_reduction_rangesr  r  r6  r   s    ``    @rv   '_multilayer_wrap_loader_existing_ranges1Reduction._multilayer_wrap_loader_existing_ranges  s     3?333 	
G6HI	
3 ..%uZ'85AU;V'V
		.		!5		 		 		 rx   c                  ^ U[         R                  [         R                  4;  a  UO[         R                  n[        R                  UUUUUUU	U5      nUR                  5         UR                  5       m      SU4S jjn[        R                  R                  R                  [        U5      5      nU R                  XU5      nXWS[        U5       :X  d   e[        R                  [	        UUUUU[        U5      S U	UUS95      $ )I
Break a large reduction up into multiple smaller reductions
recursively
c                   > T" / U QUQ5      $ rq   r   )r   r  intermediate_loaders     rv   intermediate_fn;Reduction.create_multilayer_helper.<locals>.intermediate_fn  s     ''A'A'ABBrx   Nr  )r   rZ  r  rZ  r   r_   )r   float16bfloat16r  rt  r  r  r  r`   r   r   r   r[   r%  r   rm   )r  r   r  rv  r6  rF  rH  r  r  r_  r  rw  intermediate_dtypeintermediaterO  r  rN  s                   @rv   create_multilayer_helper"Reduction.create_multilayer_helper  s$   0  ??  	
 !'' 	
 	*668	C%	C8J	C	C
 WW%%//o0NO
99~
 -Cs?/C"DDDD(&!+C,@,B!C-#-	
 	
rx   c                    [        U5      n[        XS-
  -   U5      nU R                  Xr5      nU R                  UUUUUUU
5      nU R	                  UUUUUU/ UQUPU/UUU	5      $ )rL  r5   )r[   r2   r  r<  rU  )r  r   r  rv  r  r  ru  r_  r  rw  r  r  r3  r4  r6  s                  rv   r  Reduction.create_multilayer  s    & ((89o;UC
##N>00

 ++feL
 	
rx   c                j    U R                  UUUUU5      nU R                  UUUUUU/ UQUQUU	SU
5      $ )rL  r  )rI  rU  )r  r   r  rv  r  rF  rH  r  r  r_  rw  r6  s               rv   r  +Reduction.create_multilayer_existing_ranges)  sc    $ @@% 

 ++%+o+
+ 
 	
rx   r   r[  r`  r  rl  rN  
r#  rO  r$  r2  r%  rK  r  r8  r   r   ri  r   zSequence[Sequence[Expr]]rn  rq   )r   rT  r  rF  rv  rF  r  Callable[..., OpsValue]r  rZ  ru  rZ  r_  z%Union[ReductionType, Literal['scan']]r  r   r  Optional[IRNode]r   tuple[ReductionHint, _IntLike])
r  z<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue]ru  rZ  r_  r   rv  rF  r   z(Callable[[Sequence[_IntLike]], OpsValue])r   rT  r  rF  rv  rF  r  r  r  rK  ru  rK  r_  rD   rw  rH   r  r^  r   rm   r_  r   r   rF  r   #Union[_NumLike, Sequence[_NumLike]])r  ri   r  rs   rw  rH   r   rH   )r  ri   r  r^  r   r]  )r&  r]  ru  rZ  r  ri   r  ri   r3  ri   r4  ra  r  r^  r   Callable[..., object])r&  @Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue]rF  rK  rH  rK  r  Sequence[Integer]r  rd  r   rc  )r   rT  r  rF  rv  rF  r6  r  rF  rK  rH  rK  r  
list[Expr]r  list[Integer]r_  rD   r  ri   rw  rH   r   rm   )r   rT  r  rF  rv  rF  r  r  r  rK  ru  rK  r_  rD   r  ri   rw  rH   r  r^  r   rm   )r   rT  r  rF  rv  rF  r  r  rF  rK  rH  rK  r  rf  r  rf  r_  rD   rw  rH   r   rm   )!r   r   r   r   r   r  r
  r"  r(  r%  r  r  r  r  r1  rr  r  r  r  rH   r  r  r  r  r%  r(  r<  rI  rU  r  r  r   r  r  s   @rv   rt  rt  d  s   ((!!!!N H
 

%#S"S 3S 	S
 )S 
S=


  (,^^^ ^ *	^
 #^ -^ >^ ^ %^ 
(^ ^@ -N-,- - 	-
 
2- -^  )6(=(='+^
^
 ^
 	^

 %^
 ^
 )^
 &^
 &^
 %^
 
^
 ^
@ $/	, < DD$/D	,D D %(:G	   &4D	 >  (,('( -( "	(
 ( ( 5( %( 
( (T P ( $2	
 & 0 
J 8 =
=
 =
 	=

 '=
 (=
 $2=
 =
 ,=
 &=
 =
 &=
 
=
 =
~  (,+
+
 +
 	+

 %+
 +
 )+
 &+
 +
 &+
 %+
 
+
 +
Z $
$
 $
 	$

 %$
 ($
 $2$
 "$
 ,$
 &$
 &$
 
$
 $
rx   rt  c                  z   ^  \ rS rSr% S\S'                     SU 4S jjr          SS jrSrU =r$ )	MultiOutputReductioniT  rs   output_indexc
                   >^ [        T5      (       a  T4m[        T5      S:X  a  TS   n
O      SU4S jjn
[        TU ]  UUU
UUUUUS9  Xl        g )Nr5   r   c                4   >^ ^ [        U U4S jT 5       5      $ )Nc              3  4   >#    U  H  o" TT5      v   M     g 7frq   r   )r  r   r   reduction_idxs     rv   r  @MultiOutputReduction.__init__.<locals>.loader.<locals>.<genexpr>n  s     HiR]33i   )r   )r   rm  	inner_fnss   ``rv   r&  -MultiOutputReduction.__init__.<locals>.loaderk  s     HiHHHrx   r  )r   rK  rm  rK  r   ztuple[OpsValue, ...])callabler   r  __init__ri  )r`  r   r  rp  r  ru  r_  rv  rw  ri  r&  r  s      `       rv   rs  MultiOutputReduction.__init__W  s     I"I y>Qq\FI#I4BI%I
 	-)) 	 		
 )rx   c           	     P   [         R                  " U R                  U R                  U R                  U R                  X45      5      n[        U[        [        45      (       d   [        U5       5       eXPR                     n[         R                  " U=(       d    SU" U5      U5      $ r  )r^   r  r   rv  r_  r  rr   r   r   r   ri  r  )r`  r#  r$  r%  r  r   r   s          rv   r  $MultiOutputReduction.store_reduction|  s     JJNNMM$/	
 &5$-00CT&\NC0(()"";#;)WT]ERRrx   )ri  )r   rT  r  rF  rp  z)Union[INNER_FN_TY, Sequence[INNER_FN_TY]]r  rd  ru  rd  r_  rD   rv  rF  rw  rH   ri  rs   r[  )	r   r   r   r   r   rs  r  r   r  r  s   @rv   rh  rh  T  s    #)#) #) =	#)
 "#) ,#) &#) #) &#) #)JS"S 3S 	S
 )S 
S Srx   rh  c                  j    \ rS rSr\\R                  S4                   SS jj5       rSrg)OnlineSoftmaxReductioni  Nc
           	        ^^^^^^^ [        UUUUUUU4S j[        U5       5       5      n
U
 H  nUR                  5         M     U
$ )z.
Create the reduction disregarding splitting.
c              3  p   >#    U  H+  n[         R                  [        TTTTTS TTU5	      5      v   M-     g7f)r  N)rm   r  rh  )	r  
output_idxr   r  r  r  rw  ru  rv  s	     rv   r  0OnlineSoftmaxReduction.create.<locals>.<genexpr>  sO      
 0
 $$+"
  0s   36)r   r   r  )r  r   r  rv  r  r  ru  
num_outputrw  r  resultsr   s    `````` `   rv   r  OnlineSoftmaxReduction.create  sB       
 
 $J/
 
  AIIK rx   r   )r   rT  r  rF  rv  rF  r  r  r  rK  ru  rK  r}  rs   rw  rH   r  r^  r   Sequence[TensorBox])	r   r   r   r   r  rH   r  r  r   r   rx   rv   rx  rx    s     )6(=(='+!! ! 	!
 %! ! )! ! &! %! 
! !rx   rx  c                      \ rS rSr\\R                  4               SS jj5       r\      SS j5       r	\                  S	S j5       r
Srg)
WelfordReductioni  c                  ^^^^^ US;   d   e[         R                  R                  R                  [	        T5      5      nS
UUU4S jjn	US:X  a  U	" S5      n
U	" S5      nU	" S5      nXU4$ US:X  aD      SUUUU4S jjmUS:X  a  T" US   5      U	" S5      U	" S5      4$ [        U4S jU 5       5      $ [        R                  TTTUS   TTUUS9u  pU[        R                  :X  a  UnUS:  a  U R                  TTUTTUUU5      $ [        S	5       Vs/ s H)  n[        R                  [        TTUTTUTUU5	      5      PM+     nnU H  nUR                  5         M     U$ s  snf )N)r  rb  c                V   >^  SUU 4S jjn[         R                  TTU[        T5      S9$ )Nc                2   > [         R                  " TT5      $ rq   r   )r   r   r  s    rv   r  8WelfordReduction.create.<locals>.const.<locals>.inner_fn  s    || rx   r-  r   rK  r   r_   r  r  r   )r  r  r   r   r  s   ` rv   const&WelfordReduction.create.<locals>.const  s7      ##!F|	 $  rx   r   r5   c                V   >^  SU U4S jjn[         R                  TTU[        T5      S9$ )Nc                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf rq   r  )r   r   r  r&  ru  s      rv   r  7WelfordReduction.create.<locals>.copy.<locals>.inner_fn  s2    =M&N=Muww||=MO&N!#77 'Or	  r-  r  r  )r&  r  r   r   r  ru  s   ` rv   copy%WelfordReduction.create.<locals>.copy  s7    8 8 !''!%<	 (  rx   r  c              3  4   >#    U  H  nT" U5      v   M     g 7frq   r   )r  r   r  s     rv   r  *WelfordReduction.create.<locals>.<genexpr>  s     :	"T"XX	ro  )r_  r  r   )r  rs   r   rm   )r&  z4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]r   rm   )r`   r   r   r  r[   r   rt  r  rH   r  r  r   rm   r  r  r  )r  r   r   rp  r  ru  r_  rw  r  r  meanm2weightr  r  r{  r~  r   r  s    `` ``            @rv   r  WelfordReduction.create  s    !FFFF''**33MBR4ST	 	 a8DqB1XFV##aL  !11IaL)58U1X==:	:::&  **aL)+ + 	
 ]222!N19(( 	 	2 $Ah
 '
  $""
 ' 	 
  AIIK %
s   0E#c                    g)N)r   r   r   r   r  s     rv   r  WelfordReduction.default_value.  s     rx   c	                  ^ ^^^^^^ [        T5      m[        R                  R                  R	                  [
        R                  " TT-  S5      5      (       + n	U	(       aB  US:w  a<          S
U4S jjn
T R                  UTUS   [        U
SS9[        U
SS94UTSTUS9$ [        TTS-
  -   T5      m[        R                  UT[        UU UUU4S jU 5       5      / UQTPT/UU5      nU H  nUR                  5         M             SS jm[        R                  R                  R                  [        U5      5      nT R                  TX5      n[        R                  UT[        U4S	 jU 5       5      UT/SU5      $ )rL  r   rb  c                2   > [         R                  " UT5      $ rq   r   )r   rm  r   r   s      rv   r  4WelfordReduction.create_multilayer.<locals>.constantM  s     ||E511rx   r   r5   )r   r   rp  r  ru  r_  r  rw  c           
   3  L   >#    U  H  nTR                  UTTTTS S9v   M     g7f)r   )r4  N)r<  )r  r&  r3  r  r  ru  r  s     rv   r  5WelfordReduction.create_multilayer.<locals>.<genexpr>e  s>      
 (F ++$# ,  (s   !$c                    U" / U QUQ5      $ rq   r   )r   r  r&  s      rv   intermediate_loader_fnBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fnx  s    
 4E4O455rx   c              3  T   >#    U  H  n[        TUR                  5       S 9v   M     g7f))r&  N)r
   r  )r  r   r  s     rv   r  r    s&      &A .q}}G&r  )r   rK  rm  rK  r   rs   r   r_   )r   rK  r  rK  r&  rV  r   r_   )r[   r`   r   r   r-   r   r  r  r
   r2   r  r  r   r  r   r%  )r  r   r   rp  r  ru  r_  r  rw  r5  r  intermediatesr   r  r3  r  r  s   ` `  ` `      @@@rv   r  "WelfordReduction.create_multilayer4  s     ((89((>>HH_u,a0
 
	 +<<2#24B2KN22
 ((aLHA.HA.
 !10- )   o;UC
(// 
 
 (
 
 feL#
& AIIK 	6!	6+	6 9	6 		6 WW%%//f0EF
99:
  && &  G
 	
rx   r   N)r   rT  r   rF  rp  Sequence[Callable[..., Any]]r  rf  ru  rf  r_  rD   rw  rH   r   r  r`  )r   rT  r   rF  rp  r  r  rf  ru  rf  r_  rD   r  ri   rw  rH   r   r  )r   r   r   r   r  rH   r  r  rr  r  r  r   r   rx   rv   r  r    s    )6(=(=vv v 0	v
 v (v &v &v 
v vp $/	, 
 Z
Z
 Z
 0	Z

 Z
 (Z
 &Z
 Z
 &Z
 
Z
 Z
rx   r  c                    ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S S!U 4S jjjrS"U 4S jjr          S#S jrS$S jrS%S jr	S&S jr
S&S jrS'S jrS(S jrS S!S jjr\\R"                  4SS.                   S)S jjj5       r\                  S*S j5       rSrU =r$ )+Scani  rf  scan_rangesr   =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]r  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rH   rw  rs   ri  tuple[torch.dtype, ...]dtypestuple[Callable[..., Any], ...]rp  c                   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  [        5       R                  " U4S jU R
                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7frq   r  r  s     rv   r  ,Scan.get_free_symbol_uses.<locals>.<genexpr>       O>N"1m44>Nr  c              3  <   >#    U  H  n[        UT5      v   M     g 7frq   r  r  s     rv   r  r         Hi"1m44ir  )r  r"  r0   r  r  r   r}  s    `rv   r"  Scan.get_free_symbol_uses  s]    
 G(7l  Od>N>NO l  HdiiH		
rx   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g rq   )r   r  r  r   r  rh  r  s    rv   rh  Scan.__post_init__  =    4;;#d&6&6"773tyy>IIIrx   c                "  ^ U R                  X45      m[        U4S jU R                   5       5      n[        R                  " U R
                  U R                  U5      n[        R                  " U=(       d    SU" T5      X`R                     5      $ )Nc              3  2   >#    U  H  o" T5      v   M     g 7frq   r   r  r  r   s     rv   r  'Scan.store_reduction.<locals>.<genexpr>       D^x}}^   r   )	r   r   rp  r^   r  r  r  r!  ri  )r`  r#  r$  r%  	scan_varsr   resultr   s          @rv   r  Scan.store_reduction  sk     ll4+DT^^DD$++t?yy$9gclF;L;L4M
 	
rx   c                    g)Ncustomr   rg  s    rv   r%  Scan.get_reduction_type  s    rx   c                    U R                   $ rq   )r  rg  s    rv   r(  Scan.get_reduction_size  r~  rx   c                    U R                   $ rq   r   rg  s    rv   r   Scan.get_size      yyrx   c                    U R                   $ rq   r  rg  s    rv   r  Scan.get_pointwise_size  r  rx   c                X    [        U R                  5      [        U R                  5      -   $ rq   )r   r  r  rg  s    rv   r  Scan.index_length  !    4;;#d&6&6"777rx   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ rq   )r  r  r  r4   r  r   r`  r   r  r   s       rv   r  Scan.inner_fn_args  C    DKK(T--t}}=ll5)vrx   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X#5      n[        U R                  XAS9$ r  )r  r  r  r4   r  r   r=   r  r`  r   r   r  r   s        rv   r  Scan.inner_fn_free_symbols  M    DKK(T--t}}=ll5)#DMM3TTrx   T)can_fallback_to_atenc                 ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [        U5      S:  aB  [         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      n[        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [        [        U5      5       Vs/ s H  n[        R                  UX,   X<   US9PM     sn$ U R!                  UUS   US   TTTUUS9u  p}["        nUS:  at  [$        R&                  R(                  S L =(       d    [*        =(       a	    [,        S:  =(       a    [        U5      S:H  nU(       d  U(       a  S /[        U5      -  $ SnO[.        nSUUU4S jjn[        [        U5      5       Vs/ s H/  n[0        R                  U" S	UX,   UX<   UUTTUUUUS.U	D65      PM1     nnU H  nUR3                  5         M     U$ s  snf s  snf )
Nr5   r-  r   )r   r   r  axispointwise_rangesr  r  
scan_numelz3.3.0c                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ rq   r   )r   
scan_indexr  r  r  s     rv   r   Scan.create.<locals>.reindex	  S    z?c+&6666u:%5!6666>U5D\>J>tu>>rx   )r   r   r  r  rp  r   r  r  r  r   rw  ri  )r   rK  r  rK  r   re  r   )r`   r   r  r8   SCANr   TUPLE_REDUCTIONr   r  r[   r-   r   Ler   r  r  r  r  r   versionhip
has_tritontriton_version	SplitScanrm   r  )r  r   r  rp  r   r  r  rw  r  r   r   r  ri  r  	scan_typesupports_splitr   r~  r  r  r  s        `             @@rv   r  Scan.create  s    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''v;?177#6#6N22$
 $
 6CK''77##&&}['AB
6{c)n,,, ))%((:q*ABB %*#f+$6 %7L   ! .&4	 !  %7  &)^^)q\-#!! &4 	&
" 	>!!T)Wj.V^w=V%v;!#  "' 6CK//!"J%		? 	?. !&c&k 2%
$ !3#  ! .!&4'+ +)##1!- " !3% 	 
* FNN  N
s   ;$I3 6I8c	                L   ^^ SUU4S jjn	[         R                  UUUU	UUSUS9$ )Nc                ,   > T" / U S T QUQU TS  Q5      $ rq   r   )r   rm  r  r  s     rv   r6  #Scan.num_splits.<locals>.wrapper_fnK	  s*    Fc%4jF=F3tu:FGGrx   r  )r   r  rv  r  r  ru  r_  r  )r   rK  rm  rK  r   r_   )rt  r  )
r  r   r   r  r  r  r  r  r  r6  s
      ``     rv   r  Scan.num_splits>	  sA    	H 	H ###(!& $ 	
 		
rx   r   r`  r  r>  )
r#  rO  r$  z%Callable[[Sequence[_IntLike]], Never]r%  rK  r  r8  r   r   rN  rl  rJ  ri  r  )r   rT  r  r  rp  z+tuple[Callable[[Sequence[Expr]], Any], ...]r   rf  r  rs   r  r  rw  rH   r  r   r   r   r   Sequence[Optional[TensorBox]])r   rT  r   rF  r  rV  r  rs   r  rf  r  rf  r  r  r  r   r   r_  )r   r   r   r   r   r"  rh  r  r%  r(  r   r  r  r  r  r  rH   r  r  r  r   r  r  s   @rv   r  r    s   
MMSS!!##--
 
 
"
 7
 	

 $
 

 8U  )6(=(=_ &*__ (_ ?	_
 _ _ R_ &_ #_ _ 
'_ _B 

 
 7	

 
 (
 #
 R
 
 
(
 
rx   r  c                      \ rS rSrSrg)r  i[	  r   Nr   r   r   r   r   r   rx   rv   r  r  [	  s    rx   r  c                  X  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S\S'   SSU 4S jjjrS U 4S jjr          S!S jrS"S jrS#S jr	S#S jr
S#S jrS$S jrS%S jrSSS jjr\\R"                  4                   S&S jj5       rSrU =r$ )'Sorti`	  rf  sort_rangesr   z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rH   rw  rs   ri  r  r  r  rp  r   stable
descendingc                   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  [        5       R                  " U4S jU R
                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7frq   r  r  s     rv   r  ,Sort.get_free_symbol_uses.<locals>.<genexpr>u	  r  r  c              3  <   >#    U  H  n[        UT5      v   M     g 7frq   r  r  s     rv   r  r  x	  r  r  )r  r"  r0   r  r  r   r}  s    `rv   r"  Sort.get_free_symbol_usesq	  s[    G(7l  Od>N>NO l  HdiiH		
rx   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g rq   )r   r  r  r   r  rh  r  s    rv   rh  Sort.__post_init__|	  r  rx   c                6  ^ U R                  X45      m[        U4S jU R                   5       5      n[        R                  " U R
                  XPR                  U R                  5      n[        R                  " U=(       d    SU" T5      X`R                     5      $ )Nc              3  2   >#    U  H  o" T5      v   M     g 7frq   r   r  s     rv   r  'Sort.store_reduction.<locals>.<genexpr>	  r  r  r   )
r   r   rp  r^   sortr  r  r  r!  ri  )r`  r#  r$  r%  r  r   r  r   s          @rv   r  Sort.store_reduction	  so     ll40DT^^DD$++v{{DOOLyy$9gclF;L;L4M
 	
rx   c                    g)Nr  r   rg  s    rv   r%  Sort.get_reduction_type	  s    rx   c                    U R                   $ rq   )r  rg  s    rv   r(  Sort.get_reduction_size	  r~  rx   c                    U R                   $ rq   r  rg  s    rv   r   Sort.get_size	  r  rx   c                    U R                   $ rq   r  rg  s    rv   r  Sort.get_pointwise_size	  r  rx   c                X    [        U R                  5      [        U R                  5      -   $ rq   )r   r  r  rg  s    rv   r  Sort.index_length	  r  rx   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ rq   )r  r  r  r4   r  r   r  s       rv   r  Sort.inner_fn_args	  r  rx   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X#5      n[        U R                  XAS9$ r  )r  r  r  r4   r  r   r=   r  r  s        rv   r  Sort.inner_fn_free_symbols	  r  rx   c	                  ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      nSn[        R                  R                  =(       a%    U
R                  [        R                  " X5      5      nU(       d  S /[        U5      -  $ [        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [        [        U5      5       Vs/ s H  n[         R#                  UX.   X>   US9PM     sn$ SUUU4S jjn[        [        U5      5       Vs/ s H3  n[$        R#                  ['        SUX.   UX>   UUTTUUUUUS.U	D65      PM5     nnU H  nUR)                  5         M     U$ s  snf s  snf )Nr5   r"  r-  c                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ rq   r  )r   
sort_indexr  r  r  s     rv   r   Sort.create.<locals>.reindex	  r  rx   )r   r   r  r  rp  r   r  r  r   rw  ri  r  r  )r   rK  r  rK  r   re  r   )r`   r   r  r8   SORTr   r   r  r[   r6   r  persistent_reductionsr-   r   r  r   r  r  rm   r  r  )r  r   r  rp  r   r  r  r  rw  r   r   
sort_numel
max_rblockis_persistent_kernelri  r   r~  r  r  r  s        `            @@rv   r  Sort.create	  s    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''77##&&}['AB
 
MM// Q..uxx
/OP 	 $6CK''6{c)n,,, ))%((:q*ABB %*#f+$6 %7L   ! .&4	 !  %7 	? 	?0 !&c&k 2'
& !3%  ! .!&4'+ +##1!-!) $ !3' 	 
, FNN  Q
s   $G&:G+r   r`  r  r>  )
r#  rO  r$  rX  r%  rK  r  rK  r   r   rN  rJ  ri  r\  )r   rT  r  r  rp  z'tuple[Callable[[list[Expr]], Any], ...]r   rf  r  rs   r  r   r  r   rw  rH   r   r   r   r  )r   r   r   r   r   r"  rh  r  r%  r(  r   r  r  r  r  r  rH   r  r  r   r  r  s   @rv   r  r  `	  s/    
GG!!##--L	
 	
 
"
 2
 	

 '
 

 8U  )6(=(=LL (L ;	L
 L L L L &L L 
'L Lrx   r  c                :     [        U SS9  g! [         a     gf = f)NFfreezeT)r5  r  rt   s    rv   r   r   	  s&    a. s   
 
c                     [        U SS9u  pUR                  5       (       a  UR                  5         UR                  5       $ ! [         a     gf = fNFr  )r5  should_pad_stridespad_stridesis_contiguousr  )ru   _bufferr4  s      rv    is_contiguous_storage_and_layoutr"  
  sS    /%@ $$&& ##%% s   A A 
AAc           	        [        U [        5      (       a  [        U R                  UUUUUS9$ [        U [        5      (       a5  [        U R                  UUUUUS9u  pgX R                  R                  5       4$ [        U [        5      (       a  U(       as  U(       a6  U R                  5         U R                  5       R                  5       (       d   eO6Ub  U R                  X4S9  O#Ub  U R                  XTS9  OU R                  5         [	        U 5      U R                  5       4$ [        U [        5      (       a#  [        U R                  US9u  pXR                  4$ [        e)z
Try to simplify x into a StorageBox and a Layout.

allow_padding only affect how we apply stride_order. When allow_padding
is True, we have the freedom to add padding when applying the stride_order.
r  want_contiguousstride_orderr  r  r  r  )rr   rm   r5  r3  
StorageBoxr   Bufferr  r   r  r  r  r8  r4  r  )	ru   r  r%  r&  r  r  r   r4  buffers	            rv   r5  r5  
  sJ    !Y$FF+%''
 	
 !Z  )FF+%''
	 &&##%%%!V!||~335555)11  2  *22! 3  !!}alln,,!_%% *FF
	 xx
rx   c                ^     [        U SS9u  p#UR                  U5      $ ! [         a     gf = fr  )r5  is_stride_orderedr  )ru   r&  r!  r4  s       rv   "is_stride_order_storage_and_layoutr-  I
  s8    /%@''55 s    
,,c                   [        U [        [        45      (       a  [        U R                  5      $ [        U [
        5      (       ad  U R                  n[        UR                  [        UR                  5      -  [        -  S:H  5      (       + n[        U R                  5      =(       d    U$ [        U [        5      (       a+  U R                  5       [        R                  R                   ;   $ g)Nr   F)rr   rm   r(  is_unalignedr3  r8  r4  r-   r7  rR   r   rT   r)  r  r`   r   unaligned_buffers)r   r4  has_unaligned_layouts      rv   r/  r/  S
  s    $J/00DII&&$((#8MMN6<<88?JaO$
  
 DII&>*>>$}}!''";";;; rx   c                      \ rS rSr% S\S'   SSS jjrSS jrSS jrSS jr\	S S j5       r
S!S	 jrS"S
 jrS#S jrS$S jrS%S jrS&S jrS'S jrS(S jrS rS rS'S jrS'S jrS)S jrS*S jrS rS+S jrSrg),BaseViewie
  rn   r3  c                8    U R                   R                  U5      $ rq   r3  r"  r!  s     rv   r"  BaseView.get_free_symbol_usesi
  s    yy--m<<rx   c                    [        SU  35      e)Nzmake_reindexer NYI on r}  rg  s    rv   make_reindexerBaseView.make_reindexerl
  s    !$:4&"ABBrx   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ rq   r   r   innerr   s    rv   r$  &BaseView.make_indexer.<locals>.indexers
      &&rx   )r   rK  r   r   )r3  r  r8  )r`  r$  r=  r   s     @@rv   r  BaseView.make_indexero
  s4    		&&(%%'	' 	' rx   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ rq   r   r<  s    rv   r&  $BaseView.make_loader.<locals>.loader|
  r?  rx   r  )r3  r  r8  )r`  r&  r=  r   s     @@rv   r  BaseView.make_loaderx
  s4    		%%'%%'	' 	' rx   c                6    U R                   R                  5       $ rq   )r3  r   rg  s    rv   r   BaseView.dtype
  s    yy""$$rx   c                6    U R                   R                  5       $ rq   r3  r   rg  s    rv   r   BaseView.get_layout
      yy##%%rx   c                6    U R                   R                  5       $ rq   r3  r   rg  s    rv   r   BaseView.get_device
  rJ  rx   c                    g rq   r   rg  s    rv   r|  BaseView.get_origin_node
  r  rx   c                6    U R                   R                  5       $ rq   r3  r  rg  s    rv   r  BaseView.get_name
      yy!!##rx   c                "    U R                  5       $ rq   r  rg  s    rv   r  BaseView.get_pointwise_size
      }}rx   c                8    U R                   R                  U5      $ rq   r3  r  r  s     rv   r  BaseView.mark_reuse
      yy##E**rx   c                6    U R                   R                  5       $ rq   r3  r  rg  s    rv   r  BaseView.has_exceeded_max_reads
      yy//11rx   c                6    U R                   R                  5       $ rq   r3  r  rg  s    rv   r  BaseView.realize
      yy  ""rx   c                6    U R                   R                  5       $ rq   r3  r  rg  s    rv   r  BaseView.realize_hint
      yy%%''rx   c                6    U R                   R                  5       $ rq   r3  r  rg  s    rv   r  BaseView.get_storage_numel
      yy**,,rx   c                6    U R                   R                  5       $ rq   r3  r+  rg  s    rv   r+  BaseView.is_extern
      yy""$$rx   c                6    U R                   R                  5       $ rq   )r3  is_module_bufferrg  s    rv   rp  BaseView.is_module_buffer
      yy))++rx   c                6    U R                   R                  5       $ rq   r3  rt  rg  s    rv   rt  BaseView.get_read_names
      yy''))rx   c                    [         R                  " [        SS5         [        U R	                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr  )r   r   r   r?   r  r   r  rg  s    rv   rs  BaseView.get_reads
  sD    \\.*:DA&  " e	 BAAs   2A
A'c                z    U n[        U[        5      (       a#  UR                  n[        U[        5      (       a  M#  U$ rq   )rr   r3  r3  )r`  ru   s     rv   r  BaseView.unwrap_view
  s1    H%%A H%%rx   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R                  5       UU R                  5       S9$ r*  )r  r   r   r.  r  r   r   r/  s      rv   r1  BaseView.constant_to_device
  sN    !!#n.?HP.."==?	
 	
rx   r   Nr`  r  )r   z*Callable[[Sequence[Expr]], Sequence[Expr]]rW  rU  rE  rG  rQ  rA  r[  rJ  r^  rI  rN  r?  rh  rn  )r   r   r   r   r   r"  r8  r  r  ru  r   r   r   r|  r  r  r  r  r  r  r  r+  rp  rt  rs  r  r1  r   r   rx   rv   r3  r3  e
  s    
L=C % %&&$+2#(-%,*	
rx   r3  c                  P    \ rS rSr% S\S'   \S 5       r\S 5       rS
S jr	S r
Srg	)r   i
  re  r   c                t   [         R                  R                  n[        [	        [
        R                  U5      5      nU R                  5       nS/[        U5      [        U5      -
  -  [        U5      -   n[        U5      [        U5      :X  d   e[        [        U5      5       H  nX   S:X  a  X4   c   eX4   X'   M  X4   bN  [         R                  R                  R                  R                  [
        R                  " X4   S5      SS9(       a  Mo  UR                  X   X4   -
  SS9S:X  a  M   S5       e   U$ )	zReplace `-1` with correct sizesNr  r5   Tsize_obliviousr   fallbackzKBroadcast failed in ExpandView({x.get_size()}, {new_size}) on dimension {i})r`   r   r   r   r  r   r  r   r   r   r   evaluate_exprr  r   )ru   new_sizer   old_sizer   s        rv   _normalize_sizeExpandView._normalize_size
  s    77##ELL(34::<6S]S]:;d8nL8}H---s8}%A{b {...&k$(8(8(B(B(P(Pa( )Q )   ))(+*Ca)PTUU aU &" rx   c           	        U R                  X5      n[        U5      (       Ga3  [        U5      u  p4[        U5      [        UR                  5      -
  nUS:  d   e[
        R                  R                  /U-  n[        UR                  UR                  5       H|  u  pxUR                  [        R                  R                  R                  R                  [
        R                   " US5      SS9(       d  UO[
        R                  R                  5        M~     [#        UR$                  UR&                  [)        U5      UUR*                  5      n	[-        X9S9$ [/        XS9$ )Nr   r5   Tr  r2  )r3  r   )r  r   r5  r   r   r   r  r  r   r   r  r`   r   r   r   r  r  r6  r   r   r   r7  r8  r   )
r  ru   r  r;  r<  skipr=  r   r   r>  s
             rv   r  ExpandView.create
  s   &&q3 ##"7":Gx=3z#77D199'',,$.J #J$5$5z G!!77++55CCq)$ D    !H %!!  X!!J #CCq00rx   c                    U R                   $ rq   r  rg  s    rv   r   ExpandView.get_size   r  rx   c                   ^^ U R                  5       nU R                  R                  5       m[        U5      [        T5      -
  mUU4S jnU$ )Nc                   > [        U TS  5      n [        U 5      [        T5      :X  d   e[        [        T5      5       H*  nTU   S:X  d  M  [        R                  R
                  X'   M,     U $ r  )r   r   r   r   r  r  )r   r   actualr  s     rv   r   *ExpandView.make_reindexer.<locals>.reindex  s]    tu&Eu:V,,,3v;'!9>$ww||EH ( Lrx   )r   r3  r   )r`  targetr   r  r  s      @@rv   r8  ExpandView.make_reindexer  s>    ##%6{S[(	 rx   r   NrJ  )r   r   r   r   r   rr  r  r  r  r   r8  r   r   rx   rv   r   r   
  s8    
 4 1 16rx   r   c                  P    \ rS rSr% S\S'   \S 5       r\S 5       rS
S jrS r	Sr
g	)PermuteViewi  re  dimsc           
        U R                  U5      n[        U5      [        [        [        U5      5      5      :X  d   e[	        U5      (       a}  [        U5      u  p4[        UR                  UR                  U Vs/ s H  oTR                  U   PM     snU Vs/ s H  oTR                  U   PM     snUR                  5      n[        X6S9$ [        XS9$ s  snf s  snf )Nr2  )r3  r  )_map_neg_dimsr0   r   r   r   r5  r6  r   r   r   r   r7  r8  r  )r  ru   r  r;  r<  r   r>  s          rv   r  PermuteView.create  s      &$:eCI.>#???? ##"7":G$!!  -12T#T2/34t!""1%t4!!J #CC-- 34s   :CCc                X    U Vs/ s H  o"S:  a  UO[        U5      U-   PM     sn$ s  snf r  r  )r  r  r(  s      rv   r  PermuteView._map_neg_dims*  s+    @DEaxSY_4EEEs   'c                   [        U R                  U R                  5      5      [        [        [	        U R                  5      5      5      :X  d   eU R
                  R                  5       nU R                   Vs/ s H  o!U   PM	     sn$ s  snf rq   )r0   r  r  r   r   r3  r   )r`  r   r   s      rv   r   PermuteView.get_size.  sq    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   8B	c                N  ^ [        U R                  5       VVs0 s H  u  pX!_M	     snnm[        [        U R                  5      5       Vs/ s H  nTU   PM
     snm[	        T5      [	        [        [        U R                  5      5      5      :X  d   eU4S jnU$ s  snnf s  snf )Nc                8   > T Vs/ s H  oU   PM	     sn$ s  snf rq   r   )r   r   invs     rv   r   +PermuteView.make_reindexer.<locals>.reindex:  s    &)*c!Hc***s   )r   r  r   r   r0   )r`  r   r  r   r  s       @rv   r8  PermuteView.make_reindexer5  s     )$)) 45 4qt 45$S^454!s1v45#*U3tyy>-B"CCCC	+  65s   BB"r   NrJ  )r   r   r   r   r   r  r  r  r   r8  r   r   rx   rv   r  r    s:    
. ." F F,rx   r  c                  J    \ rS rSr\SS.S j5       r\SS j5       rS	S jrSr	g)
SqueezeViewi@  N)r(  c          	        [        U5      (       Ga!  [        U5      u  p4/ n/ nUb=  [        U[        5      (       d   S5       eSU::  a  U[	        UR
                  5      :  d   e[        [        UR
                  UR                  5      5       Hm  u  nu  pUc,  US:w  a$  UR                  U5        UR                  U	5        M5  M7  Xr:w  a$  UR                  U5        UR                  U	5        M`  US:X  a  Mh   S5       e   [        UR                  UR                  UUUR                  5      n
[        X:S9$ Uc;  [        R!                  XR#                  5        Vs/ s H  oS:w  d  M
  UPM     sn5      $ UR#                  5       U   S:X  d   e[        R!                  U[        UR#                  5       5       VVs/ s H  u  p{Xr:w  d  M  UPM     snn5      $ s  snf s  snnf )Nzexpected integer dim argumentr   r5   zexpected squeezed size to be 1r2  )r   r5  rr   rs   r   r   r   r   r   r  r6  r   r   r7  r8  r9  r  r   )r  ru   r(  r;  r<  r  r=  r   r   r   r>  r   s               rv   r  SqueezeView.createB  s    ##"7":GHJ!#s++L-LL+CxC#joo*>$>>>%.s:??JDUDU/V%W!>D;qy -"))&1 ! x -"))&1#qyJ*JJy &X %!!  !!J #CC;;;qjjl"El1f1l"EFF::<$)));;q1::<1H"U1HAH11H"UVV #F #Vs   	G
$G
6GGc                   ^^ U  Vs/ s H  oS:w  d  M
  UPM     nn[        U 5       VVs/ s H  u  p1US:w  d  M  UPM     snnm[        U 5      mSUU4S jjnX$4$ s  snf s  snnf )Nr5   c                   > [        U 5      [        T5      :X  d   U  ST 35       e[        R                  R                  /T-  n[	        TU 5       H	  u  p#X1U'   M     [        U5      $ )N )r   r   r  r  r   r   )r   r-  r   r   lengthnot_ones       rv   r   %SqueezeView.squeezer.<locals>.reindexn  sb    u:W-C%'/CC-/Igu-!"# .##rx   )r   zlist[sympy.Expr]r   ztuple[sympy.Expr, ...])r   r   )r   r   r  r   r   r  r  s        @@rv   squeezerSqueezeView.squeezerh  sa    #.t!AvAt.!*4;AF1;T	$ 	$    /;s   	AAA A c                    [        S5      e)Nzuse SqueezeView.create())AssertionError)r`  r3  s     rv   rs  SqueezeView.__init__w  s    788rx   r   )r   rm  r>  )
r   r   r   r   r  r  rr  r  rs  r   r   rx   rv   r  r  @  s3    " #W #WJ ! !9rx   r  c                  b    \ rS rSr% S\S'   S\S'   S rSS jrSS jr\r\	S	 5       r
SS
 jrSrg)GenericViewi{  re  r   r  r   c                    U R                   $ rq   )r   rg  s    rv   r8  GenericView.make_reindexer      ||rx   c                   [        [        U R                  5      5       Vs/ s H  n[        [        R
                  U5      PM     nn[        U R                  U5      5      nSSR                  [        [        U5      5       SU 3$ s  snf )Nzlambda , r  )r   r   r   rZ   r4   r  r   r   r  r  r   )r`  r  	index_old	index_news       rv   reindex_strGenericView.reindex_str  sv    CHTYYCX
CXa*4::q9CX 	 
 i01	3sI#6789+FF	
s   $Bc                z    U R                  U R                  SU R                   3SU R                  5        3/5      $ )Nsize=zreindex=)r  r3  r   r  rg  s    rv   r  GenericView.__str__  s=    YY%		{+x8H8H8J7K-LM
 	
rx   c                $    U " U[        U5      US9$ )Nr3  r   r   )r   )r  ru   r  r   s       rv   r  GenericView.create  s    X@@rx   c                    U R                   $ rq   r  rg  s    rv   r   GenericView.get_size  r  rx   r   Nr[  rJ  )r   r   r   r   r   r8  r  r  r
  r  r  r   r   r   rx   rv   r  r  {  s>    
G

 HA Arx   r  c                      \ rS rSr\S 5       r\S 5       r\S 5       r\ S	       S
S jj5       r	\S	SS jj5       r
Srg)r9  i  c                   [         R                  " U 5      n [         R                  " U5      n[        R                  R                  R
                  R                  nU" [         R                  " U S5      5      (       a  X-   n U $ r  )r   r  r`   r   r   r   r  Lt)r   r   r  s      rv   handle_negative_indexView.handle_negative_index  s[    ll3||D!((22@@#q)***C
rx   c                  ^	 [        U[        [        45      (       d   eU R                  UR	                  5       U5      u  m	n[
        R                  R                  R                  T	U5      (       a  U$ Sn[        [        T	5      5      S:  d  [        [        U5      5      S:  a  SnSU;   a  U	4S jnU " U[        U5      US9$ [        U5      (       d  U(       a  U(       a%  [        U5      (       d  [        R                  U5      n[        USS9u  pV[        UR                   UR"                  U[$        R'                  U5      UR(                  5      n[+        XWS9$ U R-                  T	U5      nU " U[        U5      US9$ )NFr   Tc                4   > [        S/[        T5      -  5      $ r  )r   r   )r   r  s    rv   fake_reindex!View.create.<locals>.fake_reindex  s    aS3x=011rx   r  )r%  r2  )rr   r   r   resolve_negative_sizer   r`   r   r   statically_known_list_equalsr   r(   r"  ExternKernelrequire_contiguousr5  r6  r   r   r   r   r7  r8  r:  )
r  ru   r  unbacked_symbols_in_sizesr  r;  r<  r>  r   r  s
            @rv   r  View.create  sT   (UDM2222 66qzz|XN( 77888LLH$)!%h/014(23a7(,%=2 ADNLII-a004M(2RST2U2U !33A6"74"PG$!!  11(;!!J #CC--hAX@@rx   c                P   U Vs/ s H,  n[         R                  R                  R                  U5      PM.     nnU  Vs/ s H,  n[         R                  R                  R                  U5      PM.     n n[	        U5      n[        [        U5      5       HI  nX   S:X  d  M  [        R                  R                  X'   [        [        U 5      [        U5      5      X'     O   [         R                  R                  R                  [        U 5      [        U5      5        X4$ s  snf s  snf )Nr  )r`   r   r   r  r   r   r   r   r  Oner1   r[   guard_equals)r  r  ru   r   s       rv   r  View.resolve_negative_size  s    :BC(QAGG$$--a0(C:BC(QAGG$$--a0(C>s8}%A{b #ggkk&}X'>h@WX	 & 	
%%mH&=}X?VW!! DCs
   3D3D#Nc                     U R                  XU5      nU$ ! [        [        4 a=    [        U5      /nU R                  X5      nU R                  XR5      n[	        Xg5      n U$ f = frq   )_dynamic_reshape_indexerr  
IndexErrorr[   r   )r  r  r  	dense_dimr   flatr   r   s           rv   r:  View.dynamic_reshape_indexer  sr    	:228yQG  
+ 	:!(+,D33HCH33DCH%h9G	:s    A	A#"A#c                  ^^ [         R                  R                  R                  n[	        [        U5      5       Vs/ s H  n[        [        R                  U5      PM     snm[        [        TU5      5      n[        U 5      nUSL=(       a&    U[        U5      S-
  :g  =(       a    [        U5      S:H  nU(       a'  Uc   eUR                  U5      nUR                  U5        / mU(       GaK  U(       GaC  UR                  5       n	UR                  5       u  pU	S:X  a=  TR                  [        R                  R                  5        UR                  X45        GOUS:X  a  UR                  U	5        GOU" U5      U" U	5      :X  a<  TR                  U
5        [         R                  R                  R!                  X5        GOfU" U5      U" U	5      :  a~  U" U5      U" U	5      :  a1  UR                  5       u  pX-  U
-   n
X-  nU" U5      U" U	5      :  a  M1  TR                  U
5        [         R                  R                  R!                  X5        OU" U5      U" U	5      :  a  [        R                  R"                  nU	nTR                  [%        XU5      5        X-  nU" U5      U" U	5      :  aG  UR                  5       nTR                  [%        XU5      5        X-  nX-  n	U" U5      U" U	5      :  a  MG  [         R                  R                  R!                  X5        O[&        eU(       a
  U(       a  GMC  U(       al  UR                  5       n	[         R                  R                  R!                  U	S5        TR                  [        R                  R                  5        U(       a  Ml  U(       aE  UR                  5       u  p[         R                  R                  R!                  US5        U(       a  ME  UbB  [        U5      S:X  a3  TR)                  5         TR                  5       nTR+                  UU5        OTR)                  5         [        T5      [        U 5      :X  d   eUU4S jnU$ s  snf )z7
Perform a reshape entirely by modifying indexing math
Nr5   c                   >^ [        U 5      [        T5      :X  d   [        U 5      [        T5      45       e[        [        TU 5      5      m[        U4S jT 5       5      $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7frq   )r\   )r  ru   replacementss     rv   r  AView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>B  s     HiA|44ir  )r   r   r   r   )r   r  r%  	view_exprs    @rv   r   .View._dynamic_reshape_indexer.<locals>.reindex?  sM    u:T*CSZT,CC*D% 01LHiHHHrx   )r`   r   r   r   r   r   rZ   r4   VIEWr   r   r  r  r   r  r  r  r  r3   r  reverseinsert)r  r  r  r   r   	stack_new	stack_oldreordering_dense_dimold_dimsize_oldvarsize_newvar2	size_new2divisormodulus
dense_exprr   r%  r  s                     @@rv   r  View._dynamic_reshape_indexer  s   
 GG$$..	 CHHBV
BVQ*499a8BV
 T8,-	N	 T! #S^a//#H" 	
  (((mmI.GW%	I }}H%MMOMC1}  .  #1Q  *8$	((;;  %  --hA8$y'::)Ih,??&/mmoOD/C/C'3H  )Ih,??   %  --hA8$y'::''++"  w!GH!+)Ih,??'mmoG$$_S7%KL%/G'1H	  )Ih,??
   --hA$$= II@  }}HGG))(A6UWW\\* i
 %MMOMCGG))(A6 i  S]a%7"JY
39~X...	I
 W
s   $Q/r   rq   )r  rZ  r  rZ  r  r]  r   &Callable[[Sequence[_T]], Sequence[_V]])r  r]  )r   r   r   r   rr  r  r  r  r  r:  r  r   r   rx   rv   r9  r9    s      *A *AX " " 
 $(	$ % !	
 
0   R Rrx   r9  c                     ^  \ rS rSr% SrS\S'   SU 4S jjrSS jr\rSS jr	SS jr
SS	 jr\S
 5       rSS jrS rSS jrSS jrSS jrS r S   SS jjrSS S jjrS!S jrSrU =r$ )"r8  iG  z*Pretend our storage has a different layoutr  r4  c                   > [         TU ]  5         [        U R                  [        5      (       a0  [
        R                  U SU R                  R                  5       5        g g )Nr3  )r  rh  rr   r3  r3  r   r_  r  r  s    rv   rh  ReinterpretView.__post_init__M  sC    dii**tVTYY-B-B-DE +rx   c                P    U R                  U R                  U R                  /5      $ rq   )r  r3  r4  rg  s    rv   r  ReinterpretView.__str__R  s&    		
 	
rx   c                6    U R                   R                  5       $ rq   rQ  rg  s    rv   r  ReinterpretView.get_name\  rS  rx   c                .    U R                   R                  $ rq   )r4  r   rg  s    rv   r   ReinterpretView.get_device_  s    {{!!!rx   c                    g rq   r   rg  s    rv   r|  ReinterpretView.get_origin_nodeb  r  rx   c                .    U R                   R                  $ rq   )r4  r   rg  s    rv   r   ReinterpretView.dtypee  s    {{   rx   c                @    [        U R                  R                  5      $ rq   )r   r4  r   rg  s    rv   r   ReinterpretView.get_sizei  s    DKK$$%%rx   c                @    [        U R                  R                  5      $ rq   )r   r4  r   rg  s    rv   r  ReinterpretView.get_stridel  s    DKK&&''rx   c                   ^  SU 4S jjnU$ )Nc                Z  > TR                   R                  5       n[        R                  " TR	                  5       U" U 5      5      nTR                   R
                  TR                  R
                  :w  a6  [        R                  " UTR
                  TR                  R
                  5      $ U$ rq   )r4  r  r^   loadr  r   r3  to_dtype_bitcast)r   r$  
tmp_loaderr`  s      rv   r&  +ReinterpretView.make_loader.<locals>.loaderp  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!rx   r   rK  r   r_   r   r`  r&  s   ` rv   r  ReinterpretView.make_loadero  s    	" rx   c                6    U R                   R                  5       $ rq   )r4  r  rg  s    rv   r  ReinterpretView.make_indexerz      {{''))rx   c                    U R                   $ rq   r4  rg  s    rv   r   ReinterpretView.get_layout}  r  rx   c                    g rq   r   rg  s    rv   r  ReinterpretView.freeze_layout  r  rx   c                    [        U R                  R                  U5      [        U R                  R                  U5      -  [        U R                  R                  U5      -  $ rq   )r   r4  r   r   r7  r!  s     rv   r"  $ReinterpretView.get_free_symbol_uses  sQ     T[[--}=t{{11=ABt{{11=AB	
rx   c           	     p   [         R                  R                  R                  U R                  U R
                  R                  U R
                  R                  U R
                  R                  Ub  UR                  O#[         R                  R                  R                  U R
                  R                  S9$ r  )r`   r   wrapper_codecodegen_reinterpret_viewr3  r4  r   r   r7  	writeliner   r  s     rv   r  !ReinterpretView.codegen_reference  s     ww##<<IIKKKKKK & 2F8L8L8V8V++## = 
 	
rx   c                    gr  r   rg  s    rv   r  ReinterpretView.num_reads      rx   r   r>  r[  rQ  rA  rJ  rU  rW  rG  r`  rk  rq   rP  ri  )r   r   r   r   __doc__r   rh  r  r
  r  r   r|  ru  r   r   r  r  r  r   r  r"  r  r  r   r  r  s   @rv   r8  r8  G  s    4NF

 H$" ! !&(	* %*
!
	!

 rx   r8  c                  f    \ rS rSr% SrS\S'   \S 5       rSS jr\r	\
S 5       rSS jrSS	 jrS
rg)	DtypeViewi  z(Pretend our storage has a different typerF  target_dtypec                    [        U5      (       aM  [        U5      u  p4[        UR                  UUR                  UR
                  UR                  5      n[        X5S9$ [        XS9$ )Nr2  )r3  r$  )	r   r5  r6  r   r   r   r7  r8  r#  )r  ru   	new_dtyper;  r<  r>  s         rv   r  DtypeView.create  sa     ##"7":G$!!!!!!J #CCa88rx   c                P    U R                  U R                  U R                  /5      $ rq   )r  r3  r$  rg  s    rv   r  DtypeView.__str__  s     		4+<+<=>>rx   c                    U R                   $ rq   )r$  rg  s    rv   r   DtypeView.dtype  s       rx   c                6    U R                   R                  5       $ rq   r3  r   rg  s    rv   r   DtypeView.get_size  rS  rx   c                L   ^ ^ T R                   R                  5       mUU 4S jnU$ )Nc                |   > [         R                  " T" U 5      TR                  TR                  R                  5      $ rq   )r^   r	  r$  r3  r   )r   r=  r`  s    rv   r&  %DtypeView.make_loader.<locals>.loader  s*    ''c
D4E4EtyyWWrx   r3  r  )r`  r&  r=  s   ` @rv   r  DtypeView.make_loader  s"    		%%'	X rx   r   Nr[  rJ  rU  )r   r   r   r   r!  r   r  r  r  r
  ru  r   r   r  r   r   rx   rv   r#  r#    sE    29 9? H! !$rx   r#  c                  8    \ rS rSr\S 5       r\SS j5       rSrg)	SliceViewi  c                \  ^ ^^^^	^
 [         R                  R                  m
UR                  5       U   m[	        S X4T4 5       5      (       a!  [
        R                  m	[
        R                  mOT
R                  m	T
R                  mUU	U
4S jmUU U4S jnU" USTS5      nU" XCTT5      nX44$ )zb
Normalize start and end such that both are in the range
[0, x.get_size()[dim]] and start <= end.
c              3  8   #    U  H  n[        U5      v   M     g 7frq   )r(   r  ru   s     rv   r  0SliceView.normalize_start_end.<locals>.<genexpr>  s     H1GA$Q''1Grq  c                   > TR                  X5      (       a  U OT" X5      nTR                  X25      (       a  UnU$ T" X25      nU$ rq   )statically_known_geqr"  )ru   lowerupperclamped_lowerclamped_fullmax_funcmin_funcr   s        rv   clamp,SliceView.normalize_start_end.<locals>.clamp  s^    221<<(1BT 
 00FF  
   m3 
  rx   c                D   > U c  U$ TR                  U T5      n T" XU5      $ rq   )r  )r  r<  r=  r4  rB  r  dim_sizes       rv   
clamp_wrap1SliceView.normalize_start_end.<locals>.clamp_wrap  s,    {++C:CU++rx   r   )
r`   r   r   r   rB  r   MinMaxevaluate_minevaluate_max)r  ru   r(  startendrF  rB  rE  r@  rA  r   s   `     @@@@@rv   normalize_start_endSliceView.normalize_start_end  s     77##::<$H%h1GHHHyyHyyH,,H,,H		 	, 5!Xq1Xx8zrx   c           	       ^^^^ [         R                  " T5      m[        T[         R                  5      (       d  TS:  d   e TS:X  a  US:  a  TS:X  a  U$ [        UR                  5       5      mU(       a  U R                  UTTU5      u  mn[        UT-
  TS-
  -   T5      TT'   [        U5      (       av  [        U5      u  px[        UR                  5      n	U	T   T-  U	T'   [        UR                  UR                  TU	UR                  UR                  T   T-  -   5      n
[!        XzS9$ UUUU4S jn[#        UTUS9$ ! [         a     Nf = f)Nr   l    r5   r2  c                   > [        U 5      [        T5      :X  d   SU  ST 35       e[        U 5      n U T   T-  T-   U T'   U $ )Nzwrong ndim r  )r   r   )r   r(  r  rL  steps    rv   r   !SliceView.create.<locals>.reindex  sP    u:X.P+eWAhZ0PP.KEsd*U2E#JLrx   r  )r   r  rr   r   	TypeErrorr   r   rN  r2   r   r5  r   r6  r   r   r7  r8  r5  )r  ru   r(  rL  rM  rR  rB  r;  r<  r=  r>  r   r  s     `` `      @rv   r  SliceView.create  sQ   ||D!$

++tax77	zcY.419 

%
 00CDJE3 uq!94@ ##"7":Gj//0J(o4JsO$!!  !!J$5$5c$:U$BBJ #CC	 	 ah@@E  		s   E   
EEr   N)r5   T)r   r   r   r   r  rN  r  r   r   rx   rv   r5  r5    s+    " "H (A (Arx   r5  c                  R    \ rS rSr% S\S'   S\S'   SS jrSS jrSS jrSS	 jrS
r	g)BaseConstanti  rF  r   rT  r   c                    gNr   r   rg  s    rv   r   BaseConstant.get_size  s    rx   c                    U R                   $ rq   r  rg  s    rv   r   BaseConstant.get_device   r  rx   c                    g rq   r   rg  s    rv   r|  BaseConstant.get_origin_node#  r  rx   c                    [        5       $ rq   r/   rg  s    rv   rs  BaseConstant.get_reads&  r  rx   r   NrJ  rQ  rA  rh  )
r   r   r   r   r   r   r   r|  rs  r   r   rx   rv   rW  rW    s"    rx   rW  c                  R    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrSS
 jrSrg)Constanti*  r   r   rF  r   rT  r   c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ rq   )r^   r  r   r   r   r`  s    rv   r&  $Constant.make_loader.<locals>.loader1  s    <<

DJJ77rx   r  r   r  s   ` rv   r  Constant.make_loader0  s    	8 rx   c                    g rq   r   rg  s    rv   r  Constant.realize6  r  rx   c                @    [        U R                  U R                  US9$ )N)r   r   r   )rb  r   r   r  s     rv   r1  Constant.constant_to_device9  s    djj

6JJrx   r   NrU  rN  rn  )	r   r   r   r   r   r  r  r1  r   r   rx   rv   rb  rb  *  s#    JKrx   rb  c                  H    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrS
rg)IndexingConstanti=  r   r   rF  r   rT  r   c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ rq   )r^   r  r   r   re  s    rv   r&  ,IndexingConstant.make_loader.<locals>.loaderD  s    >>$**djj99rx   r  r   r  s   ` rv   r  IndexingConstant.make_loaderC  s    	: rx   c                @    [        U R                  U R                  US9$ )N)r   r   r   )rm  r   r   r  s     rv   r1  #IndexingConstant.constant_to_deviceI  s    djj

6RRrx   r   NrU  rn  )r   r   r   r   r   r  r1  r   r   rx   rv   rm  rm  =  s    JSrx   rm  c           	     b    [        S [        U [        R                  U5      U5       5       5      $ )Nc              3  H   #    U  H  u  pnUS :H  =(       d    X:H  v   M     g7fr  r   )r  leftrightr   s       rv   r  2is_contiguous_strides_for_shape.<locals>.<genexpr>P  s.      "
D 		"T]""
s    ")r  r   r   r   )r   r'  s     rv   is_contiguous_strides_for_shapery  M  s5      !$N55e<e"
  rx   c                <    [         R                  U R                  -  $ rq   )r6   padding_alignment_bytesitemsizer  s    rv   get_align_for_dtyper}  X  s    ))U^^;;rx   c                  ,    \ rS rSrSrSS jrSS jrSrg)	r  i\  ztAbstract base for Layout, MultiOutputLayout, NoneLayout.
Represents the memory layout of the output of an Operation.c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   r   OutputSpec.get_device`  r  rx   c                >    [        [        U 5      R                  5      erq   r  rg  s    rv   storage_sizeOutputSpec.storage_sizec  r  rx   r   NrQ  ri  )r   r   r   r   r!  r   r  r   r   rx   rv   r  r  \  s    C77rx   r  c                      \ rS rSrS\" S5      4           SS jjrSS jr\rSS jrSS jr	SS jr
\      SS	 j5       rSS
 jrSS jrS r\S 5       rS rS rS rSS jrSS jrSS jrSrg)r  ig  Nr   c                    Uc  [         R                  U5      nXl        X l        [	        U5      [	        U5      :X  d   SU SU 35       e[        S U 5       5      (       d   eX0l        X@l        XPl        g )Nr  	, stride=c              3  N   #    U  H  n[        U[        [        45      v   M     g 7frq   )rr   r   rs   r  s     rv   r  "Layout.__init__.<locals>.<genexpr>v  s     <t!:a$--t   #%)	r   r   r   r   r   r  r   r   r7  )r`  r   r   r   r   r7  s         rv   rs  Layout.__init__i  sq     >#66t<F
4yCK'H5ix)HH'<t<<<<< $	"("rx   c                \   SnU R                   S:w  a  SU R                    3nU R                  R                  c  SOSU R                  R                   3n[        U 5      R                   SU R                  R                   U SU R
                   SU R                   SU R                   U S	3$ )
Nr  r   z	, offset=:z('z', z, size=r  r  )r7  r   r   r   r   r   r   r   )r`  r7  device_index_strs      rv   r  Layout.__str__{  s    ;;! .F!%!2!2!:2!DKKDUDUCV@WDz""#2dkk&6&6%78H7ITZZL YII;i}VHA?	
rx   c                    U R                   $ rq   r  rg  s    rv   r   Layout.get_device  r  rx   c                    [         R                     [        R                  " [	        U R
                  5      [	        U R                  5      U R                  U R                  S9sS S S 5        $ ! , (       d  f       g = f)Nr   r   )	r`   	fake_moder   r   rN   r   r   r   r   rg  s    rv   get_exampleLayout.get_example  sG    [[&&'		2'4jj{{	 [[s   AA,,
A:c                B    [        U R                  U R                  5      $ rq   )ry  r   r   rg  s    rv   r   Layout.is_contiguous  s    .t{{DIIFFrx   c                    [        U 5      nUS;  d	  U S   S:X  a  g[        U[        U 5      U 5       H  u  p4nUS:w  d  M  X4:w  d  M    g   g)N)r      r5   FT)r   r   r"   )r'  r:  ndimrv  rw  r   s         rv   is_channels_last_contiguous"Layout.is_channels_last_contiguous  sX     5zvqQ!$3E:E"
D qyT]	"

 rx   c                    [        U R                  [        [        R	                  [        [        U R                  5      5      5      5      U R                  5       H  u  pnUS:w  d  M  X:w  d  M    g   g)Nr5   FT)r   r   reversedr   r   r   r   )r`  rv  rw  r   s       rv   is_transposedLayout.is_transposed  sZ    !$KK^66tHTYY<O7PQRII"
D
 qyT]"
 rx   c           	        [        U R                  5      [        U5      :X  d   e[        U R                  5       VVs/ s H5  u  p#[        R
                  R                  R                  USS9S:w  d  M3  UPM7     nnnU Vs/ s H  o R                  U   PM     nnU Vs/ s H  o!U   PM	     nnS nU" U5      nS/[        U5      -  n[        [        U5      5       H  nXR   XqU   '   M     [        [        U5      S-
  5       H_  nXr   XrS-      :  n[        U[        5      (       d2  [        R
                  R                  R                  Xr   XrS-      :  SS9nU(       d  M_    g   gs  snnf s  snf s  snf )	Nr   r  r5   c                d    [        U 5      nU  Vs/ s H  o!R                  U5      PM     sn$ s  snf rq   )r  r   )arr
sorted_arrelements      rv   sorted_indices0Layout.is_stride_ordered.<locals>.sorted_indices  s,    J=@AS'$$W-SAAAs   -r  Tr  F)r   r   r   r   r`   r   r   r   r   rr   r   
_shape_envr  )	r`  r   r   r(  non_1_indicesr   r  stride_orderedexprs	            rv   r,  Layout.is_stride_ordered  sl   4;;3u:---
 $DII.
.ww))#):a? . 	 
 +88-Q++a.-8#01=aq=1	B
 u% E
*s5z"A'-yN8$ # s5zA~&A!$~!e'<<DdD))ww))77"%1u(==d 8  t ' ;
 91s   2E'3E' E-E2c                    S/[        [        [        S[        U R                  5      S-
  5      5      5      -   n[        U5      /U-   nU R                  U5      $ Nr   r5   )r   r  r   r   r   r,  r  s     rv   is_channels_last_stride_ordered&Layout.is_channels_last_stride_ordered  sN    d8E!S-=-A$BCDDUu$%%e,,rx   c                   [        U5      n[        U 5      S:X  a  U $ [        R                  (       d  [        R                  X5      (       a  U $ [        R                  " 5       n[        US5      (       a#  UR                  R                  SS5      (       a  U $ [        S [        R                  " X5       5       5      (       d  U $ [        U 5      n[        U5      n[!        [        U 5      5       Vs/ s H  nSPM     nnSXS   '   Sn	[#        USS SS9 HE  u  pXjS-
     nX   X   -  nU[        R$                  :  a  X-  S:w  a  ['        X5      U-  nS	n	XU'   MG     U	(       d  U $ [(        =R*                  S-  sl        U$ s  snf )
zv
The padding does not change stride order but makes sure all strides larger
than the threshold are multiple of align.
r   rF  dislike_paddingFc              3  b   #    U  H%  n[        U[        [        R                  45      v   M'     g 7frq   )rr   rs   r   r   r  s     rv   r  &Layout._pad_strides.<locals>.<genexpr>  s*      
6 q3.//6s   -/r5   N)rL  T)r}  r   r6   pad_channels_lastr  r  r`   get_current_noder  rF  getr  r  chainr   r   r   r   padding_stride_thresholdrL   r   num_comprehensive_padding)
in_stridesr   r   aligncurrent_fx_noder&  r   r   new_stridespaddedrankr   prev_idxr   s                 rv   _pad_stridesLayout._pad_strides  s    $E*z?a''F,N,N-
 -
 ,,.?F++0D0D0H0Hu1
 1
   
__Z6
 
 
 '
3,\:
"'J"89"8Qq"89 &'qM"":ab>;ID!(+H *T^;F777FNa<O /%7% <  ))Q.)- :s   0Fc                    [        U [        5      (       d   eU R                  c   eU R                  U R                  U R                  U R
                  5      U l        g rq   )rr   r   r   r  r   r   rg  s    rv   r  Layout.pad_strides  sG    $////{{&&&''TYY

Krx   c                P    [         R                  =(       a    [        U [        5      $ rq   )r6   comprehensive_paddingrr   r   rg  s    rv   r  Layout.should_pad_strides  s    ++P
40PPrx   c                    [        U [        5      (       a  U $ U R                  5       (       a  U R                  5         [        U R                  U R
                  U R                  U R                  U R                  5      $ rq   )	rr   r6  r  r  r   r   r   r   r7  rg  s    rv   as_fixedLayout.as_fixed  s_    dK((K""$$KKJJIIKKKK
 	
rx   c                    [         R                  (       d   S[        U 5      R                   S35       eU R	                  5       R                  5       $ )Nzconvert z to FixedLayout first)r   r  r   r   r  r  rg  s    rv   r  Layout.make_indexer)  sG    ,, 	
tDz**++@A	
, }}++--rx   c                4   U R                   UR                   :H  =(       ay    U R                  UR                  :H  =(       aY    U R                  UR                  :H  =(       a9    U R                  UR                  :H  =(       a    U R                  UR                  :H  $ rq   r   r   r   r   r7  )r`  others     rv   __eq__Layout.__eq__/  so    KK5<<' ,

ekk),		UZZ', u||+, u||+	
rx   c                X    [        U R                  U R                  U R                  5      $ rq   )r   r   r   r7  rg  s    rv   r  Layout.storage_size8  s    .tyy$++t{{SSrx   )r   r   r7  r   r   )r   rT  r   rF  r   re  r   zOptional[list[Expr]]r7  r   r   r   r[  rS  )r   torch.TensorrI  )r'  rZ  r:  rZ  r   r   rW  r   z
sympy.Expr)r   r   r   r   r   rs  r  r
  r   r  r   rr  r  r  r,  r  r  r  r  r  r  r  r  r   r   rx   rv   r  r  g  s     (,qz## # 	#
 %# # 
#$	
 HG !,>	 !F- 8 8tL
Q
.
Trx   r  c                  "    \ rS rSrSrSS jrSrg)r6  i<  z A Tensor layout we cannot changec                   ^  U 4S jnU$ )z1A closure containing math to read a given elementc                $  > [        U 5      [        TR                  5      :X  d   e[        U 5      [        TR                  5      :X  d   eTR                  n[	        U TR                  TR                  5       H  u  p#nUS:w  d  M  XU-  -   nM     U$ r  )r   r   r   r7  r   )r   r  r   r   szr`  s        rv   r$  )FixedLayout.make_indexer.<locals>.indexerB  s{    u:T[[!1111u:TYY///[[F#&udkk499#ER7#Fl2F $F Mrx   r   r`  r$  s   ` rv   r  FixedLayout.make_indexer?  s    	 rx   r   NrW  )r   r   r   r   r!  r  r   r   rx   rv   r6  r6  <  s
    *rx   r6  c                     ^  \ rS rSrSrSr\S 5       r\S 5       r\S 5       r	\S 5       r
\S 5       rSS	 jrSS
 jrS rS rSSU 4S jjjrSrU =r$ )r   iN  z(A Tensor layout we are allowed to changeFc                    [        U 5      S:X  a  / $ [        R                  R                  /n[	        U SS  5       H  nUR                  X!S   -  5        M     [        [	        U5      5      $ )Nr   r5   r  )r   r   r  r  r  r  r   )sizesreversed_stridesr   s      rv   r   !FlexibleLayout.contiguous_stridesT  s^    u:?I!GGKK=U12Y'D##DB+?$?@ (H-.//rx   c                    [        [        [        U 5      5      5      [        U5      :X  d   X45       e[        R                  R
                  nS/[        U5      -  nU H  nX#U'   X U   -  nM     U$ )z
Create a stride based on the order the dimensions should be filled in.

In this format, channels last would be:
    [1, 3, 2, 0]
N)r0   r   r   r   r  r  )r  r   next_strider:  r   s        rv   fill_orderedFlexibleLayout.fill_ordered]  sm     %E
+,
50AAQE>QAggkk&3u:%A$AJ%a0K  rx   c                    [        [        [        U 5      5      5      [        U5      :X  d   e[        U5      n[        R                  X5      $ )zz
Create a stride based on the sorted order of a permuted range.

In this format, channels last would be:
    [3, 0, 2, 1]
)r0   r   r   r   r   r  )r  r   r   s      rv   r  FlexibleLayout.stride_orderedn  s@     %E
+,
50AAAA,U3
**5==rx   c                D   U[         R                  :X  a  [        R                  U [        5      $ U[         R
                  :X  a  [        R                  U [        5      $ U[         R                  :X  a  [        R                  U 5      $ [        R                  SU5        [        e)a9  
Create a stride based on a memory format.

Memory format is translasted into a stride order,
so channels_last is the same as:
    FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

This interface does not support memory_format `torch.preserve_format`
which should be used to deduce a format from another source
z>stride_ordered_for_memory_format, unsuppored memory_format: %s)r   channels_lastr   r  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr   r  r  r  )r  memory_formats     rv    stride_ordered_for_memory_format/FlexibleLayout.stride_ordered_for_memory_formatz  s     E///!008IJJe444!008JKKe555!44U;;IIP &%rx   c                (   [        U 5      [        U5      :X  d   eU Vs/ s H,  n[        R                  R                  R	                  U5      PM.     nn[        [        [        U5      5      UR                  S9n[        R                  X5      $ s  snf )z
Create a stride that has the same stride order as given stride

For example, if given stride is [1000, 1, 100, 10],
the fill order should be [1, 3, 2, 0]
r  )
r   r`   r   r   r  r  r   __getitem__r   r  )r  r   ru   r   s       rv   same_orderedFlexibleLayout.same_ordered  su     5zS[(((BHI&Q!''""55a8&IE#f+.F4F4FG
**5== Js   3Bc                ,   U R                  U R                  U5      nU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[        U R                  U R                  U R                  UU R                  5      $ rq   )r  r   r  r  r   r6  r   r7  )r`  r   r  r=  s       rv   as_stride_orderFlexibleLayout.as_stride_order  so    ((E:
""$$**:yy$**MJKKJJIIKK
 	
rx   c                    UnU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[	        U R
                  U R                  U R                  UU R                  5      $ rq   )r  r  r   r   r6  r   r7  )r`  r  r  r=  s       rv   as_exact_stridesFlexibleLayout.as_exact_strides  s^    "
""$$**:yy$**MJKKJJIIKK
 	
rx   c                   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  5      $ rq   )r  r   r  r  r   r6  r   r7  )r`  r   r=  s      rv   as_fill_orderFlexibleLayout.as_fill_order  sm    &&tyy%8
""$$**:yy$**MJKKJJIIKK
 	
rx   c                   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  5      $ rq   )r  r   r  r  r   r6  r   r7  )r`  r   r=  s      rv   as_same_orderFlexibleLayout.as_same_order  sm    &&tyy&9
""$$**:yy$**MJKKJJIIKK
 	
rx   c                   > U(       a  [         R                  X45      nO[         R                  U5      n[        TU ]  XX55        g rq   )r   r  r   r  rs  )r`  r   r   r   r&  r:  r  s         rv   rs  FlexibleLayout.__init__  s5    $11$EG$77=G6rx   r   r`  rq   r>  )r   r   r   r   r!  r  rr  r   r  r  r  r  r  r  r  r   rs  r   r  r  s   @rv   r   r   N  s    2N 0 0    	> 	> & &0 
> 
>





7 7rx   r   c                  @   ^  \ rS rSrSrSU 4S jjrSS jrS rSrU =r	$ )	NonOwningLayouti  z,Is a view into the storage of another tensorc                   > UR                  5       n[        TU ]	  UR                  UR                  UR
                  UR                  5        Xl        g rq   )r   r  rs  r   r   r   r   view)r`  r  r4  r  s      rv   rs  NonOwningLayout.__init__  s?    "MMLLKKMM		
 	rx   c                >    U R                  5       R                  5       $ rq   )r  r  rg  s    rv   r  NonOwningLayout.make_indexer  s    }}++--rx   c                    U R                   R                  5       R                  nUS:X  a  gSSKJn  [
        R                  R                  R                  X5      $ )Nr   Tr5   )	ALIGNMENT)	r  r   r7  utilsr  r`   r   r   statically_known_multiple_of)r`  r7  r  s      rv   maybe_guard_aligned#NonOwningLayout.maybe_guard_aligned  sB    %%'..Q;$ww<<VOOrx   )r  )r  zUnion[BaseView, TensorBox]r   r   rW  )
r   r   r   r   r!  rs  r  r  r   r  r  s   @rv   r  r    s    6.P Prx   r  c                      \ rS rSrSrSrg)CommBufferTypei  symm_memr   N)r   r   r   r   SYMM_MEMr   r   rx   rv   r  r    s    Hrx   r  c                  R   ^  \ rS rSr% SrS\S'   S\S'         S	U 4S jjrSrU =r$ )
CommBufferLayouti  a\  
A layout that signifies the buffer is a comm buffer.
In terms of striding, the layout is identical to `FixedLayout`.

Buffers with this layout do not participate in in-place reuse - it can be
neither the source nor the target for in-place reuse.

For detailed motivation and usage of this layout, see
NOTE [lowering-time collective optimization].
r  comm_buffer_typer   
group_namec                  > [        U[        5      (       d  [        SU S35      eUR                  5       n[        TU ]  UR                  UR                  UR                  UR                  UR                  S9  X l        X0l        g )NzJA `CommBufferLayout` can only be initialized with a `FlexibleLayout` (got z).r  )rr   r   r  r  r  rs  r   r   r   r   r7  r  r  )r`  r4  r  r  fixedr  s        rv   rs  CommBufferLayout.__init__  s     &.11 ++1("6 
 !<<++<<<< 	 	
 !1$rx   )r  r  )r4  r   r  r  r  r   )	r   r   r   r   r!  r   rs  r   r  r  s   @rv   r  r    s;    	 %$O%% )% 	% %rx   r  c                      \ rS rSr% S\S'   \R                  " S S9rS\S'   \R                  " S S9rS\S	'   SS
 jr	S r
SS jrSrg)
NoneLayouti  rR  r   c                     S/$ r  r   r   rx   rv   r  NoneLayout.<lambda>)  s    rx   default_factoryrb  r   c                     S/$ r  r   r   rx   rv   r  r  *  s    1#rx   r   c                    gr  r   rg  s    rv   r  NoneLayout.storage_size,  r   rx   c                    U $ rq   r   rg  s    rv   r  NoneLayout.as_fixed/      rx   c                    U R                   $ rq   r  rg  s    rv   r   NoneLayout.get_device2  r  rx   r   Nri  rQ  )r   r   r   r   r   rp  rq  r   r   r  r  r   r   r   rx   rv   r  r    sC     #"!''DD)D#))+FFIFrx   r  c                     ^  \ rS rSrSU 4S jjr\SS j5       r\R                  SS j5       rSS jrSS jr	S r
\SS j5       rS	 rSS
 jrSrU =r$ )MutationLayoutSHOULDREMOVEi6  c                  > [         TU ]  UR                  5       UR                  5       UR	                  5       S 5        Xl        U R                  5       R                  5       n[        R                  R                  U5        g rq   )r  rs  r  r   r   r  
get_bufferr  r`   r   mark_buffer_mutated)r`  r  r   r  s      rv   rs  #MutationLayoutSHOULDREMOVE.__init__7  sc    &&(OO		
  ))+	##D)rx   c                6    U R                  5       R                  $ rq   )real_layoutr   rg  s    rv   r   !MutationLayoutSHOULDREMOVE.strideB  s    !(((rx   c                    g rq   r   )r`  r   s     rv   r   r2  F  s    rx   c                >    U R                  5       R                  5       $ rq   )r1  r  rg  s    rv   r  'MutationLayoutSHOULDREMOVE.storage_sizeJ  s    !..00rx   c                p   ^ U4S jmT" U R                   5      n[        U[        5      (       d   S5       eU$ )Nc                   > [        U [        5      (       a  T" U R                  5      $ [        U [        5      (       a  T" U R	                  5       5      $ [        U [
        5      (       a  T" U R                  5      $ U $ rq   )rr   r+  r  r3  r  
MutableBoxr3  )r  unwrap_viewss    rv   r9  ;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_viewsN  sb    &"<==#FMM22&(++#F$6$6$899&*--#FKK00Mrx   z1MutationLayoutSHOULDREMOVE must refer to a buffer)r  rr   r)  )r`  r  r9  s     @rv   r-  %MutationLayoutSHOULDREMOVE.get_bufferM  s<    	 dkk*&&)) 	
?	
) rx   c                6    U R                  5       R                  $ rq   )r-  r4  rg  s    rv   r1  &MutationLayoutSHOULDREMOVE.real_layout]       '''rx   c                   UR                  5         [        R                  R                  UR	                  5       5        [        U[        5      (       a  UR                  nUR                  5         U(       d  [        R                  UR                  5       UR                  5       UR                  5       [        UR                  5       UR                  5       5       VVs/ s H.  u  pE[        R                  R                   R#                  XE5      PM0     snnS9R                  nUR                  5         [        UR                  R$                  [&        5      (       d   e[)        U5      UR                  l        UR                  $ s  snnf )Nr-  )r  r`   r   r.  r  rr   rm   r3  r  r  r  r   r   r  r   r   r   r  r4  r   r+  )r  srcdstunsafe_aliasrS  rT  s         rv   realize_into'MutationLayoutSHOULDREMOVE.realize_into`  s    	
##CLLN3c9%%((C 	""~~'mmo* !$CLLNCLLN C C GG$$11!7 C	 #  d  	#((//>::::4S9xxs   5Fc                    U $ rq   r   rg  s    rv   r  #MutationLayoutSHOULDREMOVE.as_fixed  r'  rx   c                6    U R                   R                  5       $ rq   )r  r  rg  s    rv   r  'MutationLayoutSHOULDREMOVE.make_indexer  r  rx   )r  )r  rn   r   r   r   re  )r   r   r   r   r  )r   r)  r`  rW  )r   r   r   r   rs  ru  r   setterr  r-  r1  r  rC  r  r  r   r  r  s   @rv   r+  r+  6  sg    	* ) ) ]] 1 (    D* *rx   r+  c                  p  ^  \ rS rSr% S\S'   S\S'   S$U 4S jjrS%S jrS&S jrS'S	 jrS(S
 jr	S)S jr
\S*S j5       rS+S jrS,S jrS-S jrS.S jrS/S jrS rS rS0S$S jjrS$S jrS$S jr S0 S$S jjrS rS1S jrS2S3S jjrS rS4S jrS4S jrS5S jr S0   S6S jjrS7S  jr S8S! jr!S9S" jr"S#r#U =r$$ ):r)  i  rO  r   r  r4  c                F   > [         TU ]  5         U R                  SS 5        g r  )r  rh  rb  r  s    rv   rh  Buffer.__post_init__  s    t4rx   c                >    U R                  5       R                  5       $ rq   )r   r  rg  s    rv   r  Buffer.make_indexer  s     --//rx   c                J    U R                   (       d   U 5       eU R                   $ rq   rl  rg  s    rv   r  Buffer.get_name  s    yy$yyyrx   c                    [        U R                  [        5      (       a  U R                  R                  5       $ [	        [        U R                  5      R                  5      erq   )rr   r4  r  r  r  r   r   rg  s    rv   r  Buffer.get_example  s@    dkk6**;;**,,!$t{{"3"<"<==rx   c                >    U R                  5       R                  5       $ rq   )r  r   rg  s    rv   r   Buffer.get_device  s    ##%0022rx   c                    g rq   r   rg  s    rv   r  Buffer.get_defining_op  r  rx   c                6    U R                  5       R                  $ rq   )r   r   rg  s    rv   r   Buffer.dtype  s     &&&rx   c                :    / U R                  5       R                  Q$ rq   )r   r   rg  s    rv   r   Buffer.get_size  s    ("''((rx   c                :    / U R                  5       R                  Q$ rq   )r   r   rg  s    rv   r  Buffer.get_stride  s    *"))**rx   c                6    U R                  5       R                  $ rq   )r   r7  rg  s    rv   
get_offsetBuffer.get_offset  r>  rx   c                    [        U R                  [        5      (       a  U R                  $ [        [	        U R                  5      R
                  5      erq   )rr   r4  r  r  r   r   rg  s    rv   r   Buffer.get_layout  s7    dkk6**;;!$t{{"3"<"<==rx   c                    U R                   $ rq   r  rg  s    rv   r  Buffer.get_output_spec  r  rx   c                "    U R                  5       $ rq   )r  rg  s    rv   r  Buffer.get_storage_numel  s    ~~rx   c                    [        U R                  [        5      (       a@  [        U R                  [        5      (       d   U R                  R	                  5       U l        g g g rq   )rr   r4  r  r  r  rg  s    rv   r  Buffer.freeze_layout  sF    dkk6**:KK4
 4
 ++..0DK4
*rx   c                    [        U R                  [        5      (       d   eU R                  R                  XS9U l        g Nr'  )rr   r4  r   r  r   s      rv   r  &Buffer.freeze_layout_with_stride_order  s2    $++~6666kk11%1Urx   c                    [        U R                  [        5      (       d   eU R                  R                  U5      U l        g rq   )rr   r4  r   r  r  s     rv   r  $Buffer.freeze_layout_with_fill_order  s/    $++~6666kk//6rx   c                    [        U R                  [        5      (       d   eU R                  R                  U5      U l        g rq   )rr   r4  r   r   r
  s     rv   r  $Buffer.freeze_layout_with_same_order  s/    $++~6666kk//7rx   c                    [        U R                  [        5      (       d   eU R                  R                  XS9U l        g rj  )rr   r4  r   r  r  s      rv   r  'Buffer.freeze_layout_with_exact_strides  s9     $++~6666kk22 3 
rx   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ r  r  rg  s    rv   r  Buffer.is_zero_elements  r  rx   c                v   ^  T R                  5       (       a  [        [        T R                  5       S9$ U 4S jnU$ )Nr  c                   > TR                  5       n[        R                  " TR                  =(       d    SU" U 5      5      $ r  )r  r^   r  r   r   r$  r`  s     rv   r&  "Buffer.make_loader.<locals>.loader  s/    '')G88DII2GENCCrx   )r  r
   r  r   r  s   ` rv   r  Buffer.make_loader  s3      ""=0@AA	D rx   c                "    U R                  5       $ rq   r  r  s     rv   r  Buffer.codegen_reference  rV  rx   c                    g rq   r   rg  s    rv   r  Buffer.decide_layout  r  rx   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ grY  )rr   r4  r  r  r  rg  s    rv   r:  #Buffer.get_inputs_that_alias_output  s2    dkk?33KK$$--/00rx   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ grY  )rr   r4  r+  r  r  rg  s    rv   r4  Buffer.get_mutation_names  s3    dkk#=>>KK&&//122rx   c                6    [        U R                  5       /5      $ rq   )r0   r  rg  s    rv   rt  Buffer.get_read_names  s    4==?+,,rx   c                    [        5       $ rq   r/   r!  s     rv   r"  Buffer.get_free_symbol_uses       |rx   c                    [        5       $ rq   r/   rg  s    rv   r  Buffer.get_unbacked_symbol_defs  r  rx   c                    g rq   r   rg  s    rv   r  Buffer.realize  r  rx   c                    gr  r   rg  s    rv   should_allocateBuffer.should_allocate  s    rx   r  r>  rW  r[  )r   z!Union[torch.Tensor, sympy.Symbol]rQ  rB  rE  rJ  rI  rM  rG  rH  r`  rU  rq   rP  ro  r?  rk  r  rN  rI  )%r   r   r   r   r   rh  r  r  r  r   r  ru  r   r   r  r_  r   r  r  r  r  r  r  r  r  r  r  r  r:  r4  rt  r"  r  r  r  r   r  r  s   @rv   r)  r)    s     
50>
3 ' ')+(>
 1V78
 ,1
	
U	

- %*!	!
 rx   r)  c                  J    \ rS rSrSS jrSS jr\R                  rS	S jrSr	g)
OperationBufferi  c                    U /$ rq   r   rg  s    rv   r  OperationBuffer.get_outputs  s	    vrx   c                    U $ rq   r   rg  s    rv   r  OperationBuffer.get_defining_op  r'  rx   c                X    [         R                  U 5        [        R                  U 5        g rq   )r)  rh  rw  rg  s    rv   rh  OperationBuffer.__post_init__  s    T"%rx   r   Nr  r   rw  r>  )
r   r   r   r   r  r  rw  r7  rh  r   r   rx   rv   r  r    s     #55&rx   r  c                      \ rS rSrSS jrSrg)InputBufferi  c                    gr  r   rg  s    rv   r  InputBuffer.num_reads  r   rx   r   Nri  )r   r   r   r   r  r   r   rx   rv   r  r    s    rx   r  c                      \ rS rSrSrSrg)DonatedBufferi#  aA  
Represents a donated buffer which is a saved tensor that is not alias to any
fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
reuse the input tensor memory during backward since it might be used in another
function. However, donated buffer can be inplace reused during backward
to save memory.
r   N)r   r   r   r   r!  r   r   rx   rv   r  r  #  s    rx   r  c                  8    \ rS rSr% SrS\S'   SS jrS	S jrSrg)
r.  i-  NrR  r,  c                   ^  SU 4S jjnU$ )Nc                   > TR                  5       R                  5       n[        R                  " [        R
                  R                  TR                  5       TR                  5      U" U 5      5      $ rq   )	r   r  r^   r  r`   r   constant_namer  r,  rv  s     rv   r&  *ConstantBuffer.make_loader.<locals>.loader1  sP    oo'446G88%%dmmot7K7KL rx   r  r   r  s   ` rv   r  ConstantBuffer.make_loader0  s    	 rx   c                    [        [        R                  R                  U R	                  5       U5      U R
                  S9$ N)r   r4  )r.  r`   r   r  r  r4  r  s     rv   r1  !ConstantBuffer.constant_to_device:  s/    &&t}}?
 	
rx   r   rU  rn  )	r   r   r   r   r,  r   r  r1  r   r   rx   rv   r.  r.  -  s    .2O+2
rx   r.  c                  V    \ rS rSrS	S jr S
   SS jjrSSS jjrSS jrSS jrSr	g)NoneAsConstantBufferi@  c                    [        5       $ rq   r/   rg  s    rv   rs  NoneAsConstantBuffer.get_readsB  r  rx   c                    [        5       $ rq   r/   r!  s     rv   r"  )NoneAsConstantBuffer.get_free_symbol_usesE  r  rx   Nc                J    [         R                  R                  R                  $ rq   )r`   r   r  none_strr  s     rv   r  &NoneAsConstantBuffer.codegen_referenceJ  s    ww##,,,rx   c                    [        S S9$ Nr  )r  rg  s    rv   r  $NoneAsConstantBuffer.get_output_specM  s    &&rx   c                    gr  r   rg  s    rv   r  &NoneAsConstantBuffer.has_tensor_outputP  r  rx   r   rh  r`  rk  rq   rP  rH  rI  )
r   r   r   r   rs  r"  r  r  r  r   r   rx   rv   r  r  @  s0     %*!	!
-'rx   r  c                  N    \ rS rSr% S\S'    S	   S
S jjrSSS jjrSS jrSrg)r   iT  r   r  c                .    [        U R                  U5      $ rq   )r   r  r!  s     rv   r"  *ShapeAsConstantBuffer.get_free_symbol_usesX  s      		=99rx   Nc                h    [         R                  R                  R                  U R                  5      $ rq   )r`   r   r  codegen_sizevarr  r  s     rv   r  'ShapeAsConstantBuffer.codegen_reference]  s!    ww##33DII>>rx   c                    gr  r   rg  s    rv   r  'ShapeAsConstantBuffer.has_tensor_output`  r  rx   r   r`  rk  rq   rP  rI  )	r   r   r   r   r   r"  r  r  r   r   rx   rv   r   r   T  s+    
J %*:!:	!:
?rx   r   c                    ^  \ rS rSr% S\S'   SS jrSS jrSS jrSS jrSS jr	 S   SS	 jjr
SU 4S
 jjrSS jrS S jrS!S jr\  S"S j5       r  S#     S$S jjr\ S%S j5       rS&S jrSS jrS'S jrS'S jrS(S jrSrU =r$ ))r  id  r  r3  c                    U R                   b  U R                   $ [        U R                  S5      (       a  U R                  R                   $ g)z}
Returns self.name if it exists, otherwise returns the name of the data node if that exists.
If neither exist, returns None.
Nr   )r   r  r3  rg  s    rv   get_computed_buffer_name'ComputedBuffer.get_computed_buffer_nameh  s:    
 99 99499f%%99>>!rx   c                6    U R                   R                  5       $ rq   r3  r  rg  s    rv   r  ComputedBuffer.num_readss  rn  rx   c                6    U R                   R                  5       $ rq   r3  rs  rg  s    rv   rs  ComputedBuffer.get_readsv  rn  rx   c                6    U R                   R                  5       $ rq   rt  rg  s    rv   rt  ComputedBuffer.get_read_namesy  rv  rx   c                   [         R                  " [        SS5         U R                  R	                  5       (       aT  [        U R                  5       U R                  R                  5       U R                  R                  5       5      sS S S 5        $ [        U R                  5       U R                  R                  5       5      sS S S 5        $ ! , (       d  f       g = fr  )
r   r   r   r3  r%  r?   get_store_functionr  r(  r   rg  s    rv   r  ComputedBuffer.get_read_writes|  s    \\.*:DAyy++--*++-II002II002 BA +++-II&&( BAAs   A*C1C
Cc                    [        U R                  5       U5      [        U R                  5       U5      -  [        U R                  5       U5      -  U R                  R                  U5      -  $ rq   )r   r   r  r_  r3  r"  r!  s     rv   r"  #ComputedBuffer.get_free_symbol_uses  s`    * T]]_m<t0-@At0-@A ii,,];<	
rx   c                   > U R                  5       (       dV  U R                  [        R                  R                  ;  a.  U R                  5       S:X  a  U R                  R                  5       $ [        TU ]  5       $ r  )	r%  r   r`   r   mutated_buffersr  r3  r  r  r  s    rv   r  ComputedBuffer.make_loader  sZ    ''))		!8!88 A% 99((**w"$$rx   c                   U R                  5       R                  5       R                  5       n[        U R                  [
        [        [        45      (       a+  [        U R                  R                  U R                  U5      $ [        U R                  [        5      (       d   e[        U R                  R                  U R                  U5      $ rq   )r   r  r  rr   r3  rt  r  r  r
   r  r   r  r'  r  s     rv   r  !ComputedBuffer.get_store_function  s    //#,,.;;=dii)T4!89949944diiIIdii333349911499gFFrx   c                   [        U R                  [        5      (       Ga  [        R                  " U R
                  R                  5       U R
                  R                  5       5      u  u  pnU R                  5       R                  n[        S U 5       5      (       d   eU VVs/ s Hk  n[        U[        R                  5      (       d  M$  [        UR                  U Vs0 s H%  ofS:w  d  M
  U[        R                  R                   _M'     sn5      PMm     nnnU(       a  [        U R
                  ["        [$        45      (       a  U R
                  R'                  X5      nOUnU Vs/ s H,  n[(        R*                  R,                  R/                  X5      PM.     n	nSSKJn
  U
" XR5                  5       5      $ gs  snf s  snnf s  snf )aD  
If our layout is still flexible, try to determine the stride order based on stride orders of reads.

TODO(jansel): A better algorithm here would look at downstream consumers of this
              value and try to do global graph-level layout optimization.
              This is also something just begging to be autotuned.
c              3  v   #    U  H/  n[        U[        R                  [        R                  45      v   M1     g 7frq   )rr   r7   StarDep	MemoryDepr@  s     rv   r  0ComputedBuffer.get_fill_order.<locals>.<genexpr>  s2      A 1|33\5K5KLMMs   79r   r5   pick_loop_orderN)rr   r4  r   r7   r  r3  r  r(  r  r  r  r  r\   r   r   r  r  r  r  r   r`   r   r   r  	schedulerr  r   )r`  
index_varsr  r   r  r  vr  r  stride_lengthsr  s              rv   r   ComputedBuffer.get_fill_order  sx    dkk>22.:.M.M		,,.		0L0L0N/+(Z! ((*00E       Aa!7!78 Y
177n$WnUVPV_Q_n$WX   dii$66"ii//
KG(GMR"MRTAGG$$11$@U  " 7&~}}GG# %X"s*   %#F?F? 	F:-F:F?(3G:F?c                    [        U R                  [        5      (       a:  U R                  5       nU(       a  U R	                  U5        g U R                  5         g g rq   )rr   r4  r   r   r  r  r  s     rv   r  ComputedBuffer.decide_layout  sC    dkk>22'')E2259""$ 3rx   c                   [         R                  " U R                  R                  5       U R                  R	                  5       SS9u  p[
        R                  " [        SU R                  5       5         [        U R                  5       U R                  5       (       a  UOUS S U/UQ76 nS S S 5        / n/ n/ n/ nUR                  5        Hf  u  pXS   ;   a-  U(       a   eUR                  U5        UR                  U	5        M:  XS   ;   d   eUR                  U5        UR                  U	5        Mh     Xg4WXE44$ ! , (       d  f       N= f)Nqrk   r,  r5   r   )r7   r  r3  r  r(  r   r   r.  r   rA   r  r%  itemsr  )
r`  r   
var_rangesr.  r  reduce_vars
index_sizereduce_sizer  r   s
             rv   get_default_sizes_body%ComputedBuffer.get_default_sizes_body  s6    (::II((*DII,H,H,JSV
 \\.*;T__=NO'')0022Ra 	D P 
!#
$$&DAG|&&!!!$!!!$G|#|""1%""1% ' ($0III) POs   37D;;
E	c                  ^ ^^ T R                  5       u  u  p4nu  pgU(       a  U" X44XVU45      u  u  p4nu  pg/ UR                  R                  5       QmUb  [        U[        5      (       a  [        U5      S:X  d   eUu  p[        U[        5      (       d   e[        U	[        5      (       d   e[        S U	 5       5      (       d   eUR                  n
X:X  d	   U
U45       eU	 Vs/ s H  oT;  d  M
  UPM     n	nTU	-  m/ UR                  5       Qm[        R                  R                  T [        R                  5      (       d  TR!                  UR#                  5       5        UUU 4S jnXg-   n[%        ['        T 5      5      (       + =(       d    [(        R*                  (       + nU" UUUU5      u  nnnU" X}XN5      u  nnn[,        R.                  " UUSS9u  u  nnn[1        UU" U5      U" U5      /UUU5      nUU4U4$ s  snf )a  
This is a main place where we do loop transformations in a
backend-agnostic way.

Here we:
    1) Remove any 1 dimensions
    2) Fuse contiguous dimensions together
    3) Reorder dimensions based on stride orders

Optional argument extra_indexing_constraints can be used to append additional
indexing expressions to existing ones derived from buffer's body. This can be useful
to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
the scheduler node compatible with other nodes.
Optional argument recompute_sizes_body_func can be used to recompute sizes and body
on the default body. This can be useful to append additional loop transformations.
r   c              3  B   #    U  H  n[        U[        5      v   M     g 7frq   )rr   r   )r  fs     rv   r  6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>5  s     H4Gqz!T**4Gs   c           	        > TR                  XUT
5      u  p$nU" U 5      n U(       aD  [        R                  R                  R	                  U U[        T	X5      5      u  p&n[        XV5      nOUnX(U4$ rq   )_apply_loop_reorderingr`   r   r   _simplify_loopsr;   r   )x_varssupport_varsr  simplify_loopsreindex0r   r   _pruner   index_formulasmemory_addrsr`  s            rv   simplify_and_reorderAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorderF  sy    (,(C(Ce\)%EX f%F*+''*:*:*J*J,^VK+'
 *(="8++rx   prk   )r  indexing_exprsr   rr   r   r   r   r   r  r  get_write_exprsr`   r   r  r8   PREFER_STORE_LOOP_ORDERextendget_read_exprsrW   r  r6   loop_ordering_after_fusionr7   index_vars_no_squeezerA   )r`  extra_indexing_constraintsrecompute_sizes_body_funcr  r  r.  r  r  extra_indexing_rangesextra_indexing_exprexpected_var_rangesr  r  r  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsr  r  r  s   `                     @@rv   r  #ComputedBuffer.simplify_and_reorder  sV   4 '')		
%Z%Z %
 *)4k1J	))
 94..5578%15u==23q89 :T6!3T::::148888H4GHHHHH"&//&? #%B ? /#.a>2I.   # 11N0--/0ww""4)O)OPP 3 3 56	,$ "/t,--VV5V5V1V 	 (<	(
$\1 ,@{,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11w#s   %	G?2G?c           
     n   SSK Jn  Uc  / n U Vs/ s H-  n[        R                  R                  R                  X`U5      PM/     nn[        U5      [        U5      :X  a  [        US   5      [        U 5      :X  d   e[        [        U" XrU5      5      5      nU V	s/ s H  oU	   PM	     nn	U[#        U5      [%        U5      4$ s  snf ! [         a^    [        R                  (       a)  [        R                  S[        [        X5      5      U5        [        [!        [        U5      5      5      n Nf = fs  sn	f )zE
Shuffle the order of loops around to hopefully improve performance.
r5   r  r   z%Did not simplify complex index:
%s
%s)r  r  r`   r   r   r  r   r   r  	Exceptionr6   r  r  warningr   r   r   r   r   )
r  r  r  r  priority_idxr  r  r:  r   r   s
             rv   r  %ComputedBuffer._apply_loop_reorderingz  s#    	/L	, )(D   --dM(   w<3|#44WQZCM :   /',"OPQE $))5aq5)l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 4CAC D2C A%D/.D/c                6    U R                   R                  5       $ rq   r3  r(  rg  s    rv   r(  !ComputedBuffer.get_reduction_size      yy++--rx   c                6    U R                   R                  5       $ rq   r3  r%  rg  s    rv   r%  !ComputedBuffer.get_reduction_type  r  rx   c                6    U R                   R                  5       $ rq   )r3  r  rg  s    rv   r.  ComputedBuffer.is_no_op  rr  rx   c                    gNTr   rg  s    rv   r  ComputedBuffer.should_allocate  r  rx   c                8    U R                   R                  U5      $ )r+  r3  r1  r  s     rv   r1  !ComputedBuffer.constant_to_device  s    yy++F33rx   r   rN  ri  rh  r?  rg  r`  rk  rU  )r   zCallable[..., None])r   zOptional[list[int]]r>  )r   zetuple[tuple[list[sympy.Expr], list[sympy.Expr]], LoopBody, tuple[list[sympy.Expr], list[sympy.Expr]]]NN)r  *Optional[tuple[dict[Any, Any], list[Any]]]r  Optional[Callable[..., Any]]r   z:tuple[tuple[list[sympy.Expr], list[sympy.Expr]], LoopBody]rq   rl  rI  rn  )r   r   r   r   r   r  r  rs  rt  r  r"  r  r  r   r  rK   r  r  rr  r  r(  r%  r.  r  r1  r   r  r  s   @rv   r  r  d  s    
K	%%* %*
!
	!
6%G%N% J
J JD RVBFq2$Nq2 $@q2 
D	q2f  !B !BF..,4 4rx   r  c                     ^  \ rS rSrSr        SU 4S jjrSS jrS rSS jrSS jr	SS jr
  S   SS	 jjrS
rU =r$ )TemplateBufferi  zh
Represents a Triton (in the future other type) of template operator
that we can fuse an epilogue onto.
c                   > [         TU ]  S US9  [        R                  U5      U l        X0l        [        R                  R                  U 5      U l	        [        R                  R                  U 5        g r  )r  rs  InputsKernelunwrap_storagerM  make_kernel_renderr`   r   register_bufferr   register_operation)r`  r4  rM  r)  r  s       rv   rs  TemplateBuffer.__init__  sW     	d62"11&9"4GG++D1		""4(rx   c                     U R                  SS9$ )NT	normalize)r?   rg  s    rv   r  TemplateBuffer.get_read_writes  s    ''$'77rx   c           
       ^^^ U R                  5       mU R                  5       R                  5       mUU4S jn[        R                  " X R                  5       SUS9nU R                   Hf  mTR                  R                  5       mUU4S jnU=R                  [        R                  " UTR                  5       SSS9R                  -  sl        Mh     U$ )Nc                b   > [        U5      S:X  d   e[        R                  " TT" U 5      S5      $ )Nr   fake)r   r^   r!  )r   r  r$  r   s     rv   dummy1TemplateBuffer.extract_read_writes.<locals>.dummy  s,    v;!###99T75>6::rx   r   r.  c                ~   > [        U5      S:X  d   e[        R                  " TR                  5       T" U 5      5        g r  )r   r^   r  r  )r   r  r$  rO  s     rv   r4  r5    s-    6{a'''8rx   T)	r  r   r  r7   r?   r   rM  r4  r  )r`  r/  r4  depsr$  rO  r   s       @@@rv   r?   "TemplateBuffer.extract_read_writes  s    }}//#002	; //==?B)
 ;;Cjj--/G9 JJ,::s||~rTeJ  rx   c                6    [         R                  R                  $ rq   )r   r  r  rg  s    rv   r(  !TemplateBuffer.get_reduction_size  s    ww{{rx   c                    g rq   r   rg  s    rv   r%  !TemplateBuffer.get_reduction_type  r  rx   c                    gr  r   rg  s    rv   r  TemplateBuffer.should_allocate  r  rx   c                *    U R                  5       S4S 4$ rY  r  )r`  r  r  s      rv   r  #TemplateBuffer.simplify_and_reorder  s$      
 	
rx   )rM  r)  r   )r4  r  rM  Sequence[IRNode]r)  r  r   r   rg  rl  rN  rI  r!  )r  r"  r  r#  )r   r   r   r   r!  rs  r  r?   r(  r%  r  r  r   r  r  s   @rv   r%  r%    ss    

)
) !
) /	
)
 

)82
 RVBF
$N
 $@
 
rx   r%  c                  x   ^  \ rS rSr  S     S	U 4S jjjr S
   SU 4S jjjrSS jrSS jrSS jrSr	U =r
$ )TritonTemplateBufferi  c                B  > [         T
U ]  XU5        X@l        U /U l        Ub  [        R
                  R                  R                  [        R
                  R                  R                  4n[        R                  R                  R                  nXv;   d   SU SU 35       eU R                  S   R                  5       nU =R                  U V	s/ s H  n	[        [!        US9X5      PM     sn	-  sl        U(       a  UO	[#        5       U l        SU l        SU l        gs  sn	f )a  
NOTE:[TritonTemplates with multiple outputs]
We want the ability for TritonTemplates to output multiple tensors. Triton
kernels have no notion of outputs and this is done by creating tensors that
are then mutated by the kernel. Currently our STORE_OUTPUT codegen doesn't
support creating multinode outputs for triton templates.
We work around this by creating an extra input buffer during the lowering
and we mark them as mutated inputs.
Nz$Mutated inputs are only allowed for z	 but got r   r  )r  rs  mutated_inputsoutputsr   r^   higher_orderflex_attentionflex_attention_backwardr`   r   current_noder  rM  r   MutationOutputr  r0   allowed_prologue_inpssubgraph_inpssubgraph_outs)r`  r4  rM  r)  rE  rL  allowed_setrJ  r   r  r  s             rv   rs  TritonTemplateBuffer.__init__  s   " 	);<,&*V% 		&&55		&&>>K 77//66L. 6{m9\N[. [[^..0FLL))C z8#D) L &;!
 	" SW?Cs   Dc                ,  > [         TU ]  U5      nU R                  (       a  U R                  O/ nU R                  (       a  U R                  O/ nU Hz  n[	        U[
        R                  5      (       a  UR                  [        XQ5      5        M>  [	        U[        5      (       a"  UR                  UR                  U5      5        Mu  Uc  Mz   e   U H?  n[	        U[        5      (       a"  UR                  UR                  U5      5        M:  Uc  M?   e   U$ rq   )
r  r"  rN  rM  rr   r   r   updater   rn   )r`  r   resrN  rM  rO  r   r  s          rv   r"  )TritonTemplateBuffer.get_free_symbol_uses   s     g*=9.2.@.@**b.2.@.@**b C#uzz**

+C?@C((

333MBC{"{ ! !C#v&&

333MBC{"{	 ! 
rx   c                    U R                   $ rq   )rF  rg  s    rv   r   TritonTemplateBuffer.get_outputs7  r  rx   c                    U R                   $ rq   )rL  rg  s    rv   get_allowed_prologue_inps.TritonTemplateBuffer.get_allowed_prologue_inps:  s    )))rx   c                &    SU R                    S3nU$ )NzTritonTemplateBuffer(layout=r  r  )r`  r   s     rv   r  TritonTemplateBuffer.__str__=  s    ,T[[M;
rx   )rL  rE  rF  rM  rN  r!  )rE  zOptional[Iterable[IRNode]]rL  zOptional[OrderedSet[str]]r   r   r`  rk  r  r?  r[  )r   r   r   r   rs  r"  r  rX  r  r   r  r  s   @rv   rC  rC    sl     6:;?)D
 3)D  9)D 
)D )DX %*!	! .* rx   rC  c                     ^  \ rS rSrSr          SU 4S jjrSS jrSS jrS rSS jr	SS jr
SS	 jrSS
 jrSS jrSrU =r$ )ChoiceCalleriE  a  
Represents a possible choice used in autotune_process.py.
During autotuning, self.benchmark() is first called to get benchmark result,
and if this choice is selected, self.output_node() is called to get the output_node.

Children classes: TritonTemplateCaller, CUDATemplateCaller.
c                R   > [         TU ]  5         Xl        X0l        X l        X@l        g rq   )r  rs  r   r4  r   description)r`  r   r   r4  r_  r  s        rv   rs  ChoiceCaller.__init__N  s(     		& 'rx   c                  ^^ U R                  5       m[        R                  (       a  [        UU4S j5      $ [        R
                  " TTSU05      $ )Nc                    > T " T6 $ rq   r   )algor   s   rv   r  (ChoiceCaller.benchmark.<locals>.<lambda>`  s	    D$Krx   r   )to_callabler6   /profile_bandwidth_with_do_bench_using_profilingrP   rF   	benchmark)r`  r   r   rc  s     `@rv   rg  ChoiceCaller.benchmark]  s?    !AA+,?@@$$T4%>>rx   c                    [         erq   r}  rg  s    rv   	call_nameChoiceCaller.call_namec  r  rx   c                    [         erq   r}  rg  s    rv   re  ChoiceCaller.to_callablef  r  rx   c                "    U R                  5       $ )z
Hash key for the underlying kernel. By default, we assume there are no
runtime params, so kernel hash key defaults to choice caller's hash key.
)hash_keyrg  s    rv   kernel_hash_keyChoiceCaller.kernel_hash_keyi  s    
 }}rx   c                    [         erq   r}  rg  s    rv   ro  ChoiceCaller.hash_keyp  r  rx   c                    [         erq   r}  rg  s    rv   rI  ChoiceCaller.output_nodes  r  rx   c                    0 $ )zRInformation returned here is logged to the autotune log file when that is enabled.r   rg  s    rv   	info_dictChoiceCaller.info_dictv  s    	rx   c                    g)Nunsupported_choicer   rg  s    rv   autoheuristic_idChoiceCaller.autoheuristic_idz  s    #rx   )r_  r   r4  r   )
r   r   r   r  r4  r  r_  r   r   r   )r   r  r[  )r   rm   )r   z<dict[str, Union[PrimitiveInfoType, list[PrimitiveInfoType]]])r   r   r   r   r!  rs  rg  rj  re  rp  ro  rI  rw  r{  r   r  r  s   @rv   r]  r]  E  sj    '' "' 	'
 ' 
'?""""$ $rx   r]  c                      \ rS rSrSS jrSrg)TritonTemplateCallerBasei~  c                    [         erq   r}  rg  s    rv   get_make_kernel_render/TritonTemplateCallerBase.get_make_kernel_render  r  rx   r   N)r   r   )r   r   r   r   r  r   r   rx   rv   r~  r~  ~  s    "rx   r~  c                     ^  \ rS rSrSr            S
U 4S jjr\SS j5       r\SS j5       r\	R                  SS j5       rSS jrSS jrS	rU =r$ )MultiTemplateBufferi  a3  
Represents a Buffer with multiple backing implementation choices.

Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
Otherwise, the fastest base choice will be chosen.
c                z   > [         TU ]  UUS US9  X0l        S U l        X l        [        S U 5       5      U l        g )N)r4  rM  r)  rL  c              3     #    U  H]  n[        U[        5      =(       dA    [        U[        R                  R                  R
                  5      =(       a    UR                  v   M_     g 7frq   )rr   r~  r   	_inductorselect_algorithmExternKernelCallerhas_out_variant)r  choices     rv   r  /MultiTemplateBuffer.__init__.<locals>.<genexpr>  sU      %
 - v78 65??#C#C#V#VW +**
 -s   A%A')r  rs  _choice_timings_fn_choice_timingsoriginal_inputsr  _output_plannable)r`  r4  rM  choice_timings_fnunfiltered_choicesrL  r  s         rv   rs  MultiTemplateBuffer.__init__  sU     	#"7	 	 	
 #4DH%!$ %
 -%
 "
rx   c                    U R                   $ )zN
Are all possible choices TritonTemplates or Extern Kernels with out variants
)r  rg  s    rv   output_plannable$MultiTemplateBuffer.output_plannable  s    
 %%%rx   c                ^    U R                   c  U R                  5       U l         U R                   $ rq   )r  r  rg  s    rv   choice_timings"MultiTemplateBuffer.choice_timings  s+    '#'#:#:#<D ###rx   c              #    #    [        U[        R                  R                  R                  5      (       d   eU R
                  UR
                  :X  d   eU R                  nUR                  5       U l         S v   X l        g ! X l        f = f7frq   )rr   r   r  r  TritonTemplateCallerr4  r)  r  )r`  callerrenders      rv   swap_as_triton_caller)MultiTemplateBuffer.swap_as_triton_caller  sn     &%//"B"B"W"WXXXX{{fmm+++(("("?"?"A	-&,#f#s   A3B6B :BB		Bc                @   [        U[        R                  R                  R                  5      (       d   eU R                  5       UR                  R                  :X  d   eU R                  5       UR                  R                  :X  d   eUR                  5       U l        g rq   )rr   r   r  r  r  r   r4  r   r  r   r  r)  )r`  r  s     rv   finalize_as_triton_caller-MultiTemplateBuffer.finalize_as_triton_caller  ss    &%//"B"B"W"WXXXX}}&--"4"4444 FMM$8$8888"("?"?"Arx   c                r    [        U R                  U R                  R                  S9nXR                  U   4$ )Nr  )rC  r  r  )r`  
min_choices     rv   get_min_choice"MultiTemplateBuffer.get_min_choice  s4    ,,$2E2E2I2IJ
//
;<<rx   )r  r  r  r)  r  )r4  r  rM  r   r  z'Callable[[], dict[ChoiceCaller, float]]r  zlist[ChoiceCaller]rL  r@  r   r   rI  )r   zdict[ChoiceCaller, float])r  r~  )r  r~  r   r   )r   ztuple[ChoiceCaller, float])r   r   r   r   r!  rs  ru  r  r  rs  rt  r  r  r  r   r  r  s   @rv   r  r    s    

 
 C	

 /
  /
 

4 & & $ $
 	- 	-B= =rx   r  c                  L   ^  \ rS rSr        SU 4S jjrS rSS jrSrU =r$ )CUDATemplateBufferi  c                J   > [         TU ]  XU5        X@l        XPl        X`l        g rq   )r  rs  workspace_sizetemplatesupports_epilogue_fusion)r`  r4  rM  r)  r  r  r  r  s          rv   rs  CUDATemplateBuffer.__init__  s&     	);<, (@%rx   c                8    U R                   b  U R                   $ S$ r  )r  rg  s    rv   r  %CUDATemplateBuffer.get_workspace_size  s    &*&9&9&Et""L1Lrx   c                ~    U R                  5        H)  n[        R                  " UR                  5       S S 5        M+     g rq   )r  r^   r!  r  )r`  rA  s     rv   emulate_store_fn#CUDATemplateBuffer.emulate_store_fn  s,    &&(FIIfoo't4 )rx   )r  r  r  )r  rs   r  rc   r  r   r   r   r>  )	r   r   r   r   rs  r  r  r   r  r  s   @rv   r  r    sE    A
 A A #'A 
AM5 5rx   r  c                  <   ^  \ rS rSrSU 4S jjrSU 4S jjrSrU =r$ )CppTemplateBufferi  c                L   > [         TU ]  XU5        X@l        XPl        S U l        g rq   )r  rs  r  r  rF  )r`  r4  rM  r)  r  r  r  s         rv   rs  CppTemplateBuffer.__init__  s$    );< /3rx   c                6  > [        U R                  [        5      (       al  [        U R                  [        5      (       d   eU R                  S   n[        U[
        5      (       d   eUR                  n[        U[        5      (       d   eU$ [        TU ]!  5       $ r  )	rr   r4  MultiOutputLayoutrF  r   r)  r  r  r   )r`  first_outputr4  r  s      rv   r   CppTemplateBuffer.get_layout  s}    dkk#455dllH5555<<?LlF3333!((Fff----M7%''rx   )r  rF  r  r>  rG  )r   r   r   r   rs  r   r   r  r  s   @rv   r  r    s    4	( 	(rx   r  c                  l    \ rS rSr% S\S'   SS jrSS jr\SS j5       r\	S 5       r
SS jrSS	 jrS
rg)r'  i  r  rM  c                  ^ [         [        R                     " 5       n[        R                  mU R                   Hq  n[        U[        5      (       a  UR                  U4S jU 5       5        M5  [        U[        5      (       a  ML  UR                  T" UR                  5       5      5        Ms     [         [        R                     " U4S jU R                  5        5       5      n[        R                  " UU[        5       S9$ )Nc              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7frq   rz  )r  ru   r  s     rv   r  /InputsKernel.get_read_writes.<locals>.<genexpr>  s     BEqWQZZ\22E   #&c              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7frq   rz  )r  r  r  s     rv   r  r    s#      .
/AGCLLN##/Ar  )r  writesindex_exprs)r0   r7   r<   r  rM  rr   r   rR  r   r@  r  r  
ReadWrites)r`  r  inputr  r  s       @rv   r  InputsKernel.get_read_writes  s    <++,.&&[[E%&&BEBBE#899		'%.."234 ! L,,- .
/3/?/?/A.
 
 &&"
 	
rx   c                6    U R                  5       R                  $ rq   r  rg  s    rv   rs  InputsKernel.get_reads  r  rx   c                   [        U[        5      (       a  UR                  n[        U[        5      (       a  UR                  n[        U[        5      (       a*  [        U[
        5      (       d  [        R                  U5      n[        U[        5      (       a  U R                  U5      $ [        U[        5      (       a  U$ [        U[        [
        45      (       d   U5       eU$ rq   )rr   rm   r3  r(  r3  r8  r  realize_inputunwrap_storage_for_inputTorchBindObjectr)  r  ru   s     rv   r  %InputsKernel.unwrap_storage_for_input  s    a##Aa$$Aa"":a+I+I**1-Aa##
 //22a))H!fo677::7rx   c                    / nU  Hd  n[        U[        5      (       a&  U Vs/ s H  n[        R                  U5      PM     nnO[        R                  U5      nUR	                  U5        Mf     U$ s  snf rq   )rr   r   r'  r  r  )rM  
inputs_newru   r   s       rv   r(  InputsKernel.unwrap_storage%  sk    
A!T""GHIq!\::1=qI 99!<a   	 Js   A/c                    gr  r   rg  s    rv   r+  InputsKernel.is_extern0  r  rx   c                    gr  r   rg  s    rv   r  InputsKernel.num_reads3  r   rx   r   Nrg  rh  )ru   rn   r   rn   rI  ri  )r   r   r   r   r   r  rs  r  r  rr  r(  r+  r  r   r   rx   rv   r'  r'    sD    
,,  $  rx   r'  c                  (    \ rS rSrSS jrSS jrSrg)	NopKerneli7  c                    gr  r   rg  s    rv   r.  NopKernel.is_no_op8  r  rx   c                    [        5       $ rq   r/   rg  s    rv   rs  NopKernel.get_reads;  r  rx   r   NrI  rh  )r   r   r   r   r.  rs  r   r   rx   rv   r  r  7  s    rx   r  c                  V    \ rS rSrSr\S 5       r\S	S j5       r\S 5       rS
S jr	Sr
g)ConcatKerneli?  zb
There isn't actually a real kernel for concat, we just change the
storage for the upstream data.
c                	   US   R                  5       nUS   R                  5       n[        US   R                  5       5      nS/nXR   /nSUs=::  a  [	        U5      :  d   e   e[        S[	        U5      5       H  nX   R                  5       n	UR                  XR   5        [	        U	5      [	        U5      :X  d   eX   R                  5       U:X  d   eX   R                  5       U:X  d   e[        [	        U5      5       HE  n
X:X  a  XZ   X   -   XZ'   M  [        R                  R                  R                  XZ   X   5      XZ'   MG     UR                  XR   5        M     [        R                  U5      n[        R                  (       a#  [        R!                  XUS   R"                  5      n[        [	        U5      5       H|  nX   n[%        U5      (       d  M  UR'                  5       n[)        U[*        5      (       d  M@  [        R-                  UR.                  UR0                  5      (       d  Mq  [3        U5      n  O   [5        S U 5       5      n[        R                  R6                  R8                  S   n[)        U[        5      (       d   eUSL a"  [5        S U 5       5      (       a  [3        U5      n[;        S [+        UUUUS9/ S9n[=        U5      n/ n[        [	        U5      5       GH  nU R?                  X   [@        RC                  UX&U   Xx   SS95      nURD                  R                  U5        [)        X   RF                  [H        5      (       a  X   RF                  RK                  5       nOX   RF                  nURM                  5       (       d  M  [O        X   R                  5       RP                  5      (       d  M  [S        U5      (       a  M  UR                  URU                  5       5        GM     [	        U5      S:  aR  [        R                  RW                  U[X        RZ                  5      (       a  [        R                  R]                  U5        [        R                  R_                  U5      Ul0        U Rc                  URD                  5      Ul"        [        R                  Re                  U5        U$ )	Nr   r5   c              3  8   #    U  H  n[        U5      v   M     g 7frq   )r   r8  s     rv   r  &ConcatKernel.create.<locals>.<genexpr>n  s     -WPV1.CA.F.FPVrq  Fc              3    #    U  Hv  nS UR                   ;   =(       a[    UR                   S    R                  [        R                  S9=(       d*    UR                   S    R                  [        R                  S9v   Mx     g7f)r  r  N)rF  r   r   r  r  r  args     rv   r  r  r  sr      <
 $ SXX --E<O<O-P W88E?00u?U?U0V
 $s   A>B )r   r   r   r   r   r4  rM  )rB  )3r   r   r   r   r   r   r  r`   r   r   r  r   r   r6   r  r  r  r   r   r   rr   r6  r  r   r   r"   rB  rJ  r   r  r(  rC  r5  r  rM  r3  r3  r  r  rW   r   rV   r7  r  r8   FOREACHregister_operation_listr*  r   r(  r+  )r  rM  r(  r   r   r  offsets_startoffsets_endr   
input_sizer  output_strideru   r4  any_input_is_storage_and_layoutfx_node_argsconcat_kernelkernelop_namesinput_bufferinput_unwrappeds                        rv   r  ConcatKernel.createE  s   %%'q	##%q	**,-}oC'#h-'''''q#f+&A++-J  /z?c(m3339&&(E1119'')V3333x=)8"*+
"=HK"#''"2"2"?"? Z]#HK	 * x}- ' '99(C''"//M
 s6{#A	A$Q''K 88fmmTT$B8$LM $ +.-WPV-W*W'ww++003,----*e3 <
 $<
 9
 9
 ;8DM$$	 	
 M*s6{#A++	  Cq!1;> ! L   ''5&)..(33"().."<"<">"()..  //1169//16677"<00 ? ? AB' $* x=1!4!4V^=S=S!T!TGG++H5WW44]C"11-2F2FG	""=1rx   Nc                   [        U[        5      (       a  U R                  UR                  U5      $ [        UR                  [        5      (       a  [        UR                  R
                  [        5      (       a  UR                  R                  (       d  gUc  g[        UR                  5       5      [        UR                  5       5      :X  d  g[        S [        UR                  5       UR                  5       5       5       5      $ [        UR                  R
                  [        5      =(       a    [        UR                  [        5      (       + $ )NFTc              3  x   #    U  H0  u  p[         R                  R                  R                  X5      v   M2     g 7frq   r.  r/  s      rv   r  =ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>  s1      EFB   88@@Er1  )rr   rm   can_realize_into_without_copyr3  r  r4  r6  r  r   r  r  r   r   ExternKernelAlloc)r  r@  rA  s      rv   r  *ConcatKernel.can_realize_into_without_copy  s    c9%%44SXXsCCchh 344sxx<<xx00 { s~~'(C0@,AA !#.."2CNN4DE  
 #((//>: 
:HH'D
 @
 	
rx   c                   [        U[        5      (       d&  [        U5      (       a  [        U5      u  p4[        X4S9n[        U[        5      (       d   U5       e[        U[        5      (       a  U R                  UR                  U5      $ [        U[        5      (       ai  UR                  5         [        UR                  S5      (       d   eU R                  X5      (       a&  [        U5      UR                  l        UR                  $ [        R                  UR                  5       UR!                  5       UR#                  5       [%        UR'                  5       UR'                  5       5       VVs/ s H.  u  pV[(        R*                  R,                  R/                  XV5      PM0     snnS9nU R                  Xr5      $ s  snnf )Nr2  r4  r-  )rr   r8  r   r5  rm   rC  r3  r(  r  r  r  r  r4  r  r  r   r   r  r   r   r`   r   r   r  )r  r@  rA  r;  r4  rS  rT  pws           rv   rC  ConcatKernel.realize_into  sV   
 #//$S))"7"<%7B#//44/c9%%##CHHc22c:&&KKM388X....00::"1#"6xx>>#--/__&  ??DA   --a3?	  
 ((s   .5F:c                    gr  r   rg  s    rv   r  ConcatKernel.should_allocate  r  rx   r   rq   rI  )r   r   r   r   r!  r  r  r  rC  r  r   r   rx   rv   r  r  ?  sL    
 ^ ^@ 
 
< ) )@rx   r  c                     ^  \ rS rSr% SrS\S'   \R                  " \S9r	S\S'   Sr
S	\S
'   SrS\S'   SrS\S'   \R                  " \S9rS\S'   SrS\S'   SrS\S'   SrS\S'   \R                  " \S9rS\S'   \R                  " \S9rS\S'          S> S?U 4S jjjrS@S jrSAS jrS rS rS?S jrS  rSBSCS! jjrSDS" jrS# r\S$ 5       r\   SES% j5       r!\ S& 5       r"\ S' 5       r#\ S( 5       r$\    SF   SGS) jj5       r%\ SHS* j5       r&\ SHS+ j5       r'\ S, 5       r(\ S- 5       r)\ S. 5       r*\ S/ 5       r+S?S0 jr,S1 r-SBSIS2 jjr.S3 r/S4 r0SHS5 jr1SJS6 jr2S?S7 jr3S?S8 jr4S9 r5S: r6 SH   SKS; jjr7SJS< jr8\8r9S=r:U =r;$ )Lr  i  r   ztuple[Any, ...]constant_argsr   zdict[str, Any]r   NzOptional[ReinterpretView]output_viewrO  python_kernel_namecpp_kernel_namezIterable[str]ordered_kwargs_for_cpp_kernelzFOptional[Union[torch._ops.OpOverload, torch._ops.HigherOrderOperator]]op_overloadzOptional[list[dict[str, Any]]]arg_propertiesz#Optional[dict[str, dict[str, Any]]]kwarg_propertiesz"dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszlist[MutationOutput]mutation_outputsc                2  > [         TU ]  UUUS9  X@l        U(       a  UO0 U l        X`l        Xl        U R                  U5        U R                  U5        Xl        U R                  5         0 U l
        / U l        [        R                  R                  U l        g Nr  )r  rs  r  r   r  r	  set_cpp_kernel_nameset_python_kernel_namer	  collect_arg_kwarg_propertiesr	  r	  r`   r   rJ  fx_node)r`  r   r4  rM  r  r   r  r 	  r	  r	  r	  r  s              rv   rs  ExternKernel.__init__   s     	 	 	

 + &fB&&  1##$67-J*))+!# "ww++rx   c                     U /U R                   Q$ rq   r	  rg  s    rv   r  ExternKernel.get_outputs  s    -t,,--rx   c                    [        5       $ rq   r/   rg  s    rv   r  %ExternKernel.get_unbacked_symbol_defs!  r  rx   c                   [        U R                  [        R                  R                  5      (       af  U R                  R
                  R                   Vs/ s H:  nUR                  (       a  M  UR                  UR                  UR                  S.PM<     snO.[        [        U R                  5      5       Vs/ s H  n0 PM     snU l        [        U R                  [        R                  R                  5      (       aS  U R                  R
                  R                   Vs0 s H'  nUR                  UR                  UR                  S._M)     snO0 U l        [        U R                  [        R                  R                  5      (       a  U R                   (       dR  U R                  R
                  R                   Vs/ s H!  oR                  (       d  M  UR                  PM#     snU l        U R                  R
                  R                   Vs/ s H  oR                  (       d  M  UPM     snU l        g / U l        g s  snf s  snf s  snf s  snf s  snf )N)r   r   r  )r   r  )rr   r	  r   _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typer  r   r   rM  r	  allarg_propertiesr	  schema_kwargs)r`  ru   r   s      rv   r	  )ExternKernel.collect_arg_kwarg_properties$  s    $**EJJ,A,ABB ))11;; <A||FFKK%&__
 < $C$4565"56 	$ $**EJJ,A,ABB ))11;;;A qOO;
  	 d&&

(=(=>>55$($4$4$<$<$F$F6$Fq,,FAFF$F62  ++33=="=a="D "$D? 76"s0   I/(I:I#.IIII+Ic                    [        U R                  [        5      (       a!  U R                  5         U R	                  5         g g rq   )rr   r4  r   apply_constraintr  rg  s    rv   r  ExternKernel.decide_layoutI  s0    dkk>22!!#  3rx   c                P    [        X5      u  p#U(       a  UR                  U5        g g rq   )rS   make_comment)r`  wrapper
origin_str_detailed_origin_strs       rv   codegen_commentExternKernel.codegen_commentN  s%    +>t+M(
  , rx   c                    [         erq   r}  r`  r#	  s     rv   codegenExternKernel.codegenS  r  rx   c                   Xl         [        R                  R                  (       a3  [	        U R
                  [        R                  R                  5      (       d  g U R
                  nU R                   c  UR                  S:X  aV  UR                  S:X  a  UR                  R                  S5      S   OUR                  R                  SS5      nSU S3U l         g UR                  R                  U l         g g )Natenr4  .r   r   z
at::_ops::z::call)r	  r`   r   cpp_wrapperrr   r	  r   r	  r	  	namespace_overloadnamer   r  replacer	  r   )r`  r	  r  opnames       rv   r
	   ExternKernel.set_cpp_kernel_nameV  s    .ww""*ejj33+
 +
 !!'6) ++y8 OO))#.q100c: 
 *4F86'B$'-~~':':$ (rx   c                   Xl         Ub  g U R                  nUc  g [        U[        R                  R
                  5      (       a  SUR                   3U l         g UR                  R                  SS5       SUR                   3U l         g )Nztorch.ops.higher_order.._ops..ops.r.	  )	r 	  r	  rr   r   r	  HigherOrderOperatorr   r   r2	  )r`  r 	  r  s      rv   r	  #ExternKernel.set_python_kernel_namen  s    "4)!!>

 > >??(??P&QD# $$,,Xw?@&//ARS #rx   c                :   U R                  5       =n(       a  UR                  O[        R                  R                  n[        R                  R
                  (       a4  [        R                  R                  R                  U R                  U5      $ U R                  $ rq   )
r   r   r`   r   device_typer/	  r  get_c_shim_func_namer	  r 	  )r`  dr   s      rv   get_kernel_nameExternKernel.get_kernel_name}  sn    !%!22A29L9L ww"" GG  55d6J6JFS	
 ((	
rx   c           	         [         R                  U R                  5       U R                  5       U R	                  5       U R                  5       U R                  5       U R                  5       S9nUR                  5         U$ )N)r   r   r  r  rY  rW  )	r  r  r   r   r  r   r|  rx  r  )ru   r  s     rv   
copy_inputExternKernel.copy_input  sa    <<>++-]]_::<))+oo'  
 	

	rx   c                	  ^^ X#S.n[         R                  " U5      u  nm/ m/ n/ nU H  nTR                  [        U[        5      =(       a    [        U[
        5      (       + 5        TS   (       a  UR                  U5        M[  [        U[        R                  5      (       a2  [        R                  R                  R                  R                  US S9nUR                  U5        M     UU4S jn	U V
s/ s H  oR                  U
5      PM     nn
U H  n
[        U
5      (       d  M  [        U
SS9  M!     / nU GH  n
[        U
[         5      (       dh  U
R#                  5       [        R                  R$                  ;   a<  UR                  [        R                  R$                  U
R#                  5          5        M  [        U
[         5      (       dh  U
R#                  5       [        R                  R&                  ;   a<  UR                  [        R                  R&                  U
R#                  5          5        M  [        U
[(        5      (       a"  UR                  U
R+                  5       5        GM5  [        U
[,        R.                  R0                  R
                  5      (       ar  U
R2                  R4                  nU
R2                  R6                  S:X  a  Uc   eUR                  [,        R8                  R:                  U   R=                  5       5        GM  UR                  [?        U
SS95        GM     U	" X5      u  pU" U0 UD6nS n[        R@                  R                  =n(       a  [        RB                  RD                  RG                  S	5      n[I        5       n[        RB                  RJ                  [,        RL                  RN                  RP                  :X  a  US
   n[S        [        RB                  5      nU   [U        U[        RB                  U5        S S S 5        [W        UUU5      n[        U[X        [Z        45      (       d  U/OUnU H  n[        U[,        R\                  5      (       d  M$  UR^                  (       d  M7  Sn[        R                  RB                  RD                  RG                  SS 5      =n(       a  U SU 3nU[        R                  l0        M     UUUU	U4$ s  sn
f ! , (       d  f       N= f)N)r   r   r  )r  c                6  > / n[        U 5      n[        U5      nT H@  nU(       a  UR                  [        U5      5        M&  UR                  [        U5      5        MB     [        R                  " UT5      nUR                  S/ 5      UR                  S0 5      4$ )Nr   r   )iterr  nextpytreetree_unflattenr  )	new_tensor_argsnew_non_tensor_argsr  
it_tensorsit_non_tensors	is_tensorr  	args_specis_arg_tensors	          rv   unflatten_args3ExternKernel.process_kernel.<locals>.unflatten_args  s    Fo.J!"56N*	MM$z"23MM$~"67	 +
 %%fi8A55$aeeHb&999rx   Tr  r	  )r   r  r5   zEsparsity not handled. Please file issue for sparse inference weights.stack_tracez Found from : 
 )1rG	  tree_flattenr  rr   rn   GeneratorStater   r   r`   r   r   r   create_symintnoder  r   r5  r3  r  	constantstorchbind_constantsr  	get_valuer   r  irr   r   r   r	  default_generatorsclone_stater   r  rJ  rF  r  r   r  _higher_order_opseffectswith_effectsr%   r*   r&   r   r   Tensor	is_sparsedisable_cudagraphs_reason)r  r  r   r   binded_args	args_flattensor_argsnon_tensor_argsr  rP	  ru   example_argsdevice_indexnew_args
new_kwargsexample_outputr	  r   node_meta_valctxexample_out_lir   msgrR	  rN	  rO	  s                           @@rv   process_kernelExternKernel.process_kernel  s     $6%22;?	9%'C  3'O
30O,O R ""3'c5::..''**44FFsQUFVC&&s+ 
	: 6AA[((+[A A$Q''%a5  	 	 A a**qzz|qww?P?P/P##AGG$5$5ajjl$CDq(++JJLAGG$?$??##AGG$?$?

$MNA//##AKKM2Au11@@AA xx~~xx}}.<3KKK##JJ11,?KKM ##$5aT$JK' *  .lL8Z8JN---9-NN//33E:M-C~~$$(?(?(G(G(T(TT -a 0<Q^^L	1>>>J  9>=! ntUm<<  	
  A!U\\**q{{{]"#''"6"6";";"?"?t"TT;T E!2;-@C471   
 	
O Bj s   4S'9S,,
S:c                   [        U[        5      (       d   e[        U[        5      (       a  U$ UR                  5       n[        R
                  R                  UR                  5       5      nUc   eUR                  5       nUb  SUR                  ;   a  [        UR                  [        5      (       a  UR                  S   R                  [        R                  S9(       d/  UR                  S   R                  [        R                  S9(       a)  UR!                  [#        UR%                  5       5      5        OUR'                  5         [(        R*                  " UR%                  5       SS9u  pVUS   nUR-                  5       " U5      n[        R
                  R.                  R1                  X5      n[        R
                  R.                  R3                  X5      n	[        R
                  R.                  R5                  X5      n
[7        Xy5      U
-   nX:w  a  [8        R;                  SU	U
U5        [<        e[        UR>                  [A        URC                  5       URE                  5       UR%                  5       U	U
S9S9$ )	z
In order to pass this to an extern kernel we need a
ReinterpretView not a View.  This allows us to avoid some
unneeded copies.
r  r  r  rk   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sr  r2  )#rr   r3  r8  r  r`   r   r-  r  r|  rF  r4  r   r   r   r  r  r  r"   r   r  r7   r  r  r   r  stride_vars
offset_varrX   r  r  r  r3  r6  r  r   )r  ru   x_unwrap_viewr  x_unwrap_view_fx_node
index_argsr  r  r   r:  r7  expecteds               rv   convert_to_reinterpret_view(ExternKernel.convert_to_reinterpret_view
  s    !X&&&&a))H gg  !7!7!9: # 3 3 5 "-.333=//@@%**51??"'"5"5 @  )--e4BB"'"8"8 C 
 77.}/E/E/GH '')!-!@!@JJL"

  ]
 ,  55eH''""..uA!!,,U?Z1F:IIR	 &%,,.kkmZZ\	
 		
rx   c                (   Uc
  [        5       $ [        U[        R                  [        R                  R
                  R                  [        45      (       a	  [        US9$ [        U[        5      (       aY  [        R                  R                  [        R                  " UR                  UR!                  5       UR#                  5       S95      $ [        U[$        5      (       a  U$ [        U[&        5      (       a  U R)                  UR*                  5      $ [        U[,        5      (       a1  [-        U R)                  UR*                  5      UR/                  5       S9$ [        U[0        5      (       a@  UR3                  5         [5        UR7                  5       5      (       a   U R9                  U5      $ [        U[<        5      (       a  UR3                  5         U$ [        U[>        [        45      (       a  U$ U RA                  U5      $ ! [:         a     Naf = f)N)r  r  r2  )!r  rr   r   r   r   r   r   rs   r   rb  r`   r   add_tensor_constantr   r9  r   r   r   r.  rm   r  r3  r8  r   r3  r  r   r  rx	  r  r(  NonTensorObjrA	  r  s     rv   r  ExternKernel.realize_inputO  s   9'))a%**ekk&9&9&A&A3GHH(a00a""77..QWWAKKM!,,.Q  a((Ha##$$QVV,,a))"&&qvv.q||~  a""IIK$Q]]_55::1== a$$IIKHa,(=>??H~~a   + s   H 
HHc                    [        U5      (       a@  [        UR                  5       5      S:X  a  U$ UR                  5        H  nUS:X  d  M  Us  $    U R                  U5      $ r  )r   r   r  rA	  )r  ru   r   s      rv   require_stride1ExternKernel.require_stride1p  sR     ##1<<>"a',,.Q;H ) ~~a  rx   c                
   Uc  Uc   eUR                  5       S;   a	  U(       d  U$ [        U5      (       Ga]  [        UR                  5       [        5      (       a  U(       ai  [        USS[        X5      (       aJ  [        [        R                  R                  R                  UR                  5       R                  5      5      OUUS9  U$ [        USSS UUS9  U$ [        UR                  5       [        [        45      (       ay  U(       a$  UR                  5       R                  U5      (       d>  U(       aG  [!        X1R                  5       R                  UR#                  5       5      (       a  Ub  [%        X5      $ U$ [        UR                  5       [&        5      (       a  [        UR                  5       R)                  5       [        5      (       a  [+        S5      e[        UR                  5       R)                  5       [        5      (       a  U(       a2  UR                  5       R)                  5       R                  U5      (       dM  U(       aH  [!        UUR                  5       R)                  5       R                  UR#                  5       5      (       a  U$ [        U[,        5      (       ak  U(       a$  UR                  5       R                  U5      (       d>  U(       a9  [!        X1R                  5       R                  UR#                  5       5      (       a  U$ [        U[.        5      (       a  [        UR0                  [2        5      (       a  [        UR0                  [4        5      (       d  [        UR7                  5       5      (       a}  [        UR7                  5       R0                  [8        5      (       dP   U R;                  UR0                  5      Ul        U(       a  U R=                  XUS9$ U(       a  U R?                  XUS9$  S nUR#                  5       nUb  [        R                  R                  n[C        [E        UR#                  5       5      5       Vs/ s HJ  nURG                  X8   S5      (       d  M  URI                  UR#                  5       U   S	5      (       d  MH  UPML     nnU H.  n	[J        RL                  RN                  RQ                  XSS
5      nM0     U RS                  U5      n[        USSUUUS9  U(       a  [        X5      (       d   e U$ U(       a<  Ub  Uc   e[J        RL                  RN                  RU                  X5      n[%        X5      $ U$ ! [@         a     GNgf = fs  snf )N)r   r5   TF)r  r%  r&  r  r$  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutr'  r   r   r5   )+r  r   rr   r   r   r5  r-  r   r`   r   r   
size_hintsr   r6  r  r,  r+  r   r?  r+  r1  r  r  rm   r3  r3  r8  r  r  rx	  require_stride_orderrequire_exact_stridesr  r   r   r#  r;  r   r  loweringslice_rA	  r  )
r  ru   r   r  r  expanded_dims	orig_sizer   r   r(  s
             rv   require_stridesExternKernel.require_stridesz  sm     M$===;;=F"=H !##!,,..99 *#(-
  B!KK - ! 0 0 ; ;ALLN<Q<Q R "'&3 H *#(-%)&3&3 HALLN[/,JKK1<<>;;EBB!1%||~'<'<ajjl  %0 4AE 
 ALLN,FGGalln88:NKK(b    : : <kJJq||~99;MMeTT%5)LLN668??JJL  H a%%q||~77>>-!<<>#8#8!**, 
 Hq)$$1668,,qvv77%ammo66q}}335FGG88@33 4   #44 5   # .2JJL	$ww''H s1::<011A33M4DaH  11!**,q/1E 1   %OO,,33AAqA %
 NN1!''	
 5a????  (]-FFF((//=A21DDW ' s*   36U *U U-6&U- U-
U*)U*c                "    U R                  XUS9$ )N)r  r  r	  )r  ru   r  r  s       rv   r	  "ExternKernel.require_exact_strides  s    ""- # 
 	
rx   c                "    U R                  XUS9$ )N)r   r  r	  )r  ru   r   r  s       rv   r	  !ExternKernel.require_stride_order  s    ""1"OOrx   c                .    U R                  U[        5      $ rq   )r	  r  r  s     rv   require_channels_last"ExternKernel.require_channels_last!  s    ''+<==rx   c                .    U R                  U[        5      $ rq   )r	  r  r  s     rv   require_channels_last_3d%ExternKernel.require_channels_last_3d%  s    ''+=>>rx   c                    S nU" U5      (       a  U$ U R                  U[        R                  UR                  5       5      5      $ )Nc                    S nU" U 5      [         R                  R                  ;   =(       a-    [         R                  R                  U" U 5         R                  $ )Nc                P     U R                  5       $ ! [        [        4 a     g f = frq   )r  AttributeErrorr  rt   s    rv   safe_get_namePExternKernel.require_contiguous.<locals>.is_mkldnn_tensor.<locals>.safe_get_name,  s+     ::<'&(;<   s    %%)r`   r   rV	  	is_mkldnn)ru   r	  s     rv   is_mkldnn_tensor9ExternKernel.require_contiguous.<locals>.is_mkldnn_tensor+  sH      a AGG$5$55 BGG%%mA&67AArx   r	  r   r   r   )r  ru   r	  s      rv   r  ExternKernel.require_contiguous)  sB    
	 AH,,>44QZZ\B rx   c                h    U R                  U[        R                  UR                  5       5      5      $ rq   r	  r  s     rv   require_contiguous_strides'ExternKernel.require_contiguous_strides?  s-     ((~00>
 	
rx   c                    g rq   r   rg  s    rv   r	  ExternKernel.apply_constraintG  r  rx   c                   [        U[        [        45      (       d   e[        U[        5      (       a  [        U5      nU R                  (       d   S5       e[	        U5      n[	        U R                  5      nX4:  aq  [
        R                  SU R                  XC-
  5        [        X45       H?  nU R                  U   S   nUR                  Xb;   a  X&   OU R                  U   S   5        MA     U$ )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   r  )
rr   r   r   r	  r   r  r  r	  r   r  )r`  r   r   n_args
n_pos_argsr   arg_names          rv   fill_non_provided_args#ExternKernel.fill_non_provided_argsJ  s     $u....dE"":D""U$UU"T,,-
 II^  #	 6...q1&9) $,,Q/@ / rx   c                r   [         R                  R                  (       Ga[  / nS nU(       ae  U R                  (       aT  [	        U R
                  5      [	        U5      :X  d   S5       eU R                   Vs0 s H  oDR                  S5      U_M     nn[        U R
                  5       H  u  pVUb.  UR                  X   5      nU(       a  UR                  S5      OS nOb[	        U R                  5      U-   n	U R                  (       a7  U	[	        U R                  5      :  a  U R                  U	   R                  S5      OS nUR                  [         R                  R                  R                  Xh5      5        M     U$ [        [         R                  R                  R                  U R
                  5      $ s  snf )NzDnames passed to codegen_const_args does not match self.constant_argsr   r   )r`   r   r/	  r	  r   r  r  r   rM  r  r  val_to_arg_strr  )
r`  r  r  name_to_arg_propertiesr  r   ru   proptype_r   s
             rv   codegen_const_argsExternKernel.codegen_const_argsl  si   77F
 &*",,4--.#e*< Z< 594G4G*4GSGGFOS(4G ' * "$"4"45)5155eh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 agg22AA!KL 6 Mqww++::D<N<NOO%*s   4F4c                    [         R                  R                  (       aD  U R                  b7  U R	                  / U R
                  QU R                  QU R                  5      nSnOU R
                  nSn/ n[        U5       H  u  pE[         R                  R                  (       a  U R                  (       a  U[        U R                  5      :  d   S5       eU R                  U   R                  S5      nUR                  [         R                  R                  R                  XV5      5        M  UR                  [         R                  R                  R                  U5      5        M     U(       a  UR                  U R!                  5       5        U$ )NFTz-Invalid access to ExternKernel.arg_propertiesr   )r`   r   r/	  r	  r	  rM  r  r   r   r	  r   r  r  r  r	  r  r	  )r`  rM  need_codegen_constant_argsr   r   ru   r	  s          rv   codegen_argsExternKernel.codegen_args  s*   774#3#3#?003$++3 2 23T[[F */&[[F)-&f%DAww""**q3t7J7J3K/K CK ++A.226:AGG00??IJAGG00??BC & &KK//12rx   c                2   X;   a  UR                  U5      $ XR                  ;   a  U R                  R                  U5      $ U R                  (       a9  XR                  ;   a*  U R                  R                  U5      R                  S5      $ [        U S35      e)zGiven an argument name, queries for values in (in order):
1. any provided kwargs for this function.
2. the class self.kwargs member.
3. any available default arguments in self.allarg_properties.r  z not in self.allarg_properties)r  r   r	  r  )r`  r	  r   s      rv   get_kwargs_valueExternKernel.get_kwargs_value  s    
 ::h''{{";;??8,,!!h2H2H&H))--h7;;OLLz)GHIIrx   c           	     0   [         R                  R                  (       Ga  U R                  b  [	        U R
                  5      S:X  a  / $ / nU R                   H  nU(       a  US:X  a  M  U R                  U5      n[        U[        R                  5      (       a  UR                  U5        MU  U R                  (       a9  X0R                  ;   a*  U R                  R                  U5      R                  S5      OS nUR                  [         R                  R                  R                  XE5      5        M     U$ U R                   R#                  5        VVs/ s H3  u  pdU S[         R                  R                  R                  U5       3PM5     nnnU$ s  snnf )Nr   r   r   r  )r`   r   r/	  r	  r   r	  r	  r	  rr   r   r   r  r	  r  r  r	  r   r  )r`  skip_outr   r	  r  r	  ks          rv   codegen_kwargsExternKernel.codegen_kwargs  sK   77+D4F4F0G10L	F >>E 1))(3a,,MM!$  11hBXBX6X ..228<@@H! 
 MM!''"6"6"E"Ea"OP ?(  !KK--//DA #Qqww++::1=>?/   	s   :Fc                    U R                   bS  U R                   R                  n[        USS5      nUR                  SS5      nUR	                  SS5      S   nU SU 3nU$ SnU$ )	Nr   unknown_namespacer6	  r7	  r.	  r5   r   
unknown_op)r	  r  r   r2	  rsplit)r`  r  op_namespaceop_names       rv   get_op_nameExternKernel.get_op_name  sv    <<#\\((F"6<9LML'//'BL'..sA6q9L%ax0G  #Grx   c                   [         R                  (       a  [        R                  R                  (       d  [        U R                  5       5      S:X  a  g [        R                  R                  R                  U R                  5       5      n[        R                  R                  R                  U R                  5       5      nU R                  5       nUR                  SU R                  5        SU SU SU< S3	5        g g g )Nr   zassert_size_stride(r  r  )r6   size_assertsr`   r   r/	  r[   r   r  codegen_shape_tupler  r	  r  r  )r`  r#	  r   r   r	  s        rv   codegen_size_asserts!ExternKernel.codegen_size_asserts  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF&&(G%dmmo%6bb7+UVW (;rx   c           	     j   [         R                  (       a  [        R                  R                  (       d~  U R                  5       nU[        R                  R                  ;  nU R                  5       nU(       a!  UR                  SU S[         SU< S35        g UR                  SU SU S35        g g g )Nzassert_alignment(r  r  z	# buffer z (op: z) is assumed to be not aligned)
r6   alignment_assertsr`   r   r/	  r  r0  r	  r  rT   )r`  r#	  r   alignedr	  s        rv   codegen_alignment_asserts&ExternKernel.codegen_alignment_asserts  s    ##AGG,?,?==?D!''";";;G&&(G!!'vR/@7+QO !!vVG94RS -@#rx   c                N    U R                  5       nU R                  5       nU/ /U4$ )z4
get output sizes and strides, for template_codegen
)r   r  )r`  _size_strides      rv   get_group_strideExternKernel.get_group_stride  s*     //#r{G##rx   c                   [         R                  R                  nU R                  5       nU R	                  5       nU Vs/ s H  oAR                  U5      PM     nn[        [        U5      5       Vs/ s H  n[        SU 35      PM     nn[        [        [        U5      5      UR                  SS9n[        U5       VV	s0 s H  u  pX_M	     n
nn	[        [        U
5      5       Vs/ s H  oZU   PM	     nnU Vs/ s H  oVU   PM	     nnU R                  5       nU" U5      n[         R                  R                  R                  XbU/5      u  pn[        S5      u  nn[        [!        Xo" U Vs/ s H  nU" U5      PM     sn5      5      5      n[#        [$        R&                  " U5      U5      nU[)        U5      4$ s  snf s  snf s  sn	nf s  snf s  snf s  snf )z3
Manually get canonicalization of the output index
r=	  T)r  r  c)r`   r   r   r   r  r   r   r   rY   r  r  r   r  r  r@   r   r   r\   r   r  r   )r`  r   r  r:  ru   r   r  index_orderr   r   r   r   r$  r   	new_sizesr   r  r   add_varreplacements                       rv   canonicalizeExternKernel.canonicalize  s   
 77##//#29:'Q%%a(':;@U;LM;La(1QC1;L
MU3w<0g6I6ISWX+4[+AB+Axs#(+AB$)#f+$67$6q$67-23UmU
3##%
#%&WW%5%5%E%Ew&
"	F !%
73z7	3R	1GAJ	3R+STU5<<.<eI&&&+ ;M C73 4Ss#   F=1G G'G;G/Gc                    U(       a  [         O[        n[        [        R                     " 5       nU R
                   H  nX2" U5      -  nM     U R                  R                  5        H  nX2" U5      -  nM     U$ rq   )maybe_free_unbacked_symbolsmaybe_free_symbolsr0   r   r   r  r   r   )r`  r   maybe_get_symbolsr  r  s        rv   r"  !ExternKernel.get_free_symbol_uses  sq     ,9'>P 	 u||$&%%C"3''A &;;%%'C"3''A (rx   c           
     ,   [        U SS 5      nSU< 3/nU[        R                  " U 5       Vs/ s H'  nUR                   S[        XR                  5       3PM)     sn-  nUR	                  SU R
                  < 35        U R                  U5      $ s  snf )Nr 	  zpython_kernel_name=r  r  )r   rp  fieldsr   r  rY  r  )r`  kernel_namer  rq  s       rv   r  ExternKernel.__str__,  s    d$8$?!+1
 	$++D1
1 zzl!GD**5671
 	
 	|D$4$4#789u%%
s   .B)r	  r	  r  r	  r	  r   r	  r	  r	  r  r 	  r	  r	  r   NNNNr   Nr>  r  r  rq   r	  rO  r   r   )r 	  rO  r   r   )r   zituple[Any, list[Any], list[Any], Callable[[Any, Any], Any], Optional[dict[sympy.Symbol, pytree.KeyPath]]])NNF)r   zOptional[Sequence[int]]r  rL  r`  )r  rV  r[  rk  )<r   r   r   r   r  r   rp  rq  r   r   r  r 	  r	  r   r	  r	  r	  r	  r	  r	  rs  r  r  r	  r  r&	  r*	  r
	  r	  r>	  rr  rA	  r  ro	  rx	  r  r	  r	  r	  r	  r	  r	  r  r	  r	  r	  r	  r	  r	  r	  r	  r	  r	  r	  r	  r"  r  r
  r   r  r  s   @rv   r  r    s   %'M?'(..tDFND-1K*1(,,%)O]) 4?3D3D4!= 
 	    6:N29<@9@<G<M<M=9  .9->->t-T*T &(, 
, ,<.#$J!
-
";0
 
 
 u

u
 u
n B
 B
H ! !@ ! !  *.6:Z 'Z 4	Z Zx 
 

 P P > > ? ?  * 
 
 DP@4J:	
$'@ %*!	!
& Hrx   r  c                  T   ^  \ rS rSrSS jr       S SU 4S jjjrSS jrSrU =r$ )	ExternKernelOuti;  c                &    UR                  U 5        g rq   )generate_extern_kernel_outr)	  s     rv   r*	  ExternKernelOut.codegen=  s    **40rx   c
                   > [         T
U ]  S UU R                  U5      UU=(       d    0 S UUUU	5
        [        R                  R                  U 5      U l        [        R                  R                  U 5        g rq   )r  rs  r(  r`   r   r*  r   r+  )r`  r4  rM  r  r   r  r 	  r	  r	  r	  r  s             rv   rs  ExternKernelOut.__init__@  si     	'Lb)	
 GG++D1		""4(rx   c                    gr  r   rg  s    rv   r  ExternKernelOut.should_allocate[  r  rx   rl  r>  r	  rI  )	r   r   r   r   r*	  rs  r  r   r  r  s   @rv   r	  r	  ;  s=    1 &() 
) )6 rx   r	  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )RandomSeedsi_  c           	       > [         R                  " [         R                  5      n[        TU ]  [        U[         R                  U/S9/ UR                  UR                  U//SS[        R                  R                  S9  g )Nr  zaten.randint.low_outzat::_ops::randint_low_out::call)r4  rM  r  r 	  r	  r	  )r   r  r  r  rs  r6  rC  r  r-	  randintlow_out)r`  countr   limitsr  s       rv   rs  RandomSeeds.__init__`  sl    U[[)kkW
 !::vzzE7;5 >,, 	 	
rx   r   )r	  rs   r   rT  r   r   r   r   r   r   rs  r   r  r  s   @rv   r	  r	  _  s    
 
rx   r	  c                  X   ^  \ rS rSrSS jr      S SU 4S jjjrS	S jrS rSrU =r	$ )
r  is  c                &    UR                  U 5        g rq   )generate_extern_kernel_allocr)	  s     rv   r*	  ExternKernelAlloc.codegent  s    ,,T2rx   c	                   > [         T	U ]  S UU R                  U5      UU=(       d    0 S UUUU5
        / U l        [        R
                  R                  U 5      U l        [        R
                  R                  U 5        g rq   )	r  rs  r(  rF  r`   r   r*  r   r+  )
r`  r4  rM  r  r   r 	  r	  r	  r	  r  s
            rv   rs  ExternKernelAlloc.__init__w  sp     	'Lb)	
 ')GG++D1		""4(rx   c                    gr  r   rg  s    rv   r  !ExternKernelAlloc.should_allocate  r  rx   c                    [         erq   r}  rg  s    rv   r	  "ExternKernelAlloc.apply_constraint  r  rx   )r   rF  r>  )r   NNNr   NrI  )
r   r   r   r   r*	  rs  r  r	  r   r  r  s   @rv   r  r  s  s?    3 &() 
) )<" "rx   r  c                  N   ^  \ rS rSrSrSU 4S jjrS	S jrS
S jrSS jrSr	U =r
$ )rK  i  zH
An output buffer that represents the mutation of a pre-existing buffer
c                   > [         TU ]  S US9  UR                  5       n[        R                  R                  U5        U/U l        X0l        [        R                  R                  U 5      U l	        g r  )
r  rs  r  r`   r   r.  mutation_namesmutating_noder*  r   )r`  r4  mutated_noder
  mutated_node_namer  s        rv   rs  MutationOutput.__init__  s^    d62(113	##$5601(5GG++D1	rx   c                    U R                   $ rq   )r
  rg  s    rv   r  MutationOutput.get_defining_op  s    !!!rx   c                    U R                   $ rq   )r
  rg  s    rv   r4  !MutationOutput.get_mutation_names  r  rx   c                    gr  r   rg  s    rv   r  MutationOutput.should_allocate  r  rx   )r
  r
  r   )r
  rw  r   r   r  ro  rI  )r   r   r   r   r!  rs  r  r4  r  r   r  r  s   @rv   rK  rK    s!    2"# rx   rK  c                     ^  \ rS rSr% Sr0 rS\S'   \      SS j5       r\      SS j5       r	SU 4S jjr
SS jrSS	 jrS
rU =r$ )TMADescriptori  aL  
An IR node representing a generic host-side TMA descriptor in the Triton API
Mostly useful for user-defined Triton kernels relying on host-side TMA;
but can, in principle, be used for Inductor's Triton templates, too.

See TMADescriptorExperimental and TMADescriptorStable for the two implementations
(the old API and the new API)
zdict[Any, TMADescriptor]_CACHEc                    [        U5      S:X  d   eUS   S:X  a  [        U/US   Q76 $ US   S:X  d   e[        U/US   Q76 $ )Nr   r   experimentalr5   r  )r   TMADescriptorExperimentalTMADescriptorStable)r  r9  tma_metas      rv   _create_implTMADescriptor._create_impl  s\     8}!!!A;.(,VBhqkBBA;(***&v<<<rx   c                    [        U5      U4nX0R                  ;  a  U R                  X5      U R                  U'   U R                  U   $ rq   )idr
  r
  )r  r9  r
  r  s       rv   r  TMADescriptor.create  sB     &z8$jj !..v@CJJsOzz#rx   c           
       > [         TU ]  S [        [        UUR	                  5       S95      U[        U5      S 5        Xl        [        R                  R                  U 5      U l
        [        R                  R                  U 5        g )Nr2  )r  rs  r  r8  r   r   r9  r`   r   r*  r   r+  )r`  r9  rM  r  r  s       rv   rs  TMADescriptor.__init__  ss     !,,. - 	
  GG++D1		""4(rx   c                &    UR                  U 5        g rq   )generate_tma_descriptorr)	  s     rv   r*	  TMADescriptor.codegen      ''-rx   c                    U R                   $ rq   )r9  rg  s    rv   
get_tensorTMADescriptor.get_tensor  r  rx   )r   r9  )r9  rn   r
  ztuple[str, tuple[Any, ...]]r   r
  )r9  rn   r>  r_  )r   r   r   r   r!  r
  r   r  r
  r  rs  r*	  r&
  r   r  r  s   @rv   r
  r
    s     (*F$)=='B=	= = 'B	 )*. rx   r
  c                  H   ^  \ rS rSrSr S         SU 4S jjjrSrU =r$ )r
  i  z
the new host-side TMA Descriptor API:
(the ones obtained via create_{1d,2d}_tma_descriptor calls).

See also TMADescriptorStable for the new API.
c                ^  > [        U5      S;   d   e[        U5      [        U5      :X  d   eUc  UR                  5       R                  nX l        X0l        X@l        [        U R                  5      U l        U/n/ U R                  QU R                  QU R
                  Pn[        TU ]!  UUUS9  g )N)r5   r   r9  rM  r  )	r   r   r|  r  
block_dimselement_sizer  r  rs  )r`  r9  r  r+
  r,
  rM  r  r  s          rv   rs  "TMADescriptorExperimental.__init__  s     4yF"""4yC
O+++!++-66L	$(		N	
YY
__
 
 	' 	 	
rx   )r+
  r  r,
  r  rq   )
r9  rn   r  list[Union[int, torch.SymInt]]r+
  r.
  r,
  r]  r   r   r   r   r   r   r!  rs  r   r  r  s   @rv   r
  r
    sG     '+

 -
 3	

 $
 

 
rx   r
  c                  0   ^  \ rS rSrSrSU 4S jjrSrU =r$ )r
  i  z
the new host-side TMA descriptor API
(the ones obtained via TensorDescriptor.from_tensor).

See also TMADescriptorExperimental for the old API.
c                2   > X l         [        TU ]	  UU/US9  g )Nr*
  )block_shaper  rs  )r`  r9  r2
  r  s      rv   rs  TMADescriptorStable.__init__!  s&    &8% 	 	
rx   )r2
  )r9  rn   r2
  r.
  r/
  r  s   @rv   r
  r
    s    
 
rx   r
  c                  J   ^  \ rS rSr          SU 4S jjrSS jrSrU =r$ )SubgraphBufferi+  c                n  > [         T
U ]  S X5        X0l        X@l        [        R
                  R                  U 5      U l        [        R
                  R                  U 5        [        R
                  R                  U R                  XE5      U l
        [        U R                  5      nU HT  nXpR                  R                  UR                  '   U R                  R                  R                  UR                  5        MV     U Vs/ s H  oR                  PM     snU l        SS KJs  Jn	  [        R(                  " U R                  5         U	R+                  SSSS9   U R                  R,                  " U R                  6   S S S 5        S S S 5        g s  snf ! , (       d  f       N= f! , (       d  f       g = f)Nr   FATEN)max_autotunemax_autotune_gemmmax_autotune_gemm_backends)r  rs  rH  example_inputsr`   r   r*  r   r+  make_subgraphsubgraphrP  rM  r  graph_input_namesr  
sym_inputstorch._inductor.configr  r6   set_graph_handlerr   run)r`  r4  r   rH  r;
  subgraph_namer?
  sym_inpsym_varinductor_configr  s             rv   rs  SubgraphBuffer.__init__,  s=    	v3,GG++D1		""4(--dgg~U(5
!G7>MM&&w||4MM++227<<@ " 8BBzG<<zB88  / &&""'+1 ' 
 !!4#6#67 0/	 C  0/s*   F	F&$F?F&
F#	F&&
F4c                     " S S5      nU R                    Vs/ s H  o3R                  5       PM     nnUR                  U" U R                  5      / U R                  QUQU R
                  /5        g s  snf )Nc                      \ rS rSrSS jrSrg),SubgraphBuffer.codegen.<locals>.CodegenGraphiP  c                2    Xl         UR                  U l        g rq   r   r   )r`  r   s     rv   rs  5SubgraphBuffer.codegen.<locals>.CodegenGraph.__init__Q  s    "
!JJ	rx   rL
  N)r   rd   )r   r   r   r   rs  r   r   rx   rv   CodegenGraphrJ
  P  s    'rx   rN
  )rM  r  'codegen_subgraph_with_flattened_outputsr=
  r?
  r   )r`  r#	  rN
  r   outer_inputss        rv   r*	  SubgraphBuffer.codegenO  sg    	' 	'
 8<{{C{!++-{C77'-doo--YYK	
 Ds   A1)r;
  rH  r   r=
  r?
  )
r4  r  r   r  rH  torch.fx.GraphModuler;
  	list[Any]rC
  r   r>  )r   r   r   r   rs  r*	  r   r  r  s   @rv   r5
  r5
  +  sC    !8!8 "!8 !	!8
 "!8 !8F
 
rx   r5
  c                  z   ^  \ rS rSrS rS
S jr S   SU 4S jjjrSS jr  S
U 4S jjrSS jr	SS jr
S	rU =r$ )UserDefinedTritonKerneli]  c                z  ^ SSK Jn  SSKJn  UR	                  U R
                  5      m/ n/ n/ n[        TU5      (       a  [        TS5      (       a&  UR                  U4S jTR                   5       5        O.[        TS5      (       d   eUR                  TR                  5        [        TS5      (       a<  TR                   H+  nUR                  TR                  R                  U   5        M-     O.[        TS5      (       d   eUR                  TR                  5        TR                   nTR                  mTX4U4$ )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  V   >#    U  H  nTR                   R                  U   v   M      g 7frq   )r   	arg_names)r  r   r  s     rv   r  BUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>k  s$      *4FqFII''*4Fs   &)restore_value	reset_idxreset_to_zero)triton.runtime.autotunerrW
  *torch._higher_order_ops.triton_kernel_wraprX
  
get_kernel
kernel_idxrr   r  r  rY
  r]
  r^
  r  r   r[
  r_
  configs)r`  rW
  rX
  rd
  restore_value_argsreset_to_zero_argsr   r  s          @rv   get_kernel_and_metadata/UserDefinedTritonKernel.get_kernel_and_metadata^  s   6P"--doo>(*(*fi(( v}--")) *4:4F4F*  v7777"))&*>*>?v{++))A&--fii.A.A!.DE * v7777"))&*>*>?nnGYYFw4FFFrx   c                   SSK Jn  U R                  5       u  nnnnUR                  UUU R                  UUU R
                  5      u  nnn	U R                   V
s0 s H  oU R                  U
5      _M     nn
[        UR                   Vs/ s H  oR                  U   PM     sn5      n/ n/ n/ n/ n[        R                  " UR                  5       [        [        R                  " S5      U	5      5       GH|  u  nnUR!                  U5        UR!                  U5        [#        U[$        5      (       a@  UR!                  UR'                  5       5        UR!                  UR)                  5       5        M~  [#        U[*        [,        [.        [0        R2                  45      (       a-  UR!                  U5        UR!                  [5        U5      5        M  UU;   a)  UR!                  S5        UR!                  [*        5        GM	  UcY   U" 5       (       a)  UR!                  S5        UR!                  [*        5        GMB  UR7                  5         UR7                  5         GMe  [9        S[5        U5       SU 35      e   U R;                  U5        UR=                  UUUUUUSU R?                  5       U R@                  RB                  S9	  g s  sn
f s  snf )	Nr   )triton_version_uses_attrs_dictr  r  zUnsupported arg type: r  T)	arg_typesraw_argsraw_keystriton_metar  r   original_fxnode_name)"torch._inductor.utilsrj
  rg
  !define_user_defined_triton_kernelr   gridr	  r	  r0   
constexprsr[
  r  r  r  r   repeatr  rr   rn   r  r   rs   r  r   r   r   r   r  r  r&	  generate_kernel_callr   r	  r   )r`  r#	  rj
  r  rd
  re
  rf
  new_namern
  extra_launch_argsr	  
named_argsr   constexpr_namesr   rk
  raw_keys_filteredraw_args_filteredr   r  s                       rv   r*	  UserDefinedTritonKernel.codegen}  sy   H ((*	
 55KKII
		
 261S1S
1SAt$$Q''1S 	 
 %6CTCT%UCTa&6&6q&9CT%UV!	')')"I$4$4R$8:K L
ID# $$T*$$S)#v&&C1134  1C#udEJJ!?@@C   c+( B  % 233KKO$$S)%))+%))+),B49+RPSu*UVVC
F 	W%$$&&#??$!%!2!2 	% 
	
[
 &Vs   J>Kc                P   > [         TU ]  U5      [        U R                  U5      -  $ rq   )r  r"  r   rr
  r}  s     rv   r"  ,UserDefinedTritonKernel.get_free_symbol_uses  s-    
 w+M:=MII}>
 
 	
rx   c                    [        5       $ rq   r/   rg  s    rv   r  0UserDefinedTritonKernel.get_unbacked_symbol_defs  r  rx   c          	       > / n0 n/ nUR                  5        H  u  p[        U	[        5      (       aX  [        R	                  U R                  U	5      5      n
X;   a  [        R                  XU   5      n
UR                  U
5        XU'   Mr  UR                  U	5        XU'   M     [        U5      S:w  d   eUS   R                  5       U l        [        TU ]5  S [        U R                  S9U[        U5      U5        Xl        X l        U R%                  5       u  p  nUR&                   Vs/ s H  oU;   d  M
  UPM     snU l        SSKJn  [        U5      S:  a  US   R.                  O0 nU" U0 UEUEU5       Vs/ s H  nUU   PM
     snU l        U R0                   Vs/ s H!  n[3        [        U R                  S9UU 5      PM#     snU l        [6        R8                  R;                  U 5        g s  snf s  snf s  snf )Nr   r  )identify_mutated_tensors)r  rr   rm   r'  r  r  r
  r  r  r   r   r   r  rs  r  r   rc
  rr
  rg
  r[
  r	  ra
  r
  r   mutable_argsrK  r	  r`   r   r+  )r`  rc
  rr
  tma_descriptor_metadatakernel_argsrM  r   r  r	  r  r   r  rd
  r   r  r
  autotuned_kwargsr  r  r  s                      rv   rs   UserDefinedTritonKernel.__init__  s    %%'DA!Y'' 99$:L:LQ:OP/%,,Q0JKAa q	$$Q'q	 ( 6{aQi**,dkk*- 	
 %	 $ < < >A "++.
+Ck/AC+.
* 	X03Gq0@71:,,b 0;;;*:;=T
 
 ((!
( :T[[93E(!
 	
""4(%.

!
s   '	G-4G-8G2(G7c                ,    [        U R                  5      $ rq   )r   r	  rg  s    rv   r  #UserDefinedTritonKernel.get_outputs  s    D))**rx   c                    U R                   $ rq   r  rg  s    rv   r   "UserDefinedTritonKernel.get_device  r  rx   )r   rr
  rc
  r
  r	  r	  r>  r`  rk  r  r  rQ  )r   r   r   r   rg
  r*	  r"  r  rs  r  r   r   r  r  s   @rv   rU
  rU
  ]  sQ    G>N
b %*
!
	!
 
3)	3)j+ rx   rU
  c                  X   ^  \ rS rSrSrS	S jrS
S jrSS jrSS jrS	U 4S jjr	Sr
U =r$ )InplaceBernoulliFallbacki  =
This needs to be a custom class to handle mutation properly
c                   S U R                    5       u  n[        R                  R                  (       a\  UR	                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g UR	                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g )Nc              3  @   #    U  H  oR                  5       v   M     g 7frq   r  r  r   s     rv   r  3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     ;{!##%%{   r  r  z, NULL)r  )rM  r`   r   r/	  r  r>	  r  r  reprr  ending)r`  r#	  ru   s      rv   r*	   InplaceBernoulliFallback.codegen  s    ;t{{;77 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klrx   c                    gr  r   rg  s    rv   r  (InplaceBernoulliFallback.should_allocate(  r  rx   c                >    U R                   S   R                  5       /$ r  rM  r  rg  s    rv   r4  +InplaceBernoulliFallback.get_mutation_names+      A'')**rx   c                    [        5       $ rq   r/   rg  s    rv   r  1InplaceBernoulliFallback.get_unbacked_symbol_defs.  r  rx   c                R  > [         TU ]  S [        UR                  5       S9U R	                  U/5      UUS9  [
        R                  R                  UR                  5       5        [
        R                  R                  U 5      U l
        [
        R                  R                  U 5        g )Nr  r	  )r  rs  r  r   r(  r`   r   r.  r  r*  r   r+  )r`  r	  ru   r  r  s       rv   rs  !InplaceBernoulliFallback.__init__1  s~    alln-$# 	 	
 	
##AJJL1GG++D1		""4(rx   rl  r>  rI  ro  r  r   r   r   r   r!  r*	  r  r4  r  rs  r   r  r  s   @rv   r
  r
    s&    +
) 
)rx   r
  c                  t   ^  \ rS rSrSrS
S jrSS jrSS jrSS jr  S
U 4S jjr	\
SSS jj5       rS	rU =r$ )InplaceCopyFallbacki?  r
  c                N    U R                  5       u  p#nUR                  X2U5        g rq   )r	  codegen_device_copy)r`  r#	  rA  r@  non_blockings        rv   r*	  InplaceCopyFallback.codegenD  s%    #'#4#4#6 <##Cl;rx   c                    gr  r   rg  s    rv   r  #InplaceCopyFallback.should_allocateH  r  rx   c                >    U R                   S   R                  5       /$ r  r
  rg  s    rv   r4  &InplaceCopyFallback.get_mutation_namesK  r
  rx   c                    [        5       $ rq   r/   rg  s    rv   r  ,InplaceCopyFallback.get_unbacked_symbol_defsN  r  rx   c           	       > [         TU ]  S UUUSSS9  [        R                  R	                  US   R                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )Nz
aten.copy_aoti_torch_copy_)r 	  r	  r   )	r  rs  r`   r   r.  r  r*  r   r+  )r`  r4  rM  r  r  s       rv   rs  InplaceCopyFallback.__init__Q  sr     	+. 	 	
 	
##F1I$6$6$89GG++D1		""4(rx   c                    X4 Vs/ s H  o@R                  U5      PM     nnU4n[        [        UR                  5       S9UU5      nU$ s  snf r  )r  r
  r  r   )r  rA  r@  r
  r   rM  r  r  s           rv   r  InplaceCopyFallback.createc  sS    14
;
1##A&
;%$cnn./

  <s   A	rl  r>  rI  ro  r  r`  )r
  r   )r   r   r   r   r!  r*	  r  r4  r  rs  r  r  r   r  r  s   @rv   r
  r
  ?  s?    <+)
 
)$  rx   r
  c                  J    \ rS rSrSrS
S jrSS jrSS jrSS jrSS jr	Sr
g	)MutatingFirstArgExternKernelio  r
  c                    / S U R                    5       Q[        [        U R                  5      QnUR	                  U R                  5        SSR                  U5       SUR                   35        g )Nc              3  @   #    U  H  oR                  5       v   M     g 7frq   r
  r
  s     rv   r  7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>v  s     9[!!##[r
  r  r  r  )rM  r  r
  r  r  r>	  r  r
  )r`  r#	  argrefss      rv   r*	  $MutatingFirstArgExternKernel.codegent  sl    
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
rx   c                    gr  r   rg  s    rv   r  ,MutatingFirstArgExternKernel.should_allocate}  r  rx   c                >    U R                   S   R                  5       /$ r  r
  rg  s    rv   r4  /MutatingFirstArgExternKernel.get_mutation_names  r
  rx   c                    [        5       $ rq   r/   rg  s    rv   r  5MutatingFirstArgExternKernel.get_unbacked_symbol_defs  r  rx   c                    gr  r   rg  s    rv   has_side_effects-MutatingFirstArgExternKernel.has_side_effects  r  rx   r   Nr>  rI  ro  r  )r   r   r   r   r!  r*	  r  r4  r  r
  r   r   rx   rv   r
  r
  o  s     
+rx   r
  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )ResizeStorageBytesi  c                (  > [        U[        5      (       d   S5       e[        TU ]  S [	        UR                  5       S9U R                  U/5      U4S9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        SU l        SU l        [        R                  R                   R#                  UR$                  R                  5       5        g )NzTODO: dynamic shapesr  )r  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)rr   rs   r  rs  r  r   r(  r`   r   r.  r  r*  r   r+  r 	  r	  never_reuse_buffersr@  r3  )r`  variabler  r  s      rv   rs  ResizeStorageBytes.__init__  s    (C((@*@@(h1134
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG	##''(>(>(@Arx   )r	  r   r 	  r>  r	  r  s   @rv   r
  r
    s    B Brx   r
  c                  6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )SetSourceTensorKerneli  c                  > UR                  5         [        TU ]	  UR                  5       X/S[        R
                  R                  R                  R                  S9  [        R                  R                  R                  UR                  R                  5       5        [        R                  R                  R                  UR                  5       5        [        R                  R                  R                  U R                  5       5        UR                  5       n[!        [#        US9X5      [!        [#        US9X 5      /U l        g )Nz!torch.ops.aten.set_.source_Tensor)r 	  r	  r  )r  r  rs  r   r   r^   r-	  set_source_Tensorr`   r   r
  r@  r3  r  r   rK  r  r	  )r`  self_tensorstorage_tensorr   r  s       rv   rs  SetSourceTensorKernel.__init__  s    $$&%%')B		++99	 	 	
 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:V4kH:V4nK!
rx   c                v    U R                   S   R                  5       U R                   S   R                  5       /$ r  r
  rg  s    rv   r:  2SetSourceTensorKernel.get_inputs_that_alias_output  s/    A'')4;;q>+B+B+DEErx   r	  r>  ro  )r   r   r   r   rs  r:  r   r  r  s   @rv   r
  r
    s    
"F Frx   r
  c                  p   ^  \ rS rSrSrSS jrSS jrSS jrSS jrSSS	.       SU 4S
 jjjr	Sr
U =r$ )ScatterFallbacki  z
This needs to be a custom class to handle mutation properly.
This class handles both aten.scatter_ and aten.scatter_reduce_.
It also handle the case `src` being a scalar properly.
c           
        U R                   S   n[        R                  R                  (       a  SSS.nX#;   a  X2   nU R                  (       a  S U R
                   5       u  pEnO$S U R
                   5       u  pEU R                  S   nUR                  UX@R                  S   XV/U R                  U R                  U R                  UU R                  5       5        g )	Nr  rE  rD  )r@  multiplyc              3  @   #    U  H  oR                  5       v   M     g 7frq   r
  r
  s     rv   r  *ScatterFallback.codegen.<locals>.<genexpr>  s     Jk2244kr
  c              3  @   #    U  H  oR                  5       v   M     g 7frq   r
  r
  s     rv   r  r
    s     EA--//r
  r5   r   )r   r`   r   r/	  src_is_tensorrM  r  generate_scatter_fallbackr	  r 	  r	  )r`  r#	  r  get_operator_enumru   r   r@  s          rv   r*	  ScatterFallback.codegen  s    X&77(-6 B**2JdkkJOQsEEJQ$$Q'C))""1%u2  ##!	
rx   c                    gr  r   rg  s    rv   r  ScatterFallback.should_allocate  r  rx   c                >    U R                   S   R                  5       /$ r  r
  rg  s    rv   r4  "ScatterFallback.get_mutation_names  r
  rx   c                    [        5       $ rq   r/   rg  s    rv   r  (ScatterFallback.get_unbacked_symbol_defs  r  rx   NTr  include_selfc               d  > [        U[        5      U l        U R                  (       a&  X$U4 Vs/ s H  oR                  U5      PM     n	nU4n
O$X$4 Vs/ s H  oR                  U5      PM     n	nX54n
[        TU ]  S [        UR                  5       S9U R                  U	5      U
XgS.[        U5      SS/US9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R!                  U 5        g s  snf s  snf )Nr  r
  r  r
  )r 	  r	  r	  )rr   rm   r
  r  r  rs  r  r   r(  r   r`   r   r.  r  r*  r   r+  )r`  r	  ru   r(  r   r@  r  r
  r   tensorsr  r  s              rv   rs  ScatterFallback.__init__  s    (Y7 78oFo))!,oGF FM78jAj))!,jGA JMalln-(<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D(D-)r   r
  r>  rI  ro  r  )r(  rs   r  rO  r
  r   r   r   r
  r  s   @rv   r
  r
    sV    
.+ !%!!) 	!) !) !) 
!) !)rx   r
  c                  X   ^  \ rS rSrSrS	S jrS
S jrSS jrSS jrS	U 4S jjr	Sr
U =r$ )IndexPutFallbacki  zI
This needs to be a custom class to handle mutation and indices properly
c                   S U R                    5       tp#n/ n[        U5      n[        U R                  5       Hd  u  pxU R                  U   b  UR	                  [        U5      5        M1  UR	                  [        R                  R                  R                  5        Mf     UR                  " U R                  5       X%U/U R                  5       Q76   g )Nc              3  @   #    U  H  oR                  5       v   M     g 7frq   r
  r
  s     rv   r  +IndexPutFallback.codegen.<locals>.<genexpr>  s     &Rk':':'<'<kr
  )rM  rE	  r   r  r  rF	  r`   r   r  r  generate_index_put_fallbackr>	  r	  )	r`  r#	  ru   r   valid_indicesr  iter_valid_indicesr   r   s	            rv   r*	  IndexPutFallback.codegen  s    &Rdkk&R#]!-0dll+DA||A*t$678qww33<<=	 , 	++  "A	
9=9P9P9R	
rx   c                    gr  r   rg  s    rv   r   IndexPutFallback.should_allocate  r  rx   c                >    U R                   S   R                  5       /$ r  r
  rg  s    rv   r4  #IndexPutFallback.get_mutation_names  r
  rx   c                    [        5       $ rq   r/   rg  s    rv   r  )IndexPutFallback.get_unbacked_symbol_defs  r  rx   c           
       > X0l         U Vs/ s H	  ofc  M  UPM     nnX$/UQ Vs/ s H  o R                  U5      PM     nnSn	[        T
U ]  S [	        WR                  5       S9U R                  U5      U4SU	US9  [        R                  R                  U R                  S   R                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g s  snf s  snf )Naoti_torch_index_put_outr  zaten.index_put_)r 	  r	  r	  r   )r  r  r  rs  r  r   r(  r`   r   r.  rM  r  r*  r   r+  )r`  r	  ru   r  r   
accumulater   r
  r
  r	  r  s             rv   rs  IndexPutFallback.__init__  s    $+=GqG=342M}2MN2MQ%%a(2MN4alln-(M0+# 	 	
 	
##DKKN$;$;$=>GG++D1		""4( >Ns   C9C9C>)r  r   r>  rI  ro  r  r
  r  s   @rv   r
  r
    s&    
+) )rx   r
  c                  .    \ rS rSr\S 5       rSS jrSrg)
DeviceCopyi+  c                   UR                  5       (       dU  [        S UR                  5        5       5      (       a0  [        R                  R
                  (       d  UR                  U5      $ [        R                  R                  U5        [        R                  R                  UR                  5       5        [        S5        U4n[        [        UUR                  5       UR                  5       S9U R!                  U5      /U5      $ )Nc              3  Z   #    U  H!  o[         R                  R                  ;   v   M#     g 7frq   )r`   r   rV	  r@  s     rv   r  $DeviceCopy.create.<locals>.<genexpr>0  s     G4Fq***4Fs   )+zDeviceCopy in input programr  )r+  r  rt  r6   aot_inductoruse_runtime_constant_foldingr1  r`   r   add_device_infor   rO   r
  r   r   r   r  )r  ru   r   r
  r  s        rv   r  DeviceCopy.create,  s     GA4D4D4FGGG''DD''//	'	/78%kkmZZ\
 q!"
 	
rx   c                   U R                  5       n[        U5      S:X  d   eU R                  (       a2  UR                  US   U R                  R	                  5       US   5        g UR                  US   U R	                  5       US   5        g )Nr   r   r5   )r	  r   r  r
  r  )r`  r#	  r   s      rv   r*	  DeviceCopy.codegenD  s{      "4yA~~''Q));;=tAw ''Q1G1G1I4PQ7Srx   r   Nr>  )r   r   r   r   r  r  r*	  r   r   rx   rv   r
  r
  +  s    
 
.Trx   r
  c                  X   ^  \ rS rSrSrS	S jrS
S jrSU 4S jjrSS jrSS jr	Sr
U =r$ )r   iO  z3
The result of a call to aten._local_scalar_dense.
c                    [        5       $ rq   r/   rg  s    rv   rs  DynamicScalar.get_readsT  r  rx   c                    gr  r   rg  s    rv   r  DynamicScalar.should_allocateW  r  rx   c                   > UR                  5         [        TU ]	  S [        [        R
                  " S5      S9U R                  U/5      5        Xl        X l        g Nr  r  )	r  r  rs  r  r   r   r(  symkeypath)r`  r  r  r3  r  s       rv   rs  DynamicScalar.__init__Z  sG    *ELL$78$:M:Mtf:U	
 rx   c                .    [        U R                  /5      $ rq   )r0   r  rg  s    rv   r  &DynamicScalar.get_unbacked_symbol_defsb  s    488*%%rx   c                &    UR                  U 5        g rq   )codegen_dynamic_scalarr)	  s     rv   r*	  DynamicScalar.codegene  s    &&t,rx   )r  r  rh  rI  r>  r  )r   r   r   r   r!  rs  r  rs  r  r*	  r   r  r  s   @rv   r   r   O  s&    &- -rx   r   c                  f   ^  \ rS rSrSrS
S jrSS jrSU 4S jjrSS jrSSS jjr	SS jr
S	rU =r$ )r   ii  z-
The result of a call to aten._assert_scalar
c                    [        5       $ rq   r/   rg  s    rv   rs  AssertScalar.get_readsn  r  rx   c                    gr  r   rg  s    rv   r  AssertScalar.should_allocateq  r  rx   c                v   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        g r  )r  rs  r  r   r   scalarrn	  )r`  r  rn	  r  s      rv   rs  AssertScalar.__init__t  s3    ell512	
 rx   c                    gr  r   rg  s    rv   r
  AssertScalar.has_side_effects  r  rx   c                .    [        U R                  U5      $ rq   )r   r  r!  s     rv   r"  !AssertScalar.get_free_symbol_uses  s    ];;rx   c           	        [         R                  (       d  g [        [        U R	                  SS95      5      n[
        R                  R                  (       a^  SU S3n[
        R                  R                  R                  U R                  SS9nUR                  SU SU R                   SU S	35        g [
        R                  R                  R                  U R                  SS9nUR                  S
U S35        UR                  S[        U R                  5       S35        UR                  U R                  5        S35        g )NFrL  zstd::to_string(r  )r  zif (!(z()) { throw std::runtime_error("Expected z but received " + z); }zif not (z):z    raise RuntimeError(z = None)r6   scalar_assertsrF	  rE	  r"  r`   r   r/	  r  codegen_cpp_sizevarr  r  rn	  codegen_python_sizevarr
  r  )r`  r#	  symbol
symbol_strsizevars        rv   r*	  AssertScalar.codegen  s$   $$ d44454IJK77*6(!4Jgg**>>e ? G 	!J488*Tfgqfrrwx gg**AAe B G 	45 7TXX7GqIJ  19:rx   )rn	  r  rh  rI  r>  r`  )r   r   )r   r   r   r   r!  rs  r  rs  r
  r"  r*	  r   r  r  s   @rv   r   r   i  s+    	<; ;rx   r   c                  *    \ rS rSr% S\S'   S\S'   Srg)ExternKernelNodei  r   r   zexport_schema.Noder   r   Nr   r   rx   rv   r-  r-    s    
I
rx   r-  c                     ^  \ rS rSrSr SSS. SU 4S jjjjrSU 4S jjrSS jrSS jrS	 r	\
S
 5       rS rS rSS jrS rSS jr\
SS j5       r\S 5       rU 4S jrSrU =r$ )FallbackKerneli  z
A class that represents a fallback kernel for handling operators that are not
directly support by inductor. It currently supports functional ops, view ops,
inplace aten ops, and mutating ops that are auto-functionalizable.
Nr	  c                 >^  [         TT ]  U[        U5      [        U5      US9  ST l        UT l        [        U[        R                  R                  [        R                  R                  45      (       d   SU S[        U5       S35       eUT l        UT l        Uc  0 OUT l        [        R                  R!                  T R"                  5        / T l        / T l        [        T R                  [        R                  R                  5      (       a  g ST R                  R)                  5       ;   a  g T R                  R*                  n[        R,                  R.                  R1                  T R                  5      (       a-  T R&                  R3                  US   R5                  5       5        g UR6                  (       a  [9        U5      (       d  [;        SU 35      eT R                  T R<                  T R>                  5      u  pS
U 4S	 jjn
[        R,                  R.                  RA                  XU5       H  u  pU
" X5        M     g )Nr
  Fz#Fails to create FallbackKernel for r  z not supported_c10d_functionalr   z'NYI: Can't generate FallbackKernel for c                "  >^  [        T R                  [        R                  5      (       a  [        U[        [
        45      (       d   e[        R                  " T R                  5      (       a  [        U[
        [        45      (       a   eUc  g T R                  c  g SU U4S jjn[        R                  " T R                  5      (       a  Ub  U H  nU" U5        M     g g [        R                  " T R                  5      (       d   eU" U5        g )Nc                  > TR                   R                  U R                  5       5        TR                  R                  (       a<  TR
                  R                  [        [        U R                  5       S9U T5      5        g g r  )	alias_namesr  r  
alias_infois_writer	  rK  r  r   )r   infor`  s    rv   	add_aliasPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias	  sZ      ''

5??++))00&z'H!TR ,rx   r>  )
rr   r   r   ListTyper   r   library_utilsis_tensor_like_typer6  is_tensorlist_like_type)r8  r  r9  optional_tensor_argr`  s   `   rv   handle_aliasing_and_mutation=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation  s    $))U^^44!#e}555500;; &cE4=9999{&  44TYY???/2+!"56 03 # %88CCCC#rx   r>  )!r  rs  r   use_runtime_dispatchr	  rr   r   r	  r	  r8	  r   r	  rP	  r   r`   r   warn_fallbackr 	  r5  r
  r   r	  _libraryr  mutates_and_returns_first_argr  r  
is_mutabler   r  rM  r  
zip_schema)r`  r4  r  rd	  nontensor_argsrP	  r   r	  schemar   r@  r8  r  r  s   `            rv   rs  FallbackKernel.__init__  s    	+.!	 	 	
 %*!!2

%%

..
 
 	X 14<.W	X 
 ","Nb	d556 '))+d&&

(F(FGG !1!1!6!6!88
 !!)) >>==d>N>NOO&&{1~'>'>'@A%;F%C%C%9&B  **4;;8J8JK	: --88vNID(3 Orx   c                @  > [         TU ]  5       nU R                  [        R                  R
                  R                  L a]  U R                   HM  n[        U[        5      (       d  M  UR                  [        R                  " UR                  5       5      5      nMO     U$ rq   )r  r  r	  r   _prims	rng_primsgraphsafe_run_with_rng_stater  rr   rT	  	with_readr7   r  r  )r`  r  r  r  s      rv   r  FallbackKernel.get_read_writes  sw    g-/u||55RRR))c>22"-"7"7$,,S\\^<#K * rx   c           	     n    UR                  U R                  5       U R                  [        U SS 5      5      $ Nr	  )(codegen_unbacked_symbol_defs_for_outputsr  rF  r   r)	  s     rv   codegen_unbacked_symbol_defs+FallbackKernel.codegen_unbacked_symbol_defs'  s0    ??MMOT\\749Ld+S
 	
rx   c                    [        U SS 5      =n(       aC  [        [        R                  R                  R
                  U5      nUc   eUR                  5       $ [        5       $ rR  r   r+   r`   r   r   r   r  r0   r`  r	  resolveds      rv   r  'FallbackKernel.get_unbacked_symbol_defs,  Z     '.A4 HHH0  **,=H '''==?"<rx   c                F   [         R                   " S S5      5       nU R                   Vs/ s H  o!" UR                  5       5      PM     nnU R	                  X0R
                  5      u  pE[        R                  R                  (       a  [        U R                  [        R                  R                  5      (       a  U R                  XE5      n[        U R                  R                   R"                  U5       VVs/ s H8  u  pb[        R                  R$                  R'                  X&R(                  5      PM:     nnnO9U Vs/ s H,  n[        R                  R$                  R'                  U5      PM.     nnU R*                  R-                  U5        U$ s  snf s  snnf s  snf )Nc                  *    \ rS rSr% S\S'   SS jrSrg))FallbackKernel.codegen_args.<locals>.Shimi7  r   refc                    U R                   $ rq   )r_  rg  s    rv   r
  2FallbackKernel.codegen_args.<locals>.Shim.__repr__;  s    xxrx   r   Nr[  )r   r   r   r   r   r
  r   r   rx   rv   Shimr^  7  s    H rx   rb  )rp  	dataclassrM  r  rP	  r  r`   r   r/	  rr   r	  r   r	  r	  r	  r   r	  r	  r  r	  r	  r   rR  )r`  rb  ru   rd	  r   r   params          rv   r	  FallbackKernel.codegen_args6  sH   				  	  
	  =AKKHKqtA//12KH**;8J8JK77:d.>.>

@U@U#V#V..t<D !$D$4$4$<$<$F$F M MHE $$33AG M  D
 EIIDqAGG((77:DDI 	6" I
 Js   F:?F3Fc                p   U (       a*  U  Vs/ s H  n[        U[        5      (       a  M  UPM     snOS nU(       a;  U  Vs/ s H)  oDR                  5       (       d  M  UR                  5       PM+     nnUS   $ [        U[        R                  5      (       a  UR
                  $ [        U[        [        45      (       al  [        S U 5       5      nU Vs/ s H  ow(       d  M  UPM     nn[        U5      S:X  a  US   $ U H!  n[        UR                  5      (       d  M  Us  $    US   $ g s  snf s  snf s  snf )Nr   c              3  N   #    U  H  n[         R                  S U5      v   M     g 7frq   )r/  find_devicer8  s     rv   r  -FallbackKernel.find_device.<locals>.<genexpr>Z  s#      $=K**433^r  r5   )rr   r  r   r   r_	  r   r   r   r0   r   rW   r   )rd	  rj	  r   non_torch_bind_tensor_argsr  devices
device_setr   s           rv   rh  FallbackKernel.find_deviceM  s     $J1:a+IQJ 	#
 &3>S;C..BR's~~';GS1:nell33!(((ntUm44# $=K$ J -7AJ&&vJGA7|q qz!!&++&&!M " 1:+ K
 T Bs!   D)D)D.D.
D3 D3c                    [        U R                  [        R                  R                  5      (       a  g[        U R                  5      R                  5       $ r  )rr   r	  r   r	  r8	  r$   rF  rg  s    rv   r
  FallbackKernel.has_side_effectsg  s<    d&&

(F(FGGt//0;;==rx   c                    U R                   $ rq   )r5  rg  s    rv   r:  +FallbackKernel.get_inputs_that_alias_outputl  r~  rx   c                P    [        U R                  5      S::  d   eU R                  $ r  )r   r
  rg  s    rv   r4  !FallbackKernel.get_mutation_nameso  s'    4&&'1,,,"""rx   c           
        [         R                  SU R                  5       U R                  5        [	        U [
        5      (       d   eU R                  U R                  U R                  5      u  pU R                  X5      nU R                   Vs/ s H  nU R                  " U40 UD6PM     nnU R                  n[        R                  R                  (       d  / UQUQ$ [        SS5      nUR!                  XQU5      nS n[	        U["        R$                  R&                  R(                  5      (       a#  UR+                  US   US   5      R,                  n	OUR.                  R,                  n	[1        U	5      S:X  aB  U R2                  (       a  U R2                  OU R4                  n
U	S   R6                  nU" X5      /nO:[9        XR2                  5       VVs/ s H  u  pU" UR6                  U5      PM     nnn[;        U R                  5       [<        R>                  " U R                  RA                  5       UU0 S9S9n[        R                  RB                  RE                  U5        / UQUQ$ s  snf s  snnf )a  
ProxyExecutor Design Note
We export the ExternFallbackNodes (for custom ops) into a serialized file
and run it with a host side proxy executor to address the ABI problem
This is currently only implemented for fbcode. Eventually, we will also make this work for OSS.
Detailed design doc can be found at
https://docs.google.com/document/d/1wC4DOZFaYym2t1Esz0X5yxlLI3RDnSiyRbUus3bkJ64/edit?usp=sharing
z4Extern kernel node added for node %s with target %s.Nc           	     d   [        U [        R                  [        R                  45      (       a  Un[        U[        [
        45      (       a  [        U5      S:X  d   eUS   n[        U [        R                  5      (       a=  [        R                  R                  [        R                  " UR                  5       S9S9$ Ub   e[        R                  R                  SS9$ [        U [        R                  5      (       a{  [        U R                  5       [        R                  5      (       aN  [        R                  R                  U Vs/ s H%  n[        R                  " UR                  5       S9PM'     snS9$ [        U [        R                  5      (       a  [        U R                  5       [        R                  5      (       a  Uc8  [        R                  R                  [        R                  R                  SS9S9$ [        R                  R                  [        R                  R                  [        R                  " UR                  5       S9S9S9$ [        U [        R                   5      (       a  [        R                  R                  US	9$ [#        S
[%        U 5       35      es  snf )Nr5   r   rl  )	as_tensorT)as_none)
as_tensors)as_optional_tensor)as_intzUnsupported return type )rr   r   
TensorTypeNoneTyper   r   r   export_schemaArgumentr  TensorArgumentr  r;  getElementTypeOptionalTypeOptionalTensorArgumentIntTypeRuntimeErrorr   )return_typerA  r   s      rv   handle_single_outputFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output  sO   +(8(8%..'IJJftUm44v;!+++ )Ck5+;+;<<(1188"/">">CLLN"S 9   ;&;(11888FFK88Z**,e.>.>> > %--44 $* #)C &44#,,.I#)  5   K););<<**,e.>.>B B >(1188+8+O+O+V+V$( ,W , 9   )1188+8+O+O+V+V&3&B&B%+__%6' ,W , 9   K77$--44F4CC"%=d;>O=P#QRR5 s   
,J-r   r5   )r  rM  rF  metadata)r   r   )#r  r  r  r	  rr   r/  rP	  rM  r  r	  r	  r	  r`   r   aot_moder   serialize_inputsr   r\	  	torchbindCallTorchBindrI  returnsr	  r   rF  r	  r	  r   r-  r}  rb   r   extern_kernel_nodesr  )r`  r   r   r  ordered_kwargsr  
serializernamed_argumentsr  r  rF  r  output_argumentsreturn_schemarA  r   s                   rv   export_extern_kernel_node(FallbackKernel.export_extern_kernel_nodes  s.    			BMMO	
 $////**4;;8J8JK**48 99
9 !!#009 	 
 !!ww+T+N++*46
$55fFK-	S^ fe55??MMNNmmDGT!W5==Gnn,,Gw<1 '+lldll8M8MG!!*..K 4[ JK .1,,-G -G)M %]%<%<fE-G   
  ##'',,.&(	
 	
##**40''''w
T s   I*I/c                  ^ ^^ T R                   nUR                  S:X  a}  [        U[        R                  R
                  5      (       d   e[        R                  R                  (       a2  SSK	J
n  [        U5      U;  a  [        R                  SU5        ST l        O}UR                  S:X  a,  [        U[        R                  R
                  5      (       d   eOA[        R                  R                  (       a"  U[        R                   R"                  ;  T l        [        R                  R                  (       a  [        U[        R                  R
                  5      (       a  T R                  (       d  SU4S jjmT R%                  T R&                  T R(                  5      u  nm[*        R,                  " UUU 4S jT R.                   5       5      n[1        U4S	 j[3        XRR4                  R6                  5       5       5      T l        T R9                  U5        T R                  (       at  T R;                  5       nUR=                  T R?                  5       T R@                  U 4S
 jT R                   UT RB                  (       a  T RB                  OT RD                  5        ORURG                  T 5        [        T RH                  [J        5      (       a"  T RM                  U5        T RO                  U5        T RQ                  U5        g )Nr-	  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantizedc                   > [        U [        R                  5      (       a  T" U R                  5       5      $ [        U [        R                  5      $ rq   )rr   r   r  r  
NumberType)r   	is_numbers    rv   r  )FallbackKernel.codegen.<locals>.is_number  s=    a!3!344$Q%5%5%788!!U%5%566rx   c              3  J   >#    U  H  nTR                   " U40 TD6v   M     g 7frq   )r	  )r  r	  r   r`  s     rv   r  )FallbackKernel.codegen.<locals>.<genexpr>  s(      ? ))!6v6?s    #c              3  z   >#    U  H0  u  p[        U[        5      =(       a    T" UR                  5      v   M2     g 7frq   )rr   complexr	  )r  r  rS  r  s      rv   r  r    s2      ,DDA 1g&A9Q[[+AADs   8;c                 H   > / T R                  5       QT R                  5       Q$ rq   )r	  r	  rg  s   rv   r  (FallbackKernel.codegen.<locals>.<lambda>"  s"    F$++-F0C0C0EFrx   )r   ztorch.JitTyper   r   ))r	  r0	  rr   r   r	  r	  r`   r   r/	  torchgen.aoti.fallback_opsr  r   r  r  rB  r6   r  custom_ops_to_c_shimsrP	  rM  r  r  r  r	  rB  r   r	  r	  r&	  r  ,generate_fallback_kernel_with_runtime_lookupr  r 	  rF  r	  generate_fallback_kernelr4  r  r	  r	  rT  )	r`  r#	  r  r  r   	args_iterexported_argsr  r   s	   `      @@rv   r*	  FallbackKernel.codegen  s(   !!v%fejj&;&;<<<<ww""Lv;&;; KKa 15D--fejj&;&;<<<<WW   f11GGG % GG65::#8#899--7  ..t{{D<N<NOLD& "!??I ), ,	>>+C+CD, )D%
 	W%$$ ::<M@@''F   $$2G2G ,,T2$++v..))'2..w7))'2rx   c           	         [        U R                  U R                  [        U R	                  5       5      [        U R                  5       5      5      $ rq   )r6  r   r   rM   r   r   )rA  s    rv   tensor_to_layoutFallbackKernel.tensor_to_layout0  s9    MMLL%fkkm4%fmmo6	
 	
rx   c           	       ^ ^^^ [         R                  4nX;  a  [        R                  R                  O	[        5       nU   T R                  " U/UQ70 UD6u  nnnn	n
S S S 5        [        S W 5       5      mT R                  UW5      nU(       dI  [        U[        R                  R                  R                  5      (       a  [        R                  " S5      nUc  T " [        US9UUWW	W
S9mO U(       d   S5       eT " [!        US9UUWW	W
S9mU UUU4S jmT" U/ 5      n[        U["        [$        [&        45      (       a	  UTl        U$ U/Tl        U$ ! , (       d  f       N= f)Nc              3  8   #    U  H  n[        U5      v   M     g 7frq   )r/  r  s     rv   r  (FallbackKernel.create.<locals>.<genexpr>J  s     !K{,s"3"3{rq  r  r  r0  z"Not sure where to find device infoc                J  >^ ^ [        T [        [        45      (       a/  [        T 5      " UUU 4S j[	        [        T 5      5       5       5      $ [        T [        5      (       a<  T R                  5        VVs0 s H  u  p#UT" UT[        T 5      U4/-   5      _M      snn$ [        T [        R                  5      (       a}  [        TR                  T 5      TT5      n[        R                  (       d  T(       d  [        T 5      (       d3  [        R                   R"                  R%                  UR&                  5        U$ [        T [(        5      (       a  T $ [        T [        R*                  5      (       a  T R,                  R.                  $ T b   S[        T 5       S35       eg s  snnf )Nc              3  Z   >#    U  H   nT" TU   T[        T5      U4/-   5      v   M"     g 7frq   )r   )r  r   generate_outputr  rA  s     rv   r  AFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>k  s7      $/ $F1Iw4<:K9L/LMM/s   (+zFallbackKernel output type z is not supported)rr   r   r   r   r   r   r   r  r   r_	  MultiOutputr  r6    assume_unaligned_fallback_outputr]   r`   r   r0  r@  r   rs   SymIntr   r  )	rA  r  r  r  r  r  r  has_unaligned_inputpackeds	   ``   rv   r  .FallbackKernel.create.<locals>.generate_outputi  s[   &4-00F| $"3v;/$   FD)) %+LLN$2 g$v,9L8M.MNN$2  FELL11!((0 ;;*,V44GG--11#((;
FC((FELL11{{'''~ 1$v,?PQ~ 3s   6%F)r-	  *_fused_moving_avg_obs_fq_helper_functionalr`   r   r  r   ro	  rB  rh  rr   r   r\	  r  r  r   r  r  r   r   r   rF  )r  r  r   r   fake_incorrect_kernelscontextrj	  rd	  re	  rP	  r	  r   rF  r  r  r  s   `            @@@rv   r  FallbackKernel.create9  sj   "&"Q"Q!S!'!EAGG;= 	  ""6;D;F;!  "!K{!KKn=*E++55CC
 
 \\%(F!&)"3F ???6!0"3F 	  	D "."5geT233$FN  &YFNc Ws   E
E#c                    > [         TU ]  5       $ rq   )r  r	  r  s    rv   r	  FallbackKernel.apply_constraint  s    w'))rx   )r5  r   r
  r	  r	  rP	  rB  rq   r>  rg  r  ro  )rA  r  )r   r   r   r   r!  rs  r  rT  r  r	  rr  rh  r
  r:  r4  r  r*	  r  r  r  r	  r   r  r  s   @rv   r/  r/    s     j4 j4 
j4 j4X


 .  2>
 #m(^L3\ 
 
 V Vp* *rx   r/  c                  N   ^  \ rS rSrSrS	S jrS
S jrSS. SU 4S jjjrSrU =r	$ )ComplexViewi  z9View a complex number as two dtyped numbers or vice versac                    gr  r   rg  s    rv   r  ComplexView.should_allocate  r  rx   c                >    U R                   S   R                  5       /$ r  r
  rg  s    rv   r:  (ComplexView.get_inputs_that_alias_output  s    A'')**rx   Nr0  c          	     *   > [         TU ]  UUUUUUS9  g )Nr0  )r  rs  )r`  r4  r  rd	  rH  rP	  r	  r  s          rv   rs  ComplexView.__init__  s)     	/ 	 	
rx   r   rI  ro  r>  )
r   r   r   r   r!  r  r:  rs  r   r  r  s   @rv   r  r    s)    C+ 
 

 
rx   r  c                  *    \ rS rSr% S\S'   SS jrSrg)r  i  rT  r   c                    U R                   $ rq   r  rg  s    rv   r   MultiOutputLayout.get_device  r  rx   r   NrQ  )r   r   r   r   r   r   r   r   rx   rv   r  r    s    rx   r  c                  p   ^  \ rS rSrSS jr S	     S
U 4S jjjr S	   SS jjrSS jrSS jrSr	U =r
$ )r  i  c                    UR                  U 5        U R                  (       d#  U R                  U5        U R                  U5        g g rq   )codegen_multi_output!skip_size_stride_alignment_checksr	  r	  r)	  s     rv   r*	  MultiOutput.codegen  s:    $$T*55%%g.**73 6rx   c                   > [         TU ]  S X/S5        [        R                  R	                  U 5      U l        [        R                  R                  U 5        X0l        X@l        g rY  )	r  rs  r`   r   r*  r   r+  r  r  )r`  r4  r  r  r  r  s        rv   rs  MultiOutput.__init__  sK     	vw3GG++D1		""4(1R.rx   c                >    U R                   S   R                  U5      $ r  )rM  r"  r!  s     rv   r"   MultiOutput.get_free_symbol_uses  s     {{1~22=AArx   c                |    [        U R                  5      S:X  a#  [        U R                  S   [        5      (       a  gg)Nr5   r   TF)r   rM  rr   r  rg  s    rv   r  MultiOutput.should_allocate  s/    t{{q t{{1~'899rx   c                    U R                    Vs/ s HI  n[        U[        5      (       d  M  [        UR	                  5       5      S:  d  M9  UR                  5       PMK     sn$ s  snf r  )rM  rr   r/  r   r:  r  )r`  rO  s     rv   r:  (MultiOutput.get_inputs_that_alias_output  s\     {{
"#~.  C4467!; CLLN"
 	
 
s   A"A"A")r  r   r  r>  r`  )r4  r  r  zlist[tuple[Any, ...]]r   r   rk  rI  ro  )r   r   r   r   r*	  rs  r"  r  r:  r   r  r  s   @rv   r  r    sd    4 +0SS '	S 
S S %*B!B	!B

 
rx   r  c                     \ rS rSr% SrS\S'   S.S jrS/S jrS0S jrS1S jr	S2S	 jr
S3S
 jrS4S5S jjrS6S jrS7S jrS8S jrS.S jrS7S jr S9     S:S jjrS;S jrS<S jr S9     S=S jjrS>S jrS?S jrS@S jrSAS jrSBS jrSCS jrS.S jrS.S jrSDS jrSES jrS3S  jr SES! jr!SBS" jr" S9   SFS# jjr#SGS$ jr$SHS% jr%S4SIS& jjr&\'SJS' j5       r(SKS( jr)SJS) jr*SLS* jr+\'S+ 5       r,S3S, jr-\-r.S-r/g)Mr8  i  z;
TensorBox / StorageBox allow in-place mutation of Tensors
rn   r3  c                6    U R                   R                  5       $ rq   r\  rg  s    rv   r  !MutableBox.has_exceeded_max_reads  r^  rx   c                6    U R                   R                  5       $ rq   rL  rg  s    rv   r   MutableBox.get_device  rJ  rx   c                6    U R                   R                  5       $ rq   r2  rg  s    rv   r  MutableBox.make_loader      yy$$&&rx   c                6    U R                   R                  5       $ rq   )r3  r  rg  s    rv   r  MutableBox.make_indexer  rf  rx   c                6    U R                   R                  5       $ rq   )r3  r  rg  s    rv   r  MutableBox.get_stride  rJ  rx   c                6    U R                   R                  5       $ rq   rQ  rg  s    rv   r  MutableBox.get_name  rS  rx   Nc                8    U R                   R                  U5      $ rq   )r3  r  r  s     rv   r  MutableBox.has_large_inner_fn  s    yy++I66rx   c                8    U R                   R                  U5      $ rq   rX  r  s     rv   r  MutableBox.mark_reuse  rZ  rx   c                6    U R                   R                  5       $ rq   rd  rg  s    rv   r  MutableBox.realize_hint  rf  rx   c                6    U R                   R                  5       $ rq   )r3  r  rg  s    rv   r  MutableBox.unwrap_view  r  rx   c                6    U R                   R                  5       $ rq   )r3  r  rg  s    rv   r  MutableBox.is_input_buffer      yy((**rx   c                6    U R                   R                  5       $ rq   )r3  r  rg  s    rv   r  MutableBox.freeze_layout  s    yy&&((rx   c                8    U R                   R                  X5      $ rq   )r3  r  r   s      rv   r  *MutableBox.freeze_layout_with_stride_order  s     yy88NNrx   c                8    U R                   R                  U5      $ rq   )r3  r  r  s     rv   r  (MutableBox.freeze_layout_with_fill_order  s    yy66u==rx   c                8    U R                   R                  U5      $ rq   )r3  r  r
  s     rv   r  (MutableBox.freeze_layout_with_same_order  s    yy66v>>rx   c                8    U R                   R                  X5      $ rq   )r3  r  r  s      rv   r  +MutableBox.freeze_layout_with_exact_strides  s     yy99-WWrx   c                6    U R                   R                  5       $ rq   )r3  r  rg  s    rv   r  MutableBox.get_read_writes$  r  rx   c                6    U R                   R                  5       $ rq   r  rg  s    rv   rs  MutableBox.get_reads'  rn  rx   c                6    U R                   R                  5       $ rq   r  rg  s    rv   r  MutableBox.num_reads*  rn  rx   c                6    U R                   R                  5       $ rq   rh  rg  s    rv   r  MutableBox.get_storage_numel-  rj  rx   c                6    U R                   R                  5       $ rq   r  rg  s    rv   r%  MutableBox.get_reduction_type0  r  rx   c                6    U R                   R                  5       $ rq   r  rg  s    rv   r(  MutableBox.get_reduction_size3  r  rx   c                6    U R                   R                  5       $ rq   rl  rg  s    rv   r+  MutableBox.is_extern6  rn  rx   c                6    U R                   R                  5       $ rq   )r3  r.  rg  s    rv   r.  MutableBox.is_no_op9  rS  rx   c                8    U R                   R                  U5      $ rq   r  r  s     rv   r1  MutableBox.constant_to_device<  s    yy++F33rx   c                6    U R                   R                  5       $ rq   )r3  r4  rg  s    rv   r4  MutableBox.get_mutation_names?  r  rx   c                6    U R                   R                  5       $ rq   )r3  r7  rg  s    rv   r7  MutableBox.get_operation_nameB  r  rx   c                6    U R                   R                  5       $ rq   )r3  r:  rg  s    rv   r:  'MutableBox.get_inputs_that_alias_outputE  s    yy5577rx   c                6    U R                   R                  5       $ rq   r`  rg  s    rv   r  MutableBox.realizeH  rb  rx   c                8    U R                   R                  U5      $ rq   r5  r!  s     rv   r"  MutableBox.get_free_symbol_usesK  s     yy--m<<rx   c                6    U R                   R                  5       $ rq   rt  rg  s    rv   rt  MutableBox.get_read_namesP  rv  rx   c                6    U R                   R                  5       $ rq   )r3  r  rg  s    rv   r  MutableBox.get_defining_opS  r  rx   c                8    U R                   R                  U5      $ rq   )r3  r  r  s     rv   r  MutableBox.codegen_referenceV  s    yy**622rx   c                6    U R                   R                  5       $ rq   r3  r  rg  s    rv   r4  MutableBox.layoutY  s     yy((**rx   c                6    U R                   R                  5       $ rq   rH  rg  s    rv   r   MutableBox.get_layout^  rJ  rx   c                6    U R                   R                  5       $ rq   r  rg  s    rv   r  MutableBox.get_output_speca  r  rx   c                6    U R                   R                  5       $ rq   r-  rg  s    rv   r   MutableBox.get_sized  rS  rx   c                .    U R                   R                  $ rq   )r3  r   rg  s    rv   r   MutableBox.dtypeg  s    yyrx   c                ~   [        U R                  [        5      (       aQ  [        U 5      R                   S[        U R                  5      R                   S3nSnU R                  R                  nO&[        U 5      R                   S3nU R                  nSnU[        [        U5      5      U/nSR                  U5      $ )Nr  z))r  
)rr   r3  r8  r   r   r  r   r  )r`  line0endlr=  r  s        rv   r  MutableBox.__str__k  s    dii,,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyrx   r   rI  rQ  rU  rW  rY  r[  rq   r\  r^  r>  r_  r`  ra  rc  rd  rf  rg  rh  ri  rj  rN  rl  rn  ro  rk  r?  rB  rP  rH  rG  rJ  )0r   r   r   r   r!  r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rs  r  r  r%  r(  r+  r.  r1  r4  r7  r:  r  r"  rt  r  r  ru  r4  r   r  r   r   r  r
  r   r   rx   rv   r8  r8    sO    L2&'(&$7+('+) 7<OO/3O	O
>? DIX+X<@X	X
+%%-..%$4..8# %*=!=	!=
*+3 + +&+$   " Hrx   r8  c                  $    \ rS rSr\S 5       rSrg)rm   i  c                X    [        U [        5      (       a  U $ [        [        U 5      5      $ rq   )rr   r   rm   r(  r3  s    rv   r  TensorBox.create  s%    d122KD)**rx   r   N)r   r   r   r   rr  r  r   r   rx   rv   rm   rm     s    + +rx   c                  X    \ rS rSrSS jrS rSS jrSS jrSS jrS r	SS jr
S	 rS
rg)r(  i  c                    [        U R                  [        [        45      (       a5  U R                  R	                  5       [
        R                  R                  ;   $ gr  )rr   r3  r  r8  r  r`   r   r  rg  s    rv   r  StorageBox.is_input_buffer  s=    dii+!?@@99%%'177+?+???rx   c                    [        U R                  [        5      =(       a5    U R                  R                  5       [        R
                  R                  ;   $ rq   )rr   r3  r.  r  r`   r   rV	  rg  s    rv   rp  StorageBox.is_module_buffer  s9    tyy>3 :		""$(9(99	
rx   c           	        [        U R                  [        [        [        [
        [        45      (       a  U R                  R                  5       $ [        U R                  [        [        [        [        45      (       d   [        U R                  5      5       eU R                  R                  5       nU R                  R                  5       n[        S [        U R                  R!                  5       U R                  R#                  5       U R                  R%                  5       S9U R                  S9U l        [&        R(                  R+                  U R                  5      U R                  l        [&        R(                  R/                  U R                  5        U R0                  U R                  l        XR                  l        X R                  l        U R                  R,                  $ )Nr  r  )rr   r3  r  r'  r  r8  r%  r  r  rt  r  r  r   r|  rx  r   r   r   r   r`   r   r*  r   r+  rU  rY  rW  )r`  rY  rW  s      rv   r  StorageBox.realize  sS   II	
 	
 99%%''$))iD$%GHH 	
$IIK
 	
H ii//1II++-	"!yy++-ii))+YY'')
 
	 00;			""499- LL		 +		'		yy~~rx   c                    [        U R                  [        [        45      (       a:  U R                  R	                  5       R
                  S:  a  U R                  5         ggg)z<
Called on buffers we expect to be forced to realize later.
r5   N)rr   r3  r  rt  r  nontrivial_read_countr  rg  s    rv   r  StorageBox.realize_hint  sI    
 tyy9i"899		**,BBQFLLN G :rx   c                    [        U R                  [        5      =(       a8    U R                  5       [        R
                  :  =(       d    U R                  5       $ rq   )rr   r3  r  r  r6   realize_acc_reads_thresholdr  rg  s    rv   r  !StorageBox.has_exceeded_max_reads  s@    $))Y/ 
NNvAAA )&&(	
rx   c                r  ^ US:  a  [        U R                  [        [        45      (       a  [	        U R                  5      (       a9  U R                  R                  5       mSS/n[        U4S jU 5       5      (       a  gU R                  5       [        R                  :  =(       d    U R                  5       $ g)zR
A heuristic to decide if we should realize a tensor
that is used multiple times.
r5   expsigmoidc              3  @   >#    U  H  oTR                   ;   v   M     g 7frq   )used_ops)r  ru   opcounts     rv   r  5StorageBox.should_realize_on_reuse.<locals>.<genexpr>  s     @iG,,,is   TF)rr   r3  r  rt  r  r  rB  r  r6   realize_reads_thresholdr  )r`  r  	heavy_opsrC  s      @rv   should_realize_on_reuse"StorageBox.should_realize_on_reuse  s    
 19DII	9/EFFdii  ))446"I.	@i@@@ 6#A#AA -**, rx   c                R    U R                  U5      (       a  U R                  5         g g rq   )rG  r  r  s     rv   r  StorageBox.mark_reuse  s!    ''..LLN /rx   c                6    U R                   R                  5       $ rq   r  rg  s    rv   r  StorageBox.num_reads  rn  rx   r/  NrI  rN  r>  r^  )r   r   r   r   r  rp  r  r  r  rG  r  r  r   r   rx   rv   r(  r(    s+    

B
$%rx   r(  c                  8    \ rS rSr% S\S'   S\S'   SrS\S'   S	rg)
Subgraphi  r   r   rR
  graph_moduleNzOptional[GraphLowering]r   r   )r   r   r   r   r   r   r   r   rx   rv   rN  rN    s    
I&&%)E")rx   rN  c                    U  Vs/ s H*  n[        U[        5      (       a  UR                  5       OUPM,     n n[        [	        S U  5       5      5      [        U 5      :  $ s  snf )Nc              3  8   #    U  H  n[        U5      v   M     g 7frq   )r
  )r  r*  s     rv   r  '_has_aliased_buffers.<locals>.<genexpr>  s     ;7"V**7rq  )rr   r8  r  r   r0   )buffersr*  s     rv   _has_aliased_buffersrT    sd     F !+6? C CO  
 z;7;;<s7|KKs   1Ac                     ^  \ rS rSr% SrSrS\S'   SrS\S'   SrS\S	'           SU 4S
 jjr	\
SS j5       rSS jrSrU =r$ )InvokeSubgraphi  z&
Ir node for the invoke_subgraph HOP.
NOptional[Subgraph]r=
  zOptional[list[TensorBox]]operandsOptional[list[MultiOutput]]rF  c                   > [         TU ]  S UUS9  Xl        [        R                  R                  U 5      U l        [        R                  R                  U 5        g r		  )r  rs  r=
  r`   r   r*  r   r+  )r`  r=
  rX  r4  r  s       rv   rs  InvokeSubgraph.__init__  sO     	 	 	

 !GG++D1		""4(rx   c                j  ^ SSK Jn  [        R                  R                  nS nUR
                  R                  S5      =n(       a	  US   SS  nO-UR                  SS  nU Vs/ s H  oR
                  S   PM     nnU Vs/ s H  oR                  U5      PM     nn/ n	[        U5       HG  u  p[        U[        5      (       a  U	R                  U5        M-  U	R                  U" XU
   5      5        MI     U	nUR                  cz  [        R                  R                  UR                  UUR                  S9Ul        [        R                   " UR                  5         UR                  R"                  " U6   S S S 5        UR                  R$                  nS nU H*  n[        U[        5      (       a  M  UR'                  5       n  O   Uc   e[)        UU[+        US9S	9mSU4S
 jjn[        U5       VVs/ s H  u  nnU" UU5      PM     nnnUTl        U$ s  snf s  snf ! , (       d  f       N= fs  snnf )Nr5   )constrain_to_fake_tensoreager_input_valsr   r   r  rH  r;
  rC
  r  )r=
  rX  r4  c           
       > [        U [        [        45      (       a  U $ [        [	        U R                  5       U R                  5       U R                  5       U R                  5       U R                  5       R                  S9T[        U4/SS9$ )Nr  T)r  )rr   r   r  r  r6  r   r   r   r  r   r7  r   )rA  indinvoke_subgraphs     rv   create_output,InvokeSubgraph.create.<locals>.create_output@  s    &#8:N"OPP"%002$..0#__.%002%00299 $C[M6: rx   )rA  rn   ra  rs   )r	  r]  r`   r   rJ  rF  r  r   r  r   rr   r   r  r<
  rO  r   rA
  rB
  graph_outputsr   rV  r  rF  )r  r=
  rX  r]  rJ  fake_operandsr^  fx_operandsru   new_operandsr   operandrF  r   rc  r   rA  rb  s                    @rv   r  InvokeSubgraph.create  s   6 ww+++00445GHHH,Q/3M '++AB/K4?@KqVVE]KM@
 3;;(Q%%a((;%h/LC'#899##G,##,WC6HI	 0  >>!WW22((,&mm 3 HN
 $$X^^4""M2 5 .... Gg'<== ++-   !!!($F3
	" >Gw=OP=O	6=+=OP")A A
 <( 54J Qs   "H HH2H/
H,c                &    UR                  U 5        g rq   )codegen_invoke_subgraphr)	  s     rv   r*	  InvokeSubgraph.codegenU  r$
  rx   )r   r=
  )r=
  rN  rX  zlist[TensorBox]r4  r  r   r   )r=
  rN  r>  )r   r   r   r   r!  r=
  r   rX  rF  rs  r  r  r*	  r   r  r  s   @rv   rV  rV    sq     $(H '*.H'.+/G(/
) 
),;
)EV
)	
) N N`. .rx   rV  c                     ^  \ rS rSr% SrS\S'   SrS\S'   SrS\S'   SrS\S	'   Sr	S
\S'                 SU 4S jjr
\        SS j5       rSS jrSS jrSrU =r$ )ConditionaliY  Nr^  	predicate7Optional[list[Union[TensorBox, ShapeAsConstantBuffer]]]rX  rW  true_subgraphfalse_subgraphrY  rF  c                  > Xl         X l        X0l        X@l        [	        U/U-   5      u  px[
        T	U ]  S UUUS9  Ub  X`l        [        R                  R                  U 5      U l        [        R                  R                  U 5        g N)r   r4  rM  r  )rp  rX  rr  rs  _split_by_sym_typer  rs  r	  r`   r   r*  r   r+  )
r`  rp  rX  rr  rs  r4  r	  sym_argsrd	  r  s
            rv   rs  Conditional.__init__a  s     # *, 2I;3I J"	 	 	
 (%6"GG++D1		""4(rx   c                    U R                  U5      nU Vs/ s H  oPR                  U5      PM     nn[        R                  R                  R                  S   nU Vs/ s H  oUR
                  S   PM     nnX#4 H  nUR                  b  M  [        R                  R                  UR                  UUR                  S9Ul        [        R                  " UR                  5         UR                  R                  " U6   S S S 5        M     UR                  R                  n	UR                  R                  n
SU	4SU
44 H&  u  p[        U	5      (       d  M  [        SU SU 35      e   [        U	5      [        U
5      :X  d   X45       e[        [!        X5      5       H  u  nu  pUR#                  5       UR#                  5       :X  d	   XU45       eUR%                  5       UR%                  5       :X  d	   XU45       eUR'                  5       R(                  UR'                  5       R(                  :X  a  M   XU45       e   [+        S U/U-    5       5      n[-        [        R                  R.                  R0                  [        R                  R                  R
                  R3                  S	S 5      5      nUc   S
5       e[5        UUUU[7        US9US9nSS jn[        [!        U	[        R                  R                  R
                  S   5      5       VVVVs/ s H  u  nu  nn[9        [;        UR#                  5       UR%                  5       UR=                  5        Vs/ s H  nU" U5      PM     snUR?                  5        Vs/ s H  nU" U5      PM     snUR'                  5       R(                  S9U[@        U4/5      PM     nnnnnUUl!        U$ s  snf s  snf ! , (       d  f       GMq  = fs  snf s  snf s  snnnnf )Nr  r  r_  true_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: c              3  p   #    U  H,  n[        U[        5      (       a  M  UR                  5       v   M.     g 7frq   )rr   r   r   )r  os     rv   r  %Conditional.create.<locals>.<genexpr>  s,      
+a!67 ALLNN+s   66r	  zcannot determine devicer  )rp  rX  rr  rs  r4  r	  c                \    [        U [        5      (       a  U $ U R                  R                  $ rq   )rr   rs   r   r  )r   s    rv   _maybe_expr'Conditional.create.<locals>._maybe_expr  s"    !S!!66;;rx   r  )r   zUnion[int, torch.SymInt]r   zUnion[int, sympy.expr])"r  r`   r   rJ  r   rF  r<
  rO  r   rA
  rB
  re  rT  r  r   r   r   r   r   r   r7  rF	  r+   r   r   r  ro  r  r  r6  r   r   r   rF  )r  rp  rz  r{  rX  ru   rg  rf  r=
  true_outputsfalse_outputsr   rF  r   t_of_or   r	  conditionalr  rA  merged_outputr  s                          rv   r  Conditional.create}  s    %%i0	2:;(Q%%a((;gg**//30;<1< +H~~%!"!6!6,,#0"*-- "7 "
 ((8NN&&6 98 , }}22 44(,7*m9TUMD#L11$**./TU\T]_  V < C$66U8UU6&s<'GHMAz>>#s~~'77F!#F7==?cmmo5D}D5>>#**cnn.>.E.EETPS}TE I
  
[8+
 

 6GG&&GG  %%))*=tD
 !<#<<!!!#$F3/
	& /8L!''"6"6";";E"BC/
/**FM !,,. **,4A4F4F4HI4Hb+b/4HI6C6J6J6LM6LKO6LM!,,.55 
/ 	 
& &Y << 98b JM
s<   OO$OAO8
O..O8
O3/O8

O+	.
O8
c           	         UR                  U 5        UR                  U R                  5       U R                  [	        U S0 5      5        g rR  )codegen_conditionalrS  r  rF  r   r)	  s     rv   r*	  Conditional.codegen  s9    ##D)88MMOT\\749Lb+Q	
rx   c                    [        U SS 5      =n(       aC  [        [        R                  R                  R
                  U5      nUc   eUR                  5       $ [        5       $ rR  rW  rX  s      rv   r  $Conditional.get_unbacked_symbol_defs  r[  rx   )rs  r   rX  rp  rr  r	  )rp  rn   rX  -list[Union[TensorBox, ShapeAsConstantBuffer]]rr  rN  rs  rN  r4  r  r	  z,Optional[dict[sympy.Symbol, pytree.KeyPath]]r   r   )rp  rm   rz  rN  r{  rN  rX  r  r>  r  )r   r   r   r   rp  r   rX  rr  rs  rF  rs  r  r  r*	  r  r   r  r  s   @rv   ro  ro  Y  s    "&I&HLHEL(,M%,)-N&-+/G(/)) @)  	)
 !) ") H) 
)8 TT T 	T
 @T Tl
   rx   ro  c                    / n/ nU  HF  n[        U[        5      (       a  UR                  UR                  5        M5  UR                  U5        MH     X!4$ rq   )rr   r   r  r  )r   non_sym_argsrw  r  s       rv   rv  rv    sO     LHc011OOCHH%$	  !!rx   c                     ^  \ rS rSr% SrS\S'   SrS\S'   SrS\S'   SrS\S'   Sr	S	\S
'               SU 4S jjr
\        SS j5       rSS jrSrU =r$ )	WhileLoopi  Nrq  carried_inputsadditional_inputsrW  cond_subgraphbody_subgraphrY  rF  c                   > Xl         X l        X0l        X@l        [	        X-   5      u  pg[
        TU ]  S UUUS9  [        R                  R                  U 5      U l
        [        R                  R                  U 5        g ru  )r  r  r  r  rv  r  rs  r`   r   r*  r   r+  )	r`  r  r  r  r  r4  rw  rd	  r  s	           rv   rs  WhileLoop.__init__  su     -!2** 2>3U V"	 	 	
 GG++D1		""4(rx   c                   SSK Jn        SS jn[        R                  R                  R
                  S   n[        R                  R                  R
                  S   nXx-   n	U	 V
s/ s H  oR                  S   PM     nn
U V
s/ s H  oR                  S   PM     nn
U V
s/ s H  oR                  S   PM     nn
U V
s/ s H  oR                  U
5      PM     nn
U" X<5      nU V
s/ s H  oR                  U
5      PM     nn
U" XM5      nX4-   nX4 H  nUR                  b  M  [        R                  R                  UR                  U	UR                  S9Ul        [        R                  " UR                  5         UR                  R                  " U6   XL aZ  [        UR                  R                  5      [        U5      :X  d   eU" UR                  R                  U5      UR                  l        S S S 5        M     UR                  R                  nUR                  R                  n[        U5      (       a  [!        SU 35      e[        U5      S	:X  d   U5       eUS   n[#        U[$        5      (       dM  UR'                  5       [(        R*                  :X  d   U5       e[        UR-                  5       5      S:X  d   U5       e[        U5      S:  d   S
5       eUS   R/                  5       nUc   e[        U5      [        U5      :X  d	   UU45       e[1        [3        UU5      5       H  u  nu  nn      SS jnU" UR-                  5       UR-                  5       5        U" UR5                  5       UR5                  5       5        UR/                  5       UR/                  5       :X  d   UUUU45       eUR'                  5       UR'                  5       :X  d
   UUU45       eUR7                  5       R8                  UR7                  5       R8                  :X  a  M   UUU45       e   [;        UUUU[=        US9S9nUR                  b=  [#        UR                  R>                  [(        R@                  RB                  5      (       d   eU" UR                  R>                  U5      S   n[E        U5      nU Vs/ s H  nUU   PM
     nn[1        U5       VVs0 s H  u  nnUU;  d  M  UU_M     nnnURG                  5        VVs/ s Hu  u  nn[I        [K        UR/                  5       UR'                  5       UR-                  5       UR5                  5       UR7                  5       R8                  S9U[L        U4/5      PMw     nnnUUl'        U V s/ s H  n [Q        U RR                  U U5      PM     sn Ul*        [W        U5      n![W        U5      n"[Y        [        U5      5       Vs/ s H  nUU;   a  [[        U"5      O
[[        U!5      PM!     n#n[3        UU#5       Hk  u  n nU R]                  5       [        R                  R^                  ;   d  M4  [        R                  R`                  Rc                  UR]                  5       5        Mm     U#$ s  sn
f s  sn
f s  sn
f s  sn
f s  sn
f ! , (       d  f       GM  = fs  snf s  snnf s  snnf s  sn f s  snf )Nr   )check_input_alias_and_mutationc           	     ,   [        U 5      [        U5      :X  d   e/ n[        X5       Hh  u  p4[        U[        R                  5      (       a3  UR                  [        R                  X4R                  5       SS95        MW  UR                  U5        Mj     U$ )NFr'  )	r   r   rr   r   r_	  r  r  r	  r   )tensor_boxesfake_tensorsretr  fks        rv   _require_exact_strides0WhileLoop.create.<locals>._require_exact_strides  s     |$L(9999Cl9b%,,//JJ$::		5 ;  JJrN : Jrx   r  r  r_  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: r5   z9torch.while_loop is assumed to have at least one operand.c                ~    [        X5       H.  u  p#[        R                  R                  R	                  X#5        M0     g rq   )r   r`   r   r   r  )	lhs_exprs	rhs_exprslhsrhss       rv   _guard_list_equals,WhileLoop.create.<locals>._guard_list_equalsq  s-     !$I 9HCGG$$11#; !:rx   r  )r  r  r  r  r4  r   r  )r  'list[TensorBox | ShapeAsConstantBuffer]r  z,list[Union[int, torch.SymInt, torch.Tensor]]r   r  )r  Sequence[Union[int, Any]]r  r  r   r   )2torch._higher_order_ops.utilsr  r`   r   rJ  r   rF  r  r<
  rO  r   rA
  rB
  r   re  rT  r  rr   r   r   r   r   r   r   r   r   r  r   r7  r  r  modulefxGraphModuler0   r  r  r6  r   rF  rK  r4  r	  rE	  r   rF	  r  r  r
  r@  )$r  cond_fnbody_fnr  r  r  r  fx_carried_inputsfx_additional_inputsfx_all_inputsru   fake_all_inputsfake_carried_inputsfake_additional_inputs
all_inputsr=
  cond_outputsbody_outputsr  r   r   rB  bor  
while_loopmutated_idxsmutated_idx_setr   rE  r   real_outputsrA  rO  outputs_itermutated_inputs_iterall_outputss$                                       rv   r  WhileLoop.create  sn    	Q	A	F	 5	" GG0055b9 ww3388<)@2?@-Q66%=-@6GH6Gvve}6GH9M!N9MA&&-9M!N8FG1++A.G/T;LM;La..q1;LM2
 $7
 *H~~%!"!6!6,,#0"*-- "7 "
 ((8NN&&8  *"8>>#?#?@C/E      8N$NN88/84 98 +2 }}22}}22-- XXdWeg  < A%3|3%O!233;;=EJJ.11.qzz|$),1,):" 	
G	
" A))+!!!>"c,&77W.,9WW7$S%FGKAxB<4<4< < r{{}bkkm<r}}@ ==?bmmo5J2r67JJ5<<>R\\^3@aR[@3==?))R]]_-C-CCPaR[PC H" )/!!$F3

 }}(ZMM  %(("6"6.
 .
 	
 

 6MM  /

 %\25DE_c*S/_E &l3
3S/) CH3 	 
"  ,113
  4V !,,. **,*!,,.!,,.55 
  4 	 
 *
 &'
% 3::sJ7%'

#
 L)">2 S./
/ *-)?D$%T,EWW/ 	 
 NK8HC||~!5!55 ++//? 9 S AH!NGM 98Z F


'

sP   (Z	Z$ZZ*Z?A8Z"8Z5Z:(Z:A<[ ![$&["
Z2	c                &    UR                  U 5        g rq   )codegen_while_loopr)	  s     rv   r*	  WhileLoop.codegen  s    ""4(rx   )r  r  r  r  r   )r  r  r  r  r  rN  r  rN  r4  r  r   r   )r  rN  r  rN  r  r  r  r  r>  )r   r   r   r   r  r   r  r  r  rF  rs  r  r  r*	  r   r  r  s   @rv   r  r    s    NRNKRQUNU(,M%,(,M%,+/G(/)E) I)  	)
  ) ") 
)0 ff f F	f
 If fP) )rx   r  c                  V   ^  \ rS rSr SSS. S	U 4S jjjjrS
U 4S jjrSS jrSrU =r$ )r   i  Nr0  c          
     t  > [         TU ]  UUUUUS US9  SSKJn  U V	s/ s H&  n	[	        U	[
        5      (       a  U	R                  OU	PM(     n
n	U" U/ UQU
Q7U5      nUc   eXl        [        R                  R                  R                  US 5      U l        U [        R                  R                  U'   g s  sn	f )N)r   r	  r   )get_effect_key)r  rs  torch._higher_order_ops.effectsr  rr   r  r   effect_typer`   r   effectful_opsr  prev_effect_buffer)r`  r4  r  rd	  rH  rP	  r   r	  r  rS  uncovered_argsr  r  s               rv   rs  EffectfulKernel.__init__  s     	/ 	 	
 	C GR
FQz!_55AGG1<k 	 
 %V-O~-O-OQWX&&&&"#''"7"7";";K"N-1k*
s   -B5c                   > [         TU ]  5       nU R                  bG  UR                  R	                  [
        R                  " U R                  R                  5       5      5        U$ rq   )r  r  r  r  r@  r7   r  r  )r`  r  r  s     rv   r  EffectfulKernel.get_read_writes  sU    g-/"".!!$$T%<%<%E%E%GH rx   c                    gr  r   rg  s    rv   r
   EffectfulKernel.has_side_effects  r  rx   )r  r  rq   r>  rg  rI  )	r   r   r   r   rs  r  r
  r   r  r  s   @rv   r   r     s6     2 2 
2 2@ rx   r   c                      \ rS rSrSrg)r|	  i  r   Nr  r   rx   rv   r|	  r|	    s    rx   r|	  c                  `    \ rS rSr% S\S'   S\S'   SS jrSSS jjrSS	 jrSS
 jrSS jr	Sr
g)r  i  r   r   +Union[FakeScriptObject, torch.ScriptObject]r   c                    U R                   $ rq   rl  rg  s    rv   r  TorchBindObject.get_name  r  rx   Nc                    U R                   $ rq   rl  r  s     rv   r  !TorchBindObject.codegen_reference  r  rx   c                    U R                   $ rq   r   rg  s    rv   rX	  TorchBindObject.get_value  r  rx   c                    [        U R                  [        R                  5      (       a  U R                  $ U R                  R                  $ rq   )rr   r   r   ScriptObjectreal_objrg  s    rv   get_real_objTorchBindObject.get_real_obj   s3    djj%"4"455::::&&&rx   c                   U R                  5       n[        UR                  5       5      n[        R                  " U5      S   nU Vs/ s HE  n[        U[        R                  5      (       d  M$  UR                  5       UR                  5       -  PMG     nn[        R                  " [        R                  US5      $ s  snf r  )r  r   __obj_flatten__rG	  rS	  rr   r   r_	  r,
  numelr  r  operatorr@  )r`  real_script_obj	flat_dict
flat_elemsru   
flat_sizess         rv   get_buf_bytesTorchBindObject.get_buf_bytes   s    ++-88:;	((3A6
  
!U\\* )ANNqwwy( 	 

 j!<<
s   #B;.%B;r   r[  rq   rP  )r   r  )r   ztorch.ScriptObjectri  )r   r   r   r   r   r  r  rX	  r  r  r   r   rx   rv   r  r    s&    
I66'
=rx   r  c                  B    \ rS rSr% S\S'   S\S'   S
S jrSSS jjrS	rg)rT	  i   r   r   rT  r   c                    U R                   $ rq   rl  rg  s    rv   r  GeneratorState.get_name   r  rx   Nc                    U R                   $ rq   rl  r  s     rv   r   GeneratorState.codegen_reference   r  rx   r   r[  rq   rP  )r   r   r   r   r   r  r  r   r   rx   rv   rT	  rT	     s    
I rx   rT	  c                  j    \ rS rSrS	S jrS	S jrS
SS jjr\    SS j5       r\  SS j5       r	Sr
g)_CollectiveKerneli    c                    gr  r   rg  s    rv   r  !_CollectiveKernel.should_allocate!   r  rx   c                    gr  r   rg  s    rv   r
  "_CollectiveKernel.has_side_effects$   r  rx   Nc                Z   [        U R                  5      [        R                  R                  L d   S5       eU R                  nUR
                  R                  U l        UR
                  R                   Vs/ s H!  o3R                  (       d  M  UR                  PM#     snU l
        g s  snf )Nz,Setting cpp kernel needs a valid op_overload)r   r	  r   r	  r	  r	  r   r	  r	  r	  r	  )r`  r	  r  ru   s       rv   r
	  %_CollectiveKernel.set_cpp_kernel_name)   s    D$$%)>)>> 	
:	
> !!%~~22 #NN44.
4qFAFF4.
* .
s   7B(B(c                <   [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H  n
U
R	                  5         M     US   R                  5       nU " [        US9UUWW5      n[        R                  " U5      nUR                  R                  U Vs/ s H  n[        [        US9X5      PM     sn5        UR                  R                  U Vs/ s H  oR                  5       PM     sn5        SU;   a]  UR                  R                  [        [        US9US   U5      5        UR                  R                  US   R                  5       5        g g ! , (       d  f       GNQ= fs  snf s  snf )Nr  r   r  r   )r`   r   r  ro	  r  r   r  rG	  tree_leavesr	  r  rK  r5  r  r  )r  r  rM  r   r   _example_outputrd	  re	  rP	  r	  
tensor_argr   r  inpsr  rO  s                   rv   create_inplace _CollectiveKernel.create_inplace:   s    WW ""6CDCFC!  %E2C1D&EE$%J  & Q**,f%
 !!&)&&OSTt^Jf5sCtT	

 	!!T"BTc<<>T"BCF?##**z8&-P %%fUm&<&<&>? 9 . U #Cs   FF>F
Fc           
     
   [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H  n
U
R	                  5         M     [        W[        5      (       a  U R                  Xe5      nU " [        US9UUWW5      n[        U5       VVs/ s H(  u  p[        U R                  U5      U[        U4/5      PM*     snnUl        [        UR                  U5       H_  u  p[        R                  (       d  [!        U5      (       a  M,  [         R                  R"                  R%                  UR&                  5        Ma     UR                  $ U " U R                  U5      UUWW5      n[        R                  (       d  [!        U5      (       d3  [         R                  R"                  R%                  UR&                  5        U/Ul        U$ ! , (       d  f       GN= fs  snnf )Nr  r  )r`   r   r  ro	  r  rr   r   rh  r  r   r  r  rF  r   r6   r  r]   r0  r@  r   )r  r  rM  r   r   rj	  rd	  re	  rP	  r	  r  r   r  r   r9  r  s                   rv   create_out_of_place%_CollectiveKernel.create_out_of_placew   s    WW ""6CDCFC!  %F3D2E&FF$%J  & nd++__[AF!0F "+>!: ";IA ((0AYK
 ";FN  #6>>>B::BSC C GG--11#((;	  C
 >>!$$^4F 66>O? ? ))--fkk:$XFNMa *s   G-3/G?-
G<)r	  r	  rI  rq   r	  )rM  !Union[TensorBox, list[TensorBox]]r   r   )rM  r  )r   r   r   r   r  r
  r
	  r  r  r  r   r   rx   rv   r  r      sV    
	
" $@>$@	$@ $@x 3>3 3rx   r  c                  F   ^  \ rS rSrS r\SS j5       rSU 4S jjrSrU =r	$ )_WaitKerneli   c                &   U R                   S   n[        U[        5      (       a  UR                   S   /$ [        U[        5      (       aG  UR                   S   n[        U[        5      (       a!  UR                  S   u  p4UR                   U   /$ / $ / $ r  )rM  rr   r  r  r  )r`  rO  collr   r   s        rv   get_volatile_reads_WaitKernel.get_volatile_reads   s    kk!nc,--JJqM?"[)) ::a=D$ 122QC())I Irx   c                v   [         R                  R                     U R                  X5      u  nnnnnS S S 5        W(       a   U SU 35       eU " [	        UR                  5       S9UWWW5      nUR                  R                  [        [	        UR                  5       S9X(5      5        g ! , (       d  f       N}= f)Nr  r  )	r`   r   r  ro	  r  r   r	  r  rK  )	r  r  rO  r  rd	  re	  rP	  r	  r  s	            rv   create_wait_WaitKernel.create_wait   s    WW ""6/!  %E2C1D&EE$cnn./
 	&&:S^^-=>L	
! s   B**
B8c                   > [         TU ]  5       nU R                  5       nU H@  nUR                  R	                  [
        R                  " UR                  5       5      5        MB     U$ rq   )r  r  r  r  r@  r7   r  r  )r`  r  volatile_readsvrr  s       rv   r  _WaitKernel.get_read_writes   sS    g-/002 B!!,"6"6r{{}"EF !rx   r   )rO  rm   r   r   rg  )
r   r   r   r   r  r  r  r  r   r  r  s   @rv   r  r     s&    * 
 
* rx   r  c                V   [        U [        [        45      (       a  [        U 5      $ [        U [        [
        45      (       a5  [        [        R                     " 5       nU  H  nU[        U5      -  nM     U$ [        U [        R                  5      (       a  [        U 5      $ [        5       $ rq   )rr   r.   r   r(   r   r   r0   r   r   r	  r   r_	  r   r  r   s      rv   r	  r	     s    !h%&&$Q''	At}	%	%u||$&A,Q//A 	Au||	$	$$Q''|rx   c                V   [        U [        [        45      (       a  [        U 5      $ [        U [        [
        45      (       a5  [        [        R                     " 5       nU  H  nU[        U5      -  nM     U$ [        U [        R                  5      (       a  [        U 5      $ [        5       $ rq   )rr   r.   r   r'   r   r   r0   r   r   r	  r   r_	  r  s      rv   r	  r	     s    !h%&&A	At}	%	%u||$&A#A&&A 	Au||	$	$A|rx   )ru   r   r   r   )r   r   r   r   )r   r   r   r]  )r   Sequence[int]r   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]r   r  )ru   r)   r   r   r   r}   rq   )r   z(Sequence[Union[int, torch.SymInt, Expr]]r   zOptional[ShapeEnv]r   r  )r   Sequence[Union[int, Integer]]r   r  rC  )ru   zLiteral[None]r   r   r   r   )ru   rn   r   r   r   r  )ru   r^  r   r   r   zOptional[torch.Tensor])r   zOptional[Sequence[_T]]r   z Optional[Sequence[Optional[_T]]])ru   z2Union[IRNode, OutputSpec, torch.device, None, str]r   rO  )ru   z&Union[IRNode, torch.device, None, str]r   r   )ru   zUnion[Buffer, TensorBox]r  rs   r   r   )r%  rZ  r&  rZ  r'  rZ  r   r   )r9  Union[TensorBox, BaseView]r:  z"Sequence[Union[int, torch.SymInt]]r   r  )rH  rR
  r   r   )rM  r  r   re  )r   zUnion[Expr, Sequence[Expr]]r   rF  r   r_   )r_  r   r   rF  r^  r   r   rb  )ru   rn   r   r   )TFNFN)ru   rn   r  r   r%  r   r&  'Optional[Sequence[Union[int, Integer]]]r  r   r  r  r   ztuple[StorageBox, Layout])ru   rn   r&  r  r   r   )r   rn   r   r   )r   rZ  r'  rZ  r   r   )r   rF  r   rs   )rS  rA  r   r   )r   rS
  r   z-tuple[list[ShapeAsConstantBuffer], list[Any]])r   r   r   r	  (-  
__future__r   rs  rp  r  r  loggingr  textwraprW  r  collections.abcr   r   r   r   r   enumr	   r
   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   unittest.mockr   r   r   r   r   torch._export.serde.schema_exportserderI  r}  torch._library.utilsrD  r  r<  torch._loggingr   torch.fxtorch.utils._pytree_pytreerG	  torch._dynamo.utilsr   torch._export.serde.serializer   *torch._higher_order_ops.auto_functionalizer   torch._inductorr   torch._prims_commonr   r    r!   r"   r#   torch._subclasses.fake_tensorr$   %torch.fx.experimental.symbolic_shapesr%   r&   r'   r(   r)   r*   r+   r,   r-   r.   torch.utils._ordered_setr0   torch.utils._sympy.functionsr1   r2   r3   torch.utils._sympy.symbolr4   r  r6   r7   codegen.commonr8   r9   r:   r;   r<   r=   r>   r?   r@   	loop_bodyrA   ops_handlerrB   rC   rD   rE   runtime.benchmarkingrF   runtime.hintsrG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   virtualizedr^   r_   r`   "torch._library.fake_class_registryra   torch.fx.noderb   codegen.cuda.cuda_templaterc   r   rd   re   r   r   r  __version__r  r  ImportErrorrf   rg   rh   rs   ri   r  rj   	getLoggerr   r  r  r-	  r   r   ro   rw   rc  r{   r   r   r   r   r   r   r  r  r   r   r   r   r  r  r  r  r   r+  r?  rJ  rP  rn   rw  r  r  r  r4  rG  rr  rt  INNER_FN_TYrh  rx  r  r  r  r  r   r"  r5  r-  r/  r3  r   r  r  r  r9  r8  r#  r5  rW  rb  rm  ry  r}  r  r  r6  r   r  r  r  r  r+  r)  r  r  r  r.  r  r   r  r%  rC  r   r   PrimitiveInfoTyper]  r~  r  r  r  r'  r  r  r  r	  r	  r  rK  r
  r
  r
  r5
  rU
  r
  r
  r
  r
  r
  r
  r
  r
  r   r   r-  r/  r  r  r  r8  rm   r(  rN  rT  rV  ro  rv  r  r   r|	  r  rT	  r  r  r	  r	  r   rx   rv   <module>r?     s   "          9 9 :  
 
 
 = <   ' ' 2 2 , ,   $ $ ( ? M #  :   0 L L * "     N N - :     . * ) C"8$% %L)$''NJ T]T]T]CI&) &C,-) -!			8??4	8yy~~'T  k	sDk!12K8STU	i 	) d#  $$$D44 , ! $  TX	1>P	 TX
	1
>P

 
 N 
 N 
 O 
 O .2&*8!%	>9	>	>;('0     
	.#G&#G/#G  #GL'x, x,v UH H HV ~
F ~
 ~
B& 
 
 
@ 
i 
 
F |$y!y!u=)< 8  JN<N<N +<NBF<N<N~ i
 i
 i
X 7AB7S9 7St#1 #L[
+ [
| D
5 D
 D
P 	 	 	 U5 U Up	 !<@=A999 9 :	9
 9 ;9 9x:	$ \
v \
 \
~ K K K\ (( ( (V 79( 79 79t (  : j; j jZ Rh R Rj % % %POA OAd 6  " K| K K$ S| S S'9	<7 7 QTZ QT QTh& $I7V I7XPf P2T $%{ $%N   .Q* Q*h UV]  D U&fi & & & 
K 
[ 
& 6  & F   UG4_ G4 G4T
B
_ B
JK> K\ #udCeCeT<Q6R1SST 6$ 6$r"| "
D=. D=N5 50( (& U>? > >B h9 hV UM< M M` U l    F
/ 
(&" &"RV .;L ;|%
 %
P
- 
$/
\ /
dul up&)| &)T-, -`< 6B5 B"F- F,H)l H)V,)| ,)^!T !TH-L -46;< 6;r U  
k*& k*\ U
. 
 
< 
  &
, &
V S S Sl+
 +T% T%n U*v * *L Ug.\ g. g.T UH , H  H V"
"2" UI) I) I)X,n ,^	6 	 =l = =@ \  K K\2# 2p  oC  NJs   &e 
e"!e"