
    7h6                      % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKJrJrJrJrJr  S SKJr  S SK	Jr  S SK J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+  S SK,J-r-J.r.J/r/J0r0J1r1J2r2  S SKJ3r3  S SK4r4S SK5r5S S	K6J7r7  S S
K8J9r9  S SK:J;r;  S/r<\)(       a^  S SKJ=r=J>r>J?r?  S SK5J@r@JArAJBrB  S SKCJDrD  S SKEJFrF  S SKGJHrH  S SKIJJrJ  SSKKJLrL  SSKMJNrN  SSKOJPrP  SSKQJRrRJSrSJTrTJUrUJVrVJWrWJXrX  SSKYJZrZ  SSK[J\r\J]r]  / SQr^\*" S5      r_\R                  GSS j5       raS SKbJcrc  S SKdJere  S SKfJgrg  S S KhJiri  S S!KjJkrk  S S"KlJmrm  S S#KnJoroJprpJqrqJrrrJsrs  S S$KtJuruJvrv  S S%KwJxrxJyry  SS&KzJ{r{  SS'K|J}r~  \R                  S(:H  r\GR                   " \5      r\*" S)5      r\\4GR
                  \4GR
                  4   r\'\+\5GR                  \\5R                  4      rS*S+S,.rS-rS-rS-rS.rS/r\\S-
  -  S :X  a  \S0:  d   S15       eGSS2 jrGSS3 jr " S4 S5\4GR$                  5      r\GR(                  " S6S79 " S8 S95      5       rGSGSS: jjr GS       GSS; jjr\R                  GSS< j5       rGSS= jrGSS> jrGSS? jrGSS@ jr      GSSA jr}GSSB jr    GSSC jrGSSD jr    GSSE jrGSSF jrSG 4     GSSH jjr        GSSJ jrGSGSSK jjr  GS         GS SL jjr     GS!             GS"SM jjrGS#SN jrGS$SO jrGS%SP jrGS&SQ jrGS'SR jr\/" SS5      r\*" STS6SU9r " SV SW\(\$\\4   5      rGS(SX jr    GS)SY jr      GS*SZ jr      GS+S[ jr GS,     GS-S\ jjr      GS.S] jrGS/S^ jrGS0S_ jrGS1S` jrGS2Sa jrGS3Sb jrGS4Sc jrGS5Sd jrGS6Se jrGS7Sf jr    GS8Sg jrGS9Sh jrGS:Si jrS SKrGS;Sj jr/ rSI\Sk'   GS<Sl jrGS;Sm jr\GR                     GS=       GS>Sn jj5       r\r\r\rGS?So jr      GS@Sp jr\GR                  " S05      GSASq j5       r " Sr Ss\&5      r\GR(                   " St Su5      5       r " Sv Sw5      r " Sx Sy\5      r\GR                  GSBSz j5       r " S{ S|5      r " S} S~\5      r\R                  GSCGSDS jj5       r\GR                  GSES j5       rGSES jr GS,       GSFS jjr      GSGS jrGSHS jrGSHS jrSSS.       GSIS jjrGSJS jrGSKS jrGSLS jrSrSr/ SQr\+\\4GR
                  4   rS\S'   GSMS jr\R                  GSNS j5       r\R                  GSOS j5       r\R                  GSPS j5       rGSQS jrGSKS jrGSKS jrGSQS jrGSQS jr        GSRS jr    GSS               GSTS jjrGSS jr " S S5      r        GSUS jr        GSUS jrGSVS jrGSWS jrGSXS jr        GSXS jr        GSYS jr\GR                        GSZS j5       r GS,     GS[S jjrGS\S jrGS]S jrGS^S jrGS^S jrGS_S jGr GS`S jGr\GR                  GSaS j5       GrGSES jGr\R                  GSES j5       Gr\R                  GSAS j5       Gr\R                  GSES j5       GrGSES jGrGSbS jGrGScS jGr	GSS jGr
GSS jGrGSdS jGrGS7S jGr " S S\GR                  5      Gr          GSeS jGrGSfS jGr    GSfS jGr GS,     GSgS jjGrGShS jGrGSiS jGrGSiS jGr      GSjS jGr        GSkS jGrS 4           GSlS jjGrS 4           GSlS jjGrGSmS jGrGSnS jGr\GR(                   " S S5      5       Gr\GR                  GSoS j5       GrGSpS jGrGSqS jGr GSrS jGr!GSsS jGr"              GStS jGr#GSuS jGr$GSvS jGr%GSwS jGr&GSxS jGr'        GSyS jGr(GSzS jGr)        GS{S jGr*GS|S jGr+ GS,       GS}S jjGr,      GS~S jGr-GSS jGr.      GSS jGr/GSS jGr0GSpS jGr1SSSSSSSS.Gr2G\2GRg                  5        V Vs0 s H  u  pX_M	     snn Gr4\GRj                  " S5      Gr6GSS jGr7GSS jGr8GSS jGr9GSS jGr:\R                  GSS j5       Gr;\GR(                   " S S5      5       Gr<0 Gr=S\S'           GSS jGr>\9" 5       Gr?S\S'   GSS jGr@GSS jGrAGSS jGrB\*" S5      GrC\*" S5      GrD " S S\G\CG\D4   5      GrE\." S6S9GS,S6S7.GSS jjj5       GrFGSS jGrG GS       GSS jjGrH " GS  GS\GR                  5      GrI\R                  GSGS j5       GrJGSGS jGrKGSGS jGrLGSGS jGrMGSGS jGrNGSGS jGrOGSGrPGSGS	 jGrQGSGS
 jGrRgs  snn f (      )annotationsN)
CollectionIteratorMappingMutableMapping
MutableSet)datetime)StringIO)AnyCallablecastGenericLiteral
NamedTupleOptionalProtocolTYPE_CHECKINGTypeVarUnion)Concatenatedataclass_transform	ParamSpecSelf	TypeAlias	TypeGuard)mock)DeviceProperties)
OrderedSet)tree_map_only!activation_quantization_aten_pass)IterableSequence
ValuesView)SymBoolSymFloatSymInt)ELEMENTWISE_TYPE_PROMOTION_KIND)GraphModule)ShapeEnv)Node   )WorkspaceArgPythonWrapperCodegenGraphLowering)BufferExternKernelExternKernelOutIRNodeLayout	OperationReinterpretViewCompiledFxGraph)BaseSchedulerNodeSchedulerBuffer)cudampsxpuTc                     [          V s/ s H*  n [        [        U 5      R                  5       (       d  M(  U PM,     nn [	        U5      S::  d   e[	        U5      S:X  a  SnU$ UR                  5       nU$ s  sn f )Nr+   r   r<   )	GPU_TYPESgetattrtorchis_availablelenpop)x
avail_gpusgpu_types      O/var/www/fran/franai/venv/lib/python3.13/site-packages/torch/_inductor/utils.pyget_gpu_typerK   ^   sh    &KY'%*;*H*H*J!YJKz?aZA-vHO 4>>>3CHO Ls
   'A2A2)get_interface_for_device)detect_fake_mode)
DeviceType)	EventList)GraphTransformObserver)	ShapeProp)CeilDivCleanDivFloorDivIdentityModularIndexing)make_symbolSymT)bound_sympyValueRanges)config)ceildivwin32_Tz.cubinz.spv)r<   r>         @      zmust be power of 2c                *    U [         -   S-
  [         * -  $ )z/Round up to the nearest multiple of ALIGN_BYTESr+   )ALIGN_BYTES)nbytess    rJ   _alignrf      s    [ 1$44    c                   [        U [        R                  [        R                  45      (       a#  [	        [        [        U R                  5      5      $ [        U [        5      =(       d"    [        R                  " U [        5      [        :H  $ )z:v can be statically proven to be a multiple of ALIGN_BYTES)
isinstancesympyAddMaxallmap_is_alignedargsaligngcdrd   )vs    rJ   ro   ro      sT    !eii+,,3{AFF+,,aK599Q#<#KKrg   c                  4    \ rS rSrSrSrSr\SS j5       rSr	g)	rq      z<Symbolically round up to the nearest multiple of ALIGN_BYTESr+   Tc                    [        U[        [        R                  45      (       a  [	        [        U5      5      $ [        U5      (       a  U$ g N)ri   intrj   Integerrf   ro   )clsvalues     rJ   eval
align.eval   s<    ec5==122#e*%%uL rg    N)r|   
sympy.ExprreturnzOptional[sympy.Expr])
__name__
__module____qualname____firstlineno____doc__nargs
is_integerclassmethodr}   __static_attributes__r   rg   rJ   rq   rq      s!    FEJ rg   rq   Tfrozenc                  B    \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   S
rg)GraphPartitionMap   zH
Mapping from the partition info (e.g., input/output) to the graph info
ry   idzlist[Optional[int]]input_index_mappingoutput_index_mapping	list[str]constant_namesr   N)r   r   r   r   r   __annotations__r   r   rg   rJ   r   r      s$    
 	G -,-- rg   r   c           
        U " 5         [         R                  R                  5         [         R                  " [	        S5      [         R
                  SS9n[         R                  R                  SS9n[         R                  R                  SS9nUR                  5         [        S5       H  nUR                  5         U " 5         M     UR                  5         [         R                  R                  5         UR                  U5      S-  n[        S[	        X-  5      5      n[        S[	        X'-  5      5      n	[        U5       H
  nU " 5         M     [        U	5       Vs/ s H   n[         R                  R                  SS9PM"     nn[        U	5       Vs/ s H   n[         R                  R                  SS9PM"     nn[         R                  R                  [         R                  R                  R                  /S9 n
[         R                  R                  5         [        U	5       Hp  nUR                  5         XK   R                  5         [         R                  R                   R                  S	5         U " 5         S
S
S
5        X[   R                  5         Mr     [         R                  R                  5         [         R"                  " [%        XE5       VVs/ s H  u  pUR                  U5      PM     snn5      nS
S
S
5        [         R&                  " W5      R)                  5       n[*        R-                  S5        [*        R-                  W
R/                  5       R1                  SSS95        [3        U
R5                  5        Vs/ s H7  nUR6                  [8        R                  :X  d  M#  SUR:                  ;   d  M5  UPM9     sn5      nU(       a#  U[<        R&                  " S U 5       5      S-  -  n[*        R-                  SU5        U$ s  snf s  snf ! , (       d  f       GN= fs  snnf ! , (       d  f       GN9= fs  snf ):  
Returns benchmark results by examining torch profiler events.
This could be more accurate as it doesn't count CPU side overhead.
However, this also requires manually excluding irrelevant event, e.g.
vectorized_elementwise_kernel which is used to fill L2 cache,
various CUDA events, etc, so could also be fragile.
    Ar<   dtypedeviceTenable_timing   r+   
activitiesRunCudaModuleN
raw eventsself_device_time_totalsort_by	row_limitfused_abs_max_0c              3  8   #    U  H  oR                   v   M     g 7frx   device_time_total.0events     rJ   	<genexpr>fp8_bench.<locals>.<genexpr>   s     Q33        @@profiling results: %s ms)rC   r<   synchronizeemptyry   float16Eventrecordrangezero_elapsed_timemaxprofilerprofileProfilerActivityCUDAnvtxtensorzipmeanitemlogdebugkey_averagestablerO   eventsdevice_typerN   name
statistics)fnwarmuprepcachestart_event	end_event_estimate_msn_warmupn_repeatpisetimesresr   filtered_eventss                     rJ   	fp8_benchr      s5    D	JJKKJu}}VLE **"""6K

  t 4I1X
  	JJ**959K 1c&./0H1c#+,-H 8_
  BGxQA5::##$#7KQ?DXO!!!!5IO			NN++00
 
  
 


 xAKKMN!!#&&7 8L! ! 	

 +.{+FG+F41Q^^A+FG

" **U

 
 
"CIIlIIann$$-EQS$TU 	
#  JOO3 8IUZZ8W #	
O OOQQQ	

 II(#.JI RO 87
 H
 
*	
sP   'P
'P;A8P,3P;AP,P&2P,"P>)P>;P>
P#P,,
P;c                X   U " 5         [         R                  R                  5         [         R                  " [	        S5      [         R                  SS9n[         R                  R                  SS9n[         R                  R                  SS9nUR                  5         [        S5       H  nUR                  5         U " 5         M     UR                  5         [         R                  R                  5         UR                  U5      S-  n[        S[	        X-  5      5      n[        S[	        X'-  5      5      n	[        U5       H
  nU " 5         M     [         R                  R                  5         [         R                  R                  [         R                  R                  R                  /S9 n
[        U	5       H  nUR                  5         U " 5         M     [         R                  R                  5         S	S	S	5        [        R!                  S
5        [        R!                  W
R#                  5       R%                  SSS95        ['        U
R)                  5        Vs/ s H7  nUR*                  [,        R                  :X  d  M#  UR.                  S:w  d  M5  UPM9     sn5      n[1        U5      U	-  S:w  a  [3        S[1        U5      U	5      e[1        U5      U	-  n['        [5        U5       VVs/ s H  u  pX-  S:w  d  M  UPM     snn5      nUR7                  5         UR#                  5       n[        R!                  S5        [        R!                  UR%                  SS95        [9        S U 5       5      S-  U	-  n[        R!                  SU5        U$ ! , (       d  f       GN= fs  snf s  snnf )r   r   r<   r   Tr   r   r+   r   Nr   r   r   r   zContext Syncr   zYFailed to divide all profiling events into #repeat groups. #CUDA events: %d, #repeats: %szprofiling time breakdown)r   c              3  8   #    U  H  oR                   v   M     g 7frx   r   r   s     rJ   r   +do_bench_using_profiling.<locals>.<genexpr>R  s     A=%%%=r   r   r   )rC   r<   r   r   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rO   r   r   rN   r   rE   RuntimeError	enumerate_build_treesum)r   r   r   r   r   r   r   r   r   r   r   r   r   r   num_event_per_groupactual_eventsr   s                    rJ   do_bench_using_profilingr     s    D	JJKKJuyyHE **"""6K

  t 4I1X
  	JJ**959K 1c&./0H1c#+,-H 8_
  
JJ			NN++00
 
  
 
xAKKMD	 ! 	

 
 IIlIIann$$-EQS$TU 	
#  JOO3 8=

n8T #	
O ?h&!+- 	
 	
 o.9 &o6	
6&!+ 6	
M !..0MII()IIm!!B!/0
A=A
AF
JX
UCII(#.J_
 
$	
	
s+   -AN"N! N!N!(N&
:N&

Nc                     SSK Jn   [        R                  R	                  SS5        U S L=(       a%    [        [        [        R                  SS 5      S5      $ ! [         a     g[         a  nS[        U5      ;   d   e S nAgS nAff = f)	Nr   )	roi_alignztorchvision::nmsMetatorchvisionr   Fztorchvision::nms does not exist)torchvision.opsr   rC   _C%_dispatch_has_kernel_for_dispatch_keyhasattrrB   opsImportErrorr   str)r   r   s     rJ   has_torchvision_roi_alignr   W  s|    -667I6R$ 
EII}d3[*
 	
   0CF:::s   AA 
B$	B-BBc                t   U c   [         R                  " S5      R                  $ [        U [        5      (       a  [         R                  " U 5      n U R
                  S;  aY  U R                  cL  [        U R
                  5      n[         R                  " U R
                  UR                  R                  5       S9$ U $ )Ng        )cpumeta)index)
rC   r   r   ri   r   typer   rL   Workercurrent_devicer   device_interfaces     rJ   decode_devicer   g  s    ~||C '''&#f%{{/)fll.B3FKK@||FKK/?/F/F/U/U/WXXMrg   c                ~    [         R                  " [        R                  U [        R
                  R                  5      $ rx   )	functoolsreduceoperatormulrj   SOne)its    rJ   sympy_productr  r  s#    HLL"eggkk::rg   c           	         [        U 5      [        U5      :X  d   e[        R                  " [        S [	        X5       5       5      5      $ )Nc              3  .   #    U  H  u  pX-  v   M     g 7frx   r   )r   abs      rJ   r   sympy_dot.<locals>.<genexpr>x  s     >odaAEos   )rE   rj   expandr   r   )seq1seq2s     rJ   	sympy_dotr  v  s6    t9D	!!!<<>c$o>>??rg   c                b    U  Vs0 s H  n[        U5      U_M     snR                  5       $ s  snf rx   )r   values)r  rG   s     rJ   uniquer  {  s+     !bBqE1Hb!((**!s   ,c           
        [        U [        R                  5      (       d  [        U[        R                  5      (       a4  [        [        R                  " U 5      [        R                  " U5      5      $ [        U [
        5      (       a  [        U[
        5      (       d$   U  S[        U 5       SU S[        U5       35       e[        X5      $ )Nz: , )ri   rj   ExprrR   sympifyry   r   runtime_ceildiv)numberdenoms     rJ   r\   r\     s     &%**%%E5::)F)Fu}}V,emmE.BCC fc""z%'='= ("T&\N"UG2d5k];= 6))rg   c                t   U c  g[        U 5      R                  S5      S   n0 SS_SS_SS	_S
S_SS_SS_SS	_SS_SS_SS_SS_SS_SS_SS_SS_SS _S!S"_SS#S$S%S&.EnUR                  [        UR	                  5       5       Vs0 s H  o3U_M     sn5        [        U [         5      (       a  U $ S'X!    3$ s  snf )(Nz*i8.r   booli1
float8e4nvfp8e4nvfloat8e5fp8e5float8e4b15fp8e4b15float8e4b15x4
fp8e4b15x4float8_e4m3fnfloat8_e5m2float8_e8m0fnuu8float4_e2m1fn_x2r   fp16bfloat16bf16float32fp32float64fp64int8i8int16i16int32i32int64i64u16u32u64)uint8uint16uint32uint64*)r   splitupdatelistr  ri   )key	dtype_strtysrs   s       rJ   _type_ofrJ    sW   
 {Cs#B'Ii 	G 	z	
 	 	 	w 	$ 	D 	6 	F 	6 	6  	!" 	#$ 	%& 	'( /C4 JJd3::<0101012S#&&3@a/?,@@ 2s   B5c                Z    U  Vs/ s H  n[         R                  " U5      PM     sn$ s  snf )z
Gets the shape and stride of a tensor. For non-symbolic tensors, this is
trivial. But for symbolic tensors, we need to map from SymIntNode into
sympy.Expr.
)rj   r  lstr   s     rJ   convert_shape_to_inductorrN    s%     '**cEMM!c***s    (c                    SSK Jn  [        U [        5      (       a  U $ [        U [        R
                  5      (       a  [        U 5      $ UR                  R                  R                  R                  U SS9$ )zD
Like convert_shape_to_symint, but operates on a single expression.
r+   VN)hint)
virtualizedrQ  ri   ry   rj   rz   graphsizevars	shape_envcreate_symintnode)r   rQ  s     rJ   convert_to_symintrX    sk      a 	

 !U]]++ F	 !!++==ad=Krg   c                D    U  Vs/ s H  n[        U5      PM     sn$ s  snf )zn
Takes a list of shapes from Inductor and converts them into symints (or just
ints if all shapes are static).
)rX  rL  s     rJ   convert_shape_to_symintrZ    s"     +..#Qa #...s   c                N    [        S U R                  R                   5       5      $ )z%
Does this op overload have aliasing
c              3  <   #    U  H  oR                   S Lv   M     g 7frx   )
alias_infor   r  s     rJ   r   is_view.<locals>.<genexpr>  s     F1EA||4'1Es   )any_schema	argumentsops    rJ   is_viewre    s     F1E1EFFFrg   c                    gNFr   )r   s    rJ   <lambda>rh    s    rg   c                  ^ U R                   S:X  d  g[        U R                  [        R                  R
                  5      (       d  U R                  [        R                  L d  g[        [        R                  R
                  U R                  5      nU[        R                  L d  [        U5      (       a  [        U4S jU R                   5       5      $ [        R                  R                  UR                  ;   =(       d    T" U5      $ )z
Do all uses of this op have torch.Tag.pointwise or return True for optional `is_pointwise_fn`

Uses in views ops will follow the views uses
call_functionFc              3  <   >#    U  H  n[        UT5      v   M     g 7frx   )is_pointwise_use)r   uis_pointwise_fns     rJ   r   #is_pointwise_use.<locals>.<genexpr>  s     KA#A77s   )rd  ri   targetrC   _ops
OpOverloadr  getitemr   re  rm   usersTag	pointwisetags)usern  rp  s    ` rJ   rl  rl    s     66_$3::uzz4455xGWGW9W%**''4F!!!WV__KKKK99&++-H1HHrg   	list[Any]c           	       ^^ [         R                  R                  5       m/ mSUU4S jjnTR                  " U /[	        [         R
                  X1U45      Q76 n[        U R                  R                  5      S:X  a3  [        U R                  R                  S   R                  5      S:X  a  U4nTR                  U5        [         R                  R                  0 T5      nUT4$ )Nc                `   > TR                  U 5        TR                  S[        T5       35      $ )Narg)appendplaceholderrE   )r|  g
graph_argss    rJ   add_tensor_arg)gen_gm_and_inputs.<locals>.add_tensor_arg  s,    #}}s3z?"3455rg   r+   r   Tensor)r|  torch.Tensorr   r*   )rC   fxGraphrj  r   r  rE   ra  returnsr   r   outputr(   )rp  rp   kwargsr  nodegmr  r  s         @@rJ   gen_gm_and_inputsr    s     	A%'J6 6 ??u||^F^LD 	FNN""#q(&&q)../8;wHHTN			b!	$Bz>rg   c                t    U S:X  a  g [        U 5      nUR                  5       (       a  UR                  5         g g Nr   )rL   rD   r   r   s     rJ   r   r     s7    /7$$&&$$& 'rg   c                    [        U5        [        R                  " S5        [        R                  " 5       n[        U5       H  nU " U6 n[        U5        M     [        R                  " 5       nWc   eXt-
  $ )Ni9  )r   rC   manual_seedtimeperf_counterr   )modelexample_inputsr   r   t0r   resultt1s           rJ   timedr    sk     	d				B5\'F  
			B7Nrg   c                    [         R                  " [        U5       Vs/ s H  n[        XX%5      PM     sn5      n[         R                  " U5      U-  n[        X-  S 5        UR                  5       $ s  snf )Nz.6f)rC   r   r   r  medianprintr   )	r  r  r   repeatbaseliner   r   timingstooks	            rJ   print_performancer  *  se     ll>CFmLmue	4mLG << 5(D	T_S!#99;	 	Ms   A3c                F   ^ [        X5      " 5       m[        XU4S j5        g)zKReplace obj.method() with a new method that returns a precomputed constant.c                    > T $ rx   r   )r  s   rJ   rh  #precompute_method.<locals>.<lambda>=  s    rg   N)rB   setattr)objmethodr  s     @rJ   precompute_methodr  :  s    S!#FC(rg   c                ,    U H  n[        X5        M     g)zFReplace methods with new methods that returns a precomputed constants.N)r  )r  methodsr  s      rJ   precompute_methodsr  @  s    #& rg   c                8    [        X:  5      [        X:  5      -
  $ rx   )ry   )r  r  s     rJ   cmpr  F  s    qu:AE
""rg   c                    [        U [        5      (       a  U /U-  $ [        U 5      S:X  a  [        U 5      " U S   /5      U-  $ U $ )Nr+   r   )ri   ry   rE   r   )rG   sizes     rJ   pad_listliker  J  sD    !SsTz
1v{Aw!v%%Hrg   c                @    [        U 5      S:X  a  / $ SS jn[        XS9$ )Nr   c                    [        U [        5      (       a  U $ SSKJn  [        X5      (       d   eU R	                  5       $ )Nr+   )r:   )ri   r   	schedulerr:   get_name)elemr:   s     rJ   	sort_functuple_sorted.<locals>.sort_funcW  s4    dC  K0$2222}}rg   rG  )r  r^   r   r   )rE   sorted)rG   r  s     rJ   tuple_sortedr  S  s$    
1v{	 !##rg   PRV)	covariantc                  2    \ rS rSr\SS j5       rSS jrSrg)CachedMethodig  c                    g rx   r   )r   s    rJ   clear_cacheCachedMethod.clear_cacheh  s    ),rg   c                    g rx   r   selfrp   r  s      rJ   __call__CachedMethod.__call__k  s    rg   r   N)r   r   r   None)rp   P.argsr  P.kwargsr   r  )r   r   r   r   staticmethodr  r  r   r   rg   rJ   r  r  g  s    , ,Drg   r  c           	        ^ U R                   nSU S3mSU 0n[        SU ST ST S3R                  5       U5        [        R                  " U 5      " X! S3   5      nS
U4S	 jjnXCl        U$ )N___cacher   z        def zC_cache_on_self(self):
            try:
                return self.zy
            except AttributeError:
                pass
            rv = fn(self)
            object.__setattr__(self, "z%", rv)
            return rv
        _cache_on_selfc                B   > [        U T5      (       a  [        U T5        g g rx   )r   delattrr  rG  s    rJ   r  "cache_on_self.<locals>.clear_cache  s    4D# rg   )r  r   r   r  )r   execlstripr  wrapsr  )r   r   ctxwrapperr  rG  s        @rJ   cache_on_selfr  o  s    ;;DtfF
C *CF  E "' (+e ,			 FH oob!#n&=">?G &Nrg   c           
        SSK Jn  [        U [        5      (       ay  [        R
                  " [        R                  U  Vs/ s H?  n[        US5      (       d  M  UR                  (       d  M)  UR                  R                  PMA     sn[        5       5      $ [        XR                  5      (       a  U R                  $ [        5       $ s  snf )Nr+   irr  ) r  ri   rF  r  r  r  or_r   r  originsr   r2   )node_scheduler  r  s      rJ   aggregate_originsr    s     -&&LL *)D4( "-1YY "		!!)
 L
 	
 
M??	3	3$$$|s   C
C
+C
c                &   [        U 5      nUS:X  a~  U Vs/ s H\  nUR                  S:X  d  M  SUR                  ;   d  M'  UR                  S   c  M9  UR                  S   R                  R                  PM^     nn[        [        U5      5      nOUS:X  a  / nU H  nUR                  S:X  d  M  SUR                  ;   d  M'  UR                  S   S   n[        US   [        5      (       a  UR                  US   5        Mg  UR                  US   R                  5        M     [        [        U5      5      nO:US:X  a.  U Vs/ s H   o3R                  S:X  d  M  UR                  PM"     nnO[        eUnSR                  S	/U-   5      $ s  snf s  snf )
Noriginal_atenrj  rC   source_fn_stackr   r+   inductor_noder   fused)r  rd  r   _overloadpacketr   r  r   ri   r   r}  r   NotImplementedErrorjoin)r  descriptive_namesall_originsoriginsources	source_fns         rJ   get_fused_kernel_namer    s    $M2KO+ &
%yyO+ B  6;;. B O,	 BFKK(88AA% 	 
 G,-	g	%!FyyO+0AV[[0P"KK(9:2>	ilC00NN9Q<0NN9Q<#8#89 " G,-	o	-&1
&1FYY/5QKFKKk 	 
 "!G88WI'((5
(
s"   F	F	 F	'F	FFc                  ^ [        U 5      nU Vs/ s H  o3R                  S:X  d  M  UPM     nn[        R                  " [        5      n[        R                  " [        5      nS m[        U5      (       a  [        S U 5       5      n[        U5      S:X  ac  US   R                  m[        TS5      (       d0  [        TR                  5       VV	s0 s H  u  pX_M	     n
nn	U
Tl        UR                  U4S jS9  U H  nSUR                  ;   aO  UR                  S   b?  [        UR                  S   R                  5      nXl   R!                  UR"                  5        S	UR                  ;   d  Mt  UR                  S	   S   R"                  nX\   R!                  UR"                  5        M     Tb  S
OSnUR$                   SU SSR'                  UR)                  5       5       SSR'                  UR)                  5       5       S3nUR$                   S3/n[+        UR-                  5       5       HA  u  nnUR!                  UR$                   SU SSR'                  [+        U5      5       35        MC     TbU  UR!                  UR$                   S35        U H1  n	UR!                  UR$                   SU	R/                  5        35        M3     USR'                  U5      4$ s  snf s  sn	nf )Nrj  c              3  8   #    U  H  oR                   v   M     g 7frx   )rT  )r   ns     rJ   r   &get_kernel_metadata.<locals>.<genexpr>  s     "CNq77Nr   r+   r   )_inductor_kernel_metadata_node_to_idx_mapc                "   > TR                   U    $ rx   )r  )r  single_graphs    rJ   rh  %get_kernel_metadata.<locals>.<lambda>  s    lTTUVWrg   r  r  	from_nodezTopologically SortedUnsorted z Source Nodes: [r  z], Original ATen: []z" Source node to ATen node mapping:z   z => z Graph fragment:
)r  rd  collectionsdefaultdictrF  rE   r   rT  r   r   nodesr  sortr   r   r  r}  r   commentr  keysr  itemsformat_node)r  r  r  r  inductor_nodesfrom_node_dictoriginal_aten_dictunique_graphsidxr  node_to_idx_mapr  rG  sort_strmetadatadetailed_metadataoriginal_noder  r  s                     @rJ   get_kernel_metadatar    s    $M2K+6W;)):Vf;NW ,,T2N$006
 L
>""CN"CC}")!,22L<)TUU8A,BTBT8U"V8Ufc168U"VIXFW    dii'DIIo,F,Rdii0@@AC#**4995$))#))K(+00C&&tyy1  *6)A%zH??
1XJ&6tyyATATAV7W6X Y99%7%<%<%>?@	C  $OO,,NOP &~';';'= >u  s=/diiu6N5OP	
 !?   GOO#44D!EFA $$'8AMMO;L%MN  
 TYY0111c X #Ws   KKK$c                   [        U 5      n [        U 5      nU (       ak  U R                  5       nUR                   HB  nU(       a  U" U5      (       a  M  XB;  d  M   UR	                  U5        U R                  U5        MD     U (       a  Mk  U$ )zJReturns the set of nodes whose values depend on those within initial_queue)rF  r   rF   rt  addr}  )initial_queueskip_filterdominated_setr  users        rJ   dominated_nodesr    sx    
 'M}-M
  "JJD{400(!!$'$$T*  - rg   c                B  ^^ SS K nSSKJm  SUU4S jjmUR                  5        Vs/ s H  nT" U5      (       d  M  UR                  PM      nnU  Vs/ s H  nT" U5      (       d  M  UR                  PM      nn[        UR                  " / UQUQ76 5      $ s  snf s  snf )Nr   r+   r  c                  > [        U TR                  5      (       a  T" U R                  5      $ [        U TR                  5      (       a  T" U R                  5      $ [        U TR                  5      =(       a    [        U TR
                  5      $ rx   )ri   	TensorBoxdata
StorageBoxr4   	Pointwise)r  r  is_unrealized_nodes    rJ   r  *gather_origins.<locals>.is_unrealized_node  sd    a&&%aff--a''%aff--!RYY'GJq",,,GGrg   )r  r4   r   r  )	itertoolsr  r  r  r  r   chain)	rp   r  r  valkwarg_originsr|  arg_originsr  r  s	          @@rJ   gather_originsr#    s     H H -3MMOWOS?QRU?V[S[[OMW*.J$32DS2I;3;;$KJiooC{C]CDD XJs   BBB(Bc                X   ^^^^ SS jmSUU4S jjmSUU4S jjmSU4S jjmT" U 5      $ )z
Normal sympy str is very slow, this is a lot faster.  The result are
somewhat worse, as it doesn't do as much simplification.  So don't
use this for final codegen.
c                    [        U [        R                  5      =(       a1    [        U R                  5      S:H  =(       a    U R                  S   S:H  $ )N   r   r   )ri   rj   MulrE   rp   )exprs    rJ   is_neg_leadsympy_str.<locals>.is_neg_lead,  s:    tUYY'VC		Na,?VDIIaLTVDV	
rg   c                v  > [        U [        R                  5      (       a  [        U R                  5      S:X  aT  T" U R                  S   5      (       a:  T" U R                  S   5       ST" U R                  S   R                  S   5       3$ SR                  [        TU R                  5      5      $ T" U 5      $ )Nr&  r+   r   z - z + )ri   rj   rk   rE   rp   r  rn   )r(  r)  sympy_str_muls    rJ   sympy_str_add sympy_str.<locals>.sympy_str_add1  s    dEII&& 499~"{499Q<'@'@'		!56c-		RSHYHYZ[H\:]9^__zz#mTYY"?@@ &&rg   c                   > [        U [        R                  5      (       aJ  T" U 5      (       a  ST" U R                  S   5       3$ SR	                  [        TU R                  5      5      $ T" U 5      $ )N-r+   z * )ri   rj   r'  rp   r  rn   )r(  r)  sympy_str_atoms    rJ   r,   sympy_str.<locals>.sympy_str_mul<  sa    dEII&&4   >$))A,7899zz#ndii"@AA!$''rg   c                  > [        U [        R                  5      (       a  U R                  $ [        U [        R                  [        R
                  45      (       a  ST" U 5       S3$ [        U [        [        [        [        45      (       aC  U R                  R                   SSR                  [        [        U R                  5      5       S3$ [!        U 5      $ )N()r  )ri   rj   Symbolr   rk   r'  rV   rS   rT   rU   funcr   r  rn   	sympy_strrp   r   )r(  r-  s    rJ   r1  !sympy_str.<locals>.sympy_str_atomG  s    dELL))99uyy%))455}T*+1--(HMNNii(()499SDII5N+O*PPQRRt9rg   )r(  r   r   r  r(  r   r   r   r   )r(  r)  r-  r1  r,  s    @@@@rJ   r8  r8  %  s.    

	' 	'	( 	( rg   c                    SSK Jn  [        R                  (       a9  [	        UR
                  SS 5      =n(       a  UR                  S:w  a  [        U 5      $ [        R                  " 5       $ )Nr+   rP  current_node
index_expr)
rS  rQ  r[   compute_all_boundsrB   interpreterrp  rY   rZ   unknown)r   rQ  fx_nodes      rJ   get_bounds_index_exprrB  T  sN     	!!~tDDWDNNl*5!!""$$rg   c                    U S   S:H  $ )Nr   rr   )prefixs    rJ   prefix_is_reductionrF  b  s    !9rg   c                D    U [         R                  :w  d   e[        XSSS9$ )1
Used to generate an integer-nonnegative symbol.
Tintegernonnegative)rX   SIZErW   )rE  r  s     rJ   sympy_index_symbol_with_prefixrM  f  s'     TYY vDdCCrg   c                b    U =(       d    [         R                  =(       a    [         R                  $ rx   )r[   debug_index_assertsassert_indirect_indexing)checks    rJ   generate_assertrR  r  s    /V//TV5T5TTrg   c                D    U S   S:w  d   e[         R                  " U SSS9$ )rH  r   r   TrI  )rj   r6  r   s    rJ   sympy_index_symbolrU  v  s)     7c>> <<d==rg   c                          SS jn[         R                  " U 5      R                  UR                  5        VVs0 s H  u  p4X2" X45      _M     snn5      $ s  snnf )z
When the passed replacement symbol v is a string, it is converted to a symbol with name v that
have the same replaced expression integer and nonnegative properties.
c                    [        U [        R                  5      (       d   e[        U[        5      (       a*  [        R                  " UU R
                  U R                  S9$ U$ )NrI  )ri   rj   r  r   r6  r   is_nonnegative)replacedreplacements     rJ   	to_symbolsympy_subs.<locals>.to_symbol  sV     (EJJ////k3''<< ++$33  rg   )rY  r   rZ  zUnion[sympy.Expr, str]r   sympy.Symbol)rj   r  xreplacer  )r(  replacementsr[  krs   s        rJ   
sympy_subsra    sh    +A	 ==''(4(:(:(<=(<IaO	(<= =s   A
c                   [        U [        R                  5      =(       dd    [        U [        R                  5      =(       aC    [	        S [
        R                  " U R                  5       U R                  5       5       5       5      $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7frx   is_symbolicr   rG   s     rJ   r   is_symbolic.<locals>.<genexpr>  s     N(M1A(Mr   )	ri   rC   r&   r  r`  r  r  r  stride)r  s    rJ   re  re    sS    a& 1ell# 	ON	!((*(MNNrg   c                 &    [        S U  5       5      $ )Nc              3  8   #    U  H  n[        U5      v   M     g 7frx   rd  r^  s     rJ   r   "any_is_symbolic.<locals>.<genexpr>  s     ,t!{1~~tr   r`  )rp   s    rJ   any_is_symbolicrm    s    ,t,,,rg   c                   SSK Jn  [        / SQ5      n[        R                  " 5       (       a  UR                  S5        U R                  R                   H  n[        UR                  5      U;   a  Us  $ [        R                  R                  R                  (       ds  [        UR                  [        R                  R                  5      (       a@  [        R                   R"                  R$                  UR                  R&                  ;   a  Us  $ UR(                  R+                  S5      =nc  M  U" U5      (       d  M  Us  $    g )Nr   )free_unbacked_symbols)z,aten._fused_moving_avg_obs_fq_helper.defaultz7aten._fused_moving_avg_obs_fq_helper_functional.defaultzfbgemm.dense_to_jagged.defaultz%fbgemm.jagged_to_padded_dense.defaultrun_and_save_rng_staterun_with_rng_statezaten._local_scalar_densezaten._assert_scalar)zaten._unsafe_index_put.defaultz0aten._unsafe_masked_index_put_accumulate.defaultzaten.index_put.defaultzaten.index_put_.defaultzaten.scatter.srczaten.scatter.reducezaten.scatter.value_reducezaten.scatter_add_zaten.scatter_add.defaultzaten.scatter_reduce.twozaten.scatter_reduce_.twozaten.scatter_reduce.two_outr   )%torch.fx.experimental.symbolic_shapesro  r   rC   $are_deterministic_algorithms_enabledrE  rT  r  r   rp  	_inductorr[   graph_partitionri   rq  rr  r   ru  cudagraph_unsaferw  r   get)r  ro  forbidden_setr  r   s        rJ   %get_first_incompatible_cudagraph_nodery    s     L	
M  1133	
" t{{},K &&664;;

(=(=>>--1A1AA
 K99==''C49Ns9S9SK " rg   c                    [        [        [        U R                  R                  5      5      5      nUR
                  S:X  d   eU$ )z$Get the output node from an FX graphr  )nextiterreversedrT  r  rd  )r  	last_nodes     rJ   output_noder    s6    T(288>>234I<<8###rg   c                    U R                   R                  SS9n[        S U 5       5      n[        U 5      R                  S   n[        U[        5      (       a  UOU4n[        S U 5       5      nX%-  $ )Nr~  rc  c              3     #    U  HX  n[        UR                  R                  S 5      [        R                  5      (       d  M=  UR                  S    R
                  v   MZ     g7fr   N)ri   r   rw  rC   r  r   )r   r  s     rJ   r   "get_all_devices.<locals>.<genexpr>  sC      9%DdiimmE*ELL9 	 		%%s   <A" A"r   c              3    #    U  H  n[        U[        R                  R                  5      (       d  M.  [        UR                  R                  S 5      [        R                  5      (       d  Mh  UR                  S    R                  v   M     g7fr  )ri   rC   r  r*   r   rw  r  r   )r   r|  s     rJ   r   r    s[      7Cc588==) 	 sxx||E*ELL9 	s   -B6B- B)rT  
find_nodesr   r  rp   ri   tuple)r  placeholder_nodesinput_devicesout_argout_argsout_devicess         rJ   get_all_devicesr    s~    ++}+=.8 9%9 /M "o""1%G$We44w7*H,6 77 -K &&rg   c                    [        [        R                  R                  5       5       GHC  n U R	                  S5      (       d  M  [        R                  U    nUR
                  R                  5        H  nUR	                  S5      (       d  M  [        X5      n[        U[        R                  R                  R                  R                  5      (       d  Me  UR                   Hp  n[        U[        R                  R                  R                  R                  5      (       d  MB  UR                  R                   R"                  R%                  5         Mr     M     [        R                  U 	 GMF     S[        R                  ;   aR  [        R                  S   n['        UR(                  R*                  R,                  5      ?UR(                  R*                  ?[0        R2                  " 5         g )Nz&torch._inductor.runtime.compile_tasks.triton_ztriton.runtime.driver)rF  sysmodulesr  
startswith__dict__rB   ri   rC   rt  runtimetriton_heuristicsCachingAutotunercompile_resultsTritonCompileResultkernelrunmod__del__r   driveractiveutilsinstancegccollect)module_namem	attr_namer  r  r  s         rJ   unload_xpu_triton_pydsr    sQ   CKK,,./%%&NOOKK$*I##I.. .EOO33EEVV  #)"8"8%"!OO33EEYY  #MM--1199; #9 + KK$! 0& #++-kk12""(()2JJ#JJLrg   _registered_cachesc                    [        U S5      (       a  [        U R                  5      (       d  [        U  S35      e[        R                  U 5        U $ )z\
Use this decorator to register any caches that should be cache_clear'd
with fresh_cache().
cache_clearz# does not have a cache_clear method)r   callabler  AttributeErrorr  r}  r  s    rJ   clear_on_fresh_cacher    sE    
 3&&hs.G.Gu$GHIIc"Jrg   c                 >    [          H  n U R                  5         M     g)z
Clear all registered caches.
N)r  r  r  s    rJ   clear_cachesr  *  s     " "rg   c              #    ^#    [        5         [        R                  " US9m [        R                  R                  [        R                  ST05         [        R                  ST5        [        R                  R                  TS5      n[        R                  R                  [        R                  SU05         Sv   [        U [
        5      (       a  [        U 5      S:X  d   S5       e[        R                  R                  U5      (       a{  [        R                  " U5      nU R!                  U Vs0 s HH  nS	U;  d  M  U[        R                  R#                  [        R                  R                  X55      5      _MJ     sn5        SSS5        SSS5        U(       aU  [%        5       (       a-  [&        R(                  R+                  5       (       a
  [-        5         [.        R0                  " TU4S
 jS9  [        5         gs  snf ! , (       d  f       N= f! , (       d  f       N= f! [2         a    [        R5                  ST5        e f = f! [        5         f = f7f)z
Contextmanager that provides a clean tmp cachedir for pt2 caches.

Optionally, pass a dict as 'cache_entries' to get a list of filenames and sizes
generated with this cache instance.
)dirTORCHINDUCTOR_CACHE_DIRzUsing inductor cache dir %stritonTRITON_CACHE_DIRNr   z!expected empty cache_entries dictz.lockc                .   > [         R                  STUS9$ )Nz*Failed to remove temporary cache dir at %s)exc_info)r   warning)r7  pathr  inductor_cache_dirs      rJ   rh  fresh_cache.<locals>.<lambda>]  s    S[[@&% 6A 6rg   )onerrorz(on error, temporary cache dir kept at %s)r  tempfilemkdtempr   patchdictosenvironr   r   r  r  ri   rE   existslistdirrE  getsize
is_windowsrC   r>   rD   r  shutilrmtree	Exceptionr  )cache_entriesr  deletetriton_cache_dirfilesfr  s         @rJ   fresh_cacher  2  s     N!))c2&ZZ__JJ24FG
 II35GH!ww||,>I.@BR-STmT22}-2W4WW2ww~~&677 "

+; <%,, */).A#*!#3 !V277??277<<@P3T#U U). U
$ ||		 6 6 8 8&(MM"
 	3 UT
 
B  >@RS 	st   I0H' A'H:A9H3
H AH H	HA$H' 5I H
H	H
H$ H' '"I		I IIc           
     z    U R                   n[        [        U 5      5      n[        [	        [        X!SS95      5      $ )NT)rG  reverse)__getitem__r   rE   rF  r}  r  )seqgettera_rs      rJ   argsortr  p  s/    __F
C/C>?@@rg   c           	     D  ^  SU 4S jjn[        U5       VVs/ s H>  u  p4U[        U[        R                  5      (       a  UR                  R
                  OU4PM@     nnn[        U[        R                  " U5      S9nU VVs/ s H  u  p6UPM	     nnnU$ s  snnf s  snnf )Nc                ~   > U u  p#Uu  pESU4S jjnU" X5:  5      (       a  gU" X5:  5      (       a  gX$:  a  gX$:  a  gg)Nc                R   > [        U [        5      (       a  U $ TR                  U SS9$ )NT)size_oblivious)ri   r  evaluate_expr)r(  rV  s    rJ   evaluate*argsort_sym.<locals>.cmp.<locals>.evaluate~  s+    $%%**4*EErg   r   r+   r   )r(  z%Union[bool, torch.SymInt, sympy.Expr]r   r  r   )r  r  a_idxa_valb_idxb_valr  rV  s          rJ   r  argsort_sym.<locals>.cmpz  sN    	F
 EM""EM""
 ==rg   r  )r  tuple[int, sympy.Expr]r  r  r   ry   )	r   ri   rC   r&   r  r(  r  r  
cmp_to_key)rV  r  r  r  r   exprsr   r  s   `       rJ   argsort_symr  w  s    4  n$FC 
Z5<<88affkka@$ 
  5i22378E %&fccF&M
 's   ABBc                r    U [         R                  :X  a  g[         R                  " SU S9R                  5       $ )Nrb   r   r   )rC   rB  r   element_sizer  s    rJ   get_dtype_sizer    s-     ;;r'4466rg   c                       \ rS rSr% S\S'   Srg)LineContexti  r   contextr   Nr   r   r   r   r   r   r   rg   rJ   r  r    s    Lrg   r  c                  *    \ rS rSr% S\S'   S\S'   Srg)ValueWithLineMapi  r   r|   zlist[tuple[int, LineContext]]line_mapr   Nr  r   rg   rJ   r  r    s    J++rg   r  c                     \ rS rSrSrSSS jjr\R                  SS j5       rSS jr	SS jr
SS jrSS jrSS	 jrSS
 jrSS jrSS jr    SS jrS S!S jjrS S"S jjrS S"S jjr S#     S$S jjrS%S jrSS jrS&S jrSrg)'IndentedBufferi     c                    / U l         Xl        g rx   )_lines_indent)r  initial_indents     rJ   __init__IndentedBuffer.__init__  s    GI%rg   c              #  \   #    U R                   n Xl         S v   X l         g ! X l         f = f7frx   )tabwidth)r  r  prevs      rJ   set_tabwidthIndentedBuffer.set_tabwidth  s%     }}	!$M MDMs   ,
! ,),c                   [        5       nSn/ nU R                   H  n[        U[        5      (       a  U" 5       nUc  M$  O5[        U[        5      (       a  UR                  X$R                  45        MX  Un[        U[        5      (       d   eUR                  U5        UR                  S5        USUR                  S5      -   -  nM     [        UR                  5       U5      $ )Nr+   r  )r
   r  ri   DeferredLineBaser  r}  r  r   writecountr  getvalue)r  bufr   linemaplilines         rJ   getvaluewithlinemap"IndentedBuffer.getvaluewithlinemap  s    j13++B".//t<  B,,::/dC((((IIdOIIdOTZZ%%%A   88rg   c                6    U R                  5       R                  $ rx   )r  r|   r  s    rJ   r  IndentedBuffer.getvalue  s    '')///rg   c                   [        5       nU R                   H  n[        U[        5      (       a  U" 5       nUc  M$  O[        U[        5      (       a  M<  Un[        U[
        5      (       d   eUR                  S5      (       a  UR                  US S 5        M  UR                  U5        UR                  S5        M     UR                  5       $ )N\r   r  )	r
   r  ri   r  r  r   endswithr   r  )r  r  r  r  s       rJ   getrawvalueIndentedBuffer.getrawvalue  s    j++B".//t<  B,,dC((((}}T""		$s)$		$		$   ||~rg   c                8    U R                   R                  5         g rx   )r  clearr
  s    rJ   r  IndentedBuffer.clear  s    rg   c                ,    [        U R                  5      $ rx   )r  r  r
  s    rJ   __bool__IndentedBuffer.__bool__  s    DKK  rg   c                :    SU R                   U R                  -  -  $ )Nr  )r  r  r
  s    rJ   rE  IndentedBuffer.prefix  s    dllT]]233rg   c                &    U R                  S5        g )Nr  	writeliner
  s    rJ   newlineIndentedBuffer.newline  s    trg   c                   [        U[        5      (       a  U R                  R                  U5        g [        U[        5      (       a9  U R                  R                  UR                  U R                  5       5      5        g UR                  5       (       a.  U R                  R                  U R                  5        U 35        g U R                  R                  S5        g Nr  )ri   r  r  r}  r  with_prefixrE  stripr  r  s     rJ   r  IndentedBuffer.writeline  s    dK((KKt$.//KKt//>?ZZ\\KK$++-78KKr"rg   c                8    U H  nU R                  U5        M     g rx   r  )r  linesr  s      rJ   
writelinesIndentedBuffer.writelines  s     DNN4  rg   c                L   ^ ^ [         R                  SUU 4S jj5       nU" 5       $ )Nc               3     >#    T=R                   T -  sl          S v   T=R                   T -  sl         g ! T=R                   T -  sl         f = f7frx   r  )offsetr  s   rJ   r  "IndentedBuffer.indent.<locals>.ctx  s8     LLF"L'&&s   A4 AAAr   Iterator[None])
contextlibcontextmanager)r  r+  r  s   `` rJ   indentIndentedBuffer.indent  s$    		"	"	' 
#	' urg   c                .    U =R                   U-  sl         g rx   r*  r  r+  s     rJ   	do_indentIndentedBuffer.do_indent      rg   c                .    U =R                   U-  sl         g rx   r*  r4  s     rJ   do_unindentIndentedBuffer.do_unindent  r7  rg   c           	        [        U[        5      (       a  [        S5      nUR                   HR  n[        U[        5      (       a  M  U(       d  M#  [        U[        U5      [        UR                  5       5      -
  5      nMT     [        R                  " U5      (       a  SnUR                   HV  n[        U[        5      (       a  U R                  R                  U5        M5  [        R                  X[        U5      S  5        MX     g [        R                  " U5      nU(       a  UR                  5       nU(       d  g UR                  5       nUR!                  S5       H  nU R                  U5        M     g )Ninfr   r  )ri   r  floatr  r  minrE   r  mathisinfr}  r  ry   textwrapdedentrstriprD  )r  
other_coder!  rB  r  r   s         rJ   spliceIndentedBuffer.splice  s    j.115\F"))!$44 TS5G)GHF * zz&!!"))dK00KK&&t,",,TF3FG	 * "4J'..0
#**,J%%d+q! ,rg   c                    [        U R                  S9nU R                   Vs/ s H
  o1" U5      PM     snUl        U$ s  snf N)r  )r  r  r  )r  r7  r   r  s       rJ   rn   IndentedBuffer.map/  s8    DLL9-1[[9[Td4j[9

 :s   =c                @    [        U 5       SU R                  5        S3$ )Nr4  r5  )r   r  r
  s    rJ   __repr__IndentedBuffer.__repr__4  s     t*Qt}}/q11rg   c                    U R                   UR                   :X  d   e[        U R                   S9nUR                  U R                  5        UR                  UR                  5        U$ rH  )r  r  r&  r  )r  otherr   s      rJ   __add__IndentedBuffer.__add__7  sK    ||u}},,,DLL9t{{#u||$
rg   )r  r  r  Nr   )r  ry   r   r  )r  ry   r   r.  )r   r  r   r   r   r  r   r  )r  z)Union[LineContext, DeferredLineBase, str]r   r  )r%  z3Sequence[Union[LineContext, DeferredLineBase, str]]r   r  rv   )r+  ry   r   'contextlib.AbstractContextManager[None])r+  ry   r   r  F)rD  zUnion[IndentedBuffer, str]r!  r  r   r  )r7  zCallable[[Any], Any]r   r  )rN  r   r   r  )r   r   r   r   r  r  r/  r0  r  r  r  r  r  r  rE  r  r  r&  r1  r5  r9  rE  rn   rK  rO  r   r   rg   rJ   r  r    s    H& ! !9(0(!4#!H!	!	 EJ"4"=A"	"2
2rg   r  c                  6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )FakeIndentedBufferi@  c                "   > [         TU ]  5         g rx   )superr  )r  	__class__s    rJ   r  FakeIndentedBuffer.__init__A  s    rg   c                V    US:X  a  [         R                  X5      $ [        SU S35      e)Nr[  zTried to call self.z on FakeIndentedBuffer. This bufferis currently used on TritonTemplateKernel to prevent actualwrites to the body without explicitly specifying the body with`TritonTemplateKernel.set_subgraph_body(name)`)object__getattribute__r   )r  r   s     rJ   r_  #FakeIndentedBuffer.__getattribute__D  s9    ;**466!$ (= =
 	
rg   r   rS  )r   r   r   r   )r   r   r   r   r  r_  r   __classcell__r[  s   @rJ   rX  rX  @  s    
 
rg   rX  c               #     #    [         R                  [         R                  p S v   Xs[         l        [         l        g ! Xs[         l        [         l        f = f7frx   )r  stdoutstderr)initial_stdoutinitial_stderrs     rJ   restore_stdout_stderrrh  O  s9     %(ZZN@!/
CJ
CJs    A> AAAc                  h    \ rS rSrSrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSrg)r  iX  z.A line that can be 'unwritten' at a later timec                >    UR                  5       (       d  SnXl        g r  )r!  r  r"  s     rJ   r  DeferredLineBase.__init__[  s    zz||D	rg   c                    [         e)zJReturns either self.line or None to indicate the line has been 'unwritten'r  r
  s    rJ   r  DeferredLineBase.__call__`      !!rg   c                    [         e)z3Returns a new deferred line with the same conditionrm  r"  s     rJ   	_new_lineDeferredLineBase._new_lined  ro  rg   c                @    U R                  U U R                   35      $ rx   rq  r  )r  rE  s     rJ   r   DeferredLineBase.with_prefixh  s    ~~455rg   c                T    U R                  U R                  R                  5       5      $ rx   )rq  r  r  r
  s    rJ   r  DeferredLineBase.lstripk  s    ~~dii..011rg   c                >    U R                  U R                  U   5      $ rx   rt  )r  r   s     rJ   r  DeferredLineBase.__getitem__n  s    ~~dii.//rg   c                ,    [        U R                  5      $ rx   )r  r  r
  s    rJ   r  DeferredLineBase.__bool__q  s    DIIrg   c                ,    [        U R                  5      $ rx   )rE   r  r
  s    rJ   __len__DeferredLineBase.__len__t  s    499~rg   )r  N)r  r   )r   zUnion[str, None])r  r   r   r   )rE  r   r   r   )r   r   )r   zUnion[int, slice]r   r   rT  r   ry   )r   r   r   r   r   r  r  rq  r   r  r  r  r}  r   r   rg   rJ   r  r  X  s-    8
""620rg   r  c                  D   ^  \ rS rSrSrSU 4S jjrSS jrS	S jrSrU =r	$ )
DelayReplaceLineix  z6At end of codegen call `line.replace(key, value_fn())`c                <   > [         TU ]  U5        Xl        X l        g rx   )rZ  r  rG  value_fn)r  rG  r  r  r[  s       rJ   r  DelayReplaceLine.__init__{  s     rg   c                j    U R                   R                  U R                  U R                  5       5      $ rx   )r  replacerG  r  r
  s    rJ   r  DelayReplaceLine.__call__  s#    yy  4==?;;rg   c                D    [        U R                  U R                  U5      $ rx   )r  rG  r  r"  s     rJ   rq  DelayReplaceLine._new_line  s    $-->>rg   )rG  r  )rG  r   r  zCallable[[], str]r  r   rR  )r  r   r   r  )
r   r   r   r   r   r  r  rq  r   ra  rb  s   @rJ   r  r  x  s    @!
<? ?rg   r  c                   [        U [        R                  5      (       a  U nO[        R                  " [        5       U 5      n[        R
                  " U5      n[        R                  R                  (       aF  UR                  c   eUR                  S:  d  UR                  S:X  a  [        R                  S5        ggUR                  S:X  a  SOSnUR                  nXC:  a  [        R                  S	X4S
.S9  gg)N	   
   z6GPU arch does not support max_autotune_gemm mode usageFTr>   r_   D   z,Not enough SMs to use max_autotune_gemm mode)min_sms	avail_sms)extra)ri   rC   r   rK   r   createversionhipmajorr   r  r   multi_processor_count)index_or_devicer   propr  r  s        rJ   
is_big_gpur    s    /5<<00 lno>""6*D }}zz%%%::>TZZ2-KKPQKK5(bbG**I:%> 	 	
 rg   c                 T    [         R                  R                  S5      R                  $ )Nr<   )rC   r<   get_device_propertiesr  r   rg   rJ   get_max_num_smsr    s    ::++F3IIIrg   c                 f    [         R                  R                  5       n [        5       U b  U -
  $ S-
  $ )zFHandle experimental carveout if set otherwise return hardware SM countr   )rC   r   _get_sm_carveout_experimentalr  )carveouts    rJ   get_num_smsr    s1     xx557HH,@HHaHHrg   c                    SSK JnJn  Uc
  [        5       nUR	                  S5      nX -  [
        -  nU" UUUUR                  " 5       S9$ )zKBuilds and returns a WorkspaceArg for the device side TMA workspace buffer.r+   )r,   WorkspaceZeroModeF)r  	zero_moder   
outer_name)codegen.commonr,   r  r  	from_boolTMA_DESCRIPTOR_SIZEunique_name)num_tma_descriptorsr   num_programsr,   r  r  r  s          rJ   get_tma_workspace_argr    sU     @"}!++E2I-0CCD++-	 rg   c                   U R                   U;  a!  [        R                  SU R                   U5        [        U R                  R
                  5      =(       a+    U R                   U;   =(       a    [        U R                  5      $ )NzDNot using template since dtype %s is not in allowed layout dtypes %s)r   r   r   is_gpur   r   r  )layoutallowed_layout_dtypess     rJ   _use_template_for_gpur    sf     ||00		RLL!	
 	v}}!!" 	&LL11	&v}}%rg   c                    U R                  5       [        R                  R                  5       R                  S5       Vs/ s H  oR	                  5       PM     sn;   $ s  snf N,)upperr[   max_autotune_gemm_backendsrD  r!  backendrG   s     rJ   _use_autotune_backendr    P    ==?!<<BBDJJ3OOa	O      Ac                    U R                  5       [        R                  R                  5       R                  S5       Vs/ s H  oR	                  5       PM     sn;   $ s  snf r  )r  r[   max_autotune_conv_backendsrD  r!  r  s     rJ   _use_conv_autotune_backendr    r  r  F)enable_int32enable_float8c                  SSK JnJn  [        R                  [        R
                  [        R                  /nU(       a>  [        R                  [        R
                  [        R                  [        R                  /nU(       a/  UR                  [        R                  [        R                  /5        [        U R                  R                  5      =(       a    [        X5      =(       d/    U R                  R                  S:H  =(       a    U R                  U;   =(       a]    [         R"                  =(       d    [         R$                  =(       a/    ['        S5      =(       a    U" U R                  UR(                  5      $ )Nr+   )BackendFeaturehas_backend_featurer   TRITON)r  r  r  rC   r   r.  r0  r8  extendr(  r)  r  r   r   r  r   r[   max_autotunemax_autotune_gemmr  TRITON_TEMPLATES)r  r  r  r  r  layout_dtypess         rJ   use_triton_templater    s     D]]ENNEMMBMu{{Se1153D3DEF v}}))* A)&@O ""e+M0M		P   <F$<$<		P "(+		P  ~/N/NOrg   c                    ^^ SSK JnJn  SSKJm  SU4S jjmU" 5       (       a  [
        R                  (       a  g[
        R                  R                  =(       a#    U" 5       =(       a    [        U4S jU  5       5      $ )	Nr   )has_triton_stable_tma_apihas_triton_tma_devicer+   rP  c                |  > [        U R                  5       5      S:w  a  gU R                  5       nU[        R                  [        R
                  [        R                  4;  a  gU R                  5       nUR                  5       nUR                  5       (       d  U(       d  gUR                  S   nU(       a  UR                  S   nU[        R                  :X  a,  TR                  R                  R                  US5      (       a  gXAR                  -  nTR                  R                  R                  U[         5      $ )Nr&  Fr+   r       )rE   get_size	get_dtyperC   r   r.  r(  
get_layoutis_transposedis_contiguousr  rT  rU  statically_known_ltitemsizestatically_known_multiple_ofTMA_ALIGNMENT)rG   r   r  
transposed	inner_diminner_bytesrQ  s         rJ   _is_tma_compatible3use_triton_tma_template.<locals>._is_tma_compatible  s    qzz|!8K8KLL))+
$$&&*KKN	AIE'''AGG,<,<,P,Pr-
 -
 ..0ww<<[-XXrg   Fc              3  4   >#    U  H  nT" U5      v   M     g 7frx   r   )r   r  r  s     rJ   r   *use_triton_tma_template.<locals>.<genexpr>  s     8x!"1%%x   rG   r4   r   r  )
torch.utils._tritonr  r  rS  rQ  r[   cpp_wrapperr  enable_persistent_tma_matmulrm   )matricesr  r  rQ  r  s      @@rJ   use_triton_tma_templater    sW    TY2 !""v'9'9 	22 	9!#	98x88rg   c                @   SSK Jn  UR                  R                  R	                  X-  U-  SS9nUS::  d  U[
        R                  R                  :  a  gSSKJ	n  [        R                  R                  (       a  g[        R                  [        R                  [        R                  /n[!        X5      =(       a9    [
        R"                  =(       d    [
        R$                  =(       a    ['        S5      nU(       a"  U" 5       (       d  [(        R+                  S	5        gU$ )
Nr+   rP  r   fallbackr   F)try_import_cutlassCUTLASSzFailed to import CUTLASS lib. Please check whether _inductor.config.cuda.cutlass_dir is set correctly. Skipping CUTLASS backend for now.)rS  rQ  rT  rU  	size_hintr[   r<   cutlass_backend_min_gemm_sizecodegen.cuda.cutlass_utilsr  rC   r  r  r   r.  r8  r  r  r  r  r   r  )	r  r  r  r`  rQ  	gemm_sizer  r  r   s	            rJ   use_cutlass_templater  !  s      **1519r*BIA~V[[%N%NN> }} ]]ENNEKK@Mf4 	-  <F$<$<	-!),  !##KK4
 Jrg   c                    [         R                  R                  R                  5       nUS:X  a  gU R                  5       UR	                  S5       Vs/ s H  o"R                  5       PM     sn;   $ s  snf )z8Check if CUTLASS should be used for the given operation.ALLTr  )r[   r<   cutlass_enabled_opsr  rD  r!  )op_nameenabled_opsrG   s      rJ   _use_cutlass_for_opr  A  sY    ++11779Ke==?+2C2CC2HI2HQwwy2HIIIIs   A0r  r   )r_   r  ra   r`      r   _IntLikec           
        SSK Jn  UR                  R                  R	                  [
        R                  " [
        R                  " U[        U -  5      [
        R                  " U[        U-  5      5      5      =(       aY    UR                  R                  (       + =(       a7    UR                  R                  (       + =(       a    [        R                  (       + $ )Nr   rP  )torch._inductor.virtualizedrQ  rT  rU  statically_known_truerj   AndGedecompose_k_thresholdaot_moder  r[   disable_decompose_k)r  r  r`  rQ  s       rJ   use_decompose_k_choicer  T  s    - 	
..II1A561A56	
 	+    	+ ###	+ ***
rg   c                $   [        U[        R                  5      (       a  UR                  (       d  [        $ [        U [        R                  5      (       a  U R                  (       a0  [        U[        R                  5      (       a  UR                  (       d  SnO[        X -  X!-  5      nSn[        R                  " U5      nU Vs/ s H  nXc::  d  M
  Xd:  d  M  UPM     nn/ / / pnU H`  n
X*-  nUS:  a  M  XS-
  -  S:X  a  US:  a  UR                  U
5        M3  US-  S:X  a  UR                  U
5        MO  U	R                  U
5        Mb     [        R                  S:X  a  Xx-   U	-   $ [        U5      [        :  a  U$ Xx-   U	-   nUS [         $ s  snf )Nr  r&  r`   r+   r   r  
EXHAUSTIVE)ri   rj   r  	is_numberdefault_k_splitsr>  divisorsr}  r[   max_autotune_gemm_search_spacerE   k_splits_limit)r  r  r`  max_k_splitmin_k_splitr  divisorpow_of_2_divisorsmul_of_32_divisorsrest_of_splitsdkPartbest_splitss                rJ   get_k_splitsr  d  sw    !UZZ  1ejj!!!++1ejj!!!++!&!&)K~~a H  G! 	&-&< 	   =?B> 3; AI!#$$Q'RZ1_%%a( !!!$ " ,,< 5FF /  '<~M?N++Gs   	FFFc                T    [         R                  R                  U 5      R                  $ rx   )rC   r<   r  gcnArchNamer   s    rJ   _rocm_native_device_arch_namer    s    ::++F3???rg   c                      SS K n SSKJnJn  SSKJn  [        R                  R                  U R                  5      nXAX#4$ ! [         a    SS jnSS jn " S S5      nS n N&f = f)	Nr   )gen_ops_librarygen_ops_preselected)CKGemmOperationc                     / $ rx   r   r   rg   rJ   r  *try_import_ck_lib.<locals>.gen_ops_library      Irg   c                     / $ rx   r   r   rg   rJ   r  .try_import_ck_lib.<locals>.gen_ops_preselected  r  rg   c                      \ rS rSrSrg)*try_import_ck_lib.<locals>.CKGemmOperationi  r   N)r   r   r   r   r   r   rg   rJ   r  r    s    rg   r  )r   ry  )ck4inductor(ck4inductor.universal_gemm.gen_instancesr  r  ck4inductor.universal_gemm.opr  r  r  dirname__file__r   )r   r  r  r  package_dirnames        rJ   try_import_ck_libr&    sh    	
	
 ''//+*>*>? -@QQ  			 	 s   ;A  A$#A$c                P   [         R                  (       d  [         R                  (       d  g[        R                  R
                  (       d  gU R                  R                  S:X  d  g[        U R                  5      n[         R                  R                   Vs0 s H  o"R                  S5      S   U_M     sn=(       d    UR                  S5      S   U0nUR                  5       [         R                  R                  -   Vs/ s H  nX2   PM	     nnU(       d  gU R                  [        R                  [        R                   [        R"                  4;  a  g[%        5       u  n    nU(       d  [&        R)                  S5        g[         R*                  " 5       (       a  U[         R                  l        [         R                  R,                  (       d  [&        R)                  S5        gU[         R                  R,                  :w  a  [&        R)                  S5        ggs  snf s  snf )	NFr<   :r   z,Please pip install Composable Kernel packagez,Please set TORCHINDUCTOR_CK_DIR env variablezInvalid path to CK libraryT)r[   r  r  rC   r  r  r   r   r  rocmarchrD  r  ck_supported_archr   r   r.  r0  r&  r   r  	is_fbcodeck_dir)r  native_archr`  requested_archsrequested_supported_archsck_package_dirnamer   s          rJ   use_ck_templater2    s   6#;#;====' 0>K39;;3C3CD3Cawws|A)3CD #q!;IO
 !%%'&++*G*GG!GA 	G  ! %||EMM5>>5==II"3"51aBC/;;BCV[[///01= E!s   HH#c                    SSK Jn  [        S5      =(       a>    [        U 5      =(       a,    UR                  R
                  R                  X-  U-  SS9S:  $ )Nr+   rP  CKr   r  r   rS  rQ  r  r2  rT  rU  r  r  r  r  r`  rQ  s        rJ   use_ck_gemm_templater7    sP     	d# 	CF#	CGG&&quqy2&>Brg   c                    SSK Jn  [        S5      =(       a>    [        U 5      =(       a,    UR                  R
                  R                  X-  U-  SS9S:  $ )Nr+   rP  CKTILEr   r  r   r5  r6  s        rJ   use_ck_tile_gemm_templater:    sP     	h' 	CF#	CGG&&quqy2&>Brg   c                <    [        S5      =(       a    [        U 5      $ )Nr4  )r  r2  r  s    rJ   use_ck_conv_templater=     s    %d+G0GGrg   c                    [         R                  =(       d    [         R                  =(       a    U R                  R                  S:H  $ r  )r[   r  r  r   r   r<  s    rJ   _use_template_for_cpur?    s2    7v77&
--


%&rg   c                    SSK Jn  [        UR                  U5      (       d   e[	        XUSS9=(       a    UR                  R                  5       $ )Nr+   )r5   F)require_constant_mat2)r  r5   ri   r  use_cpp_gemm_templater  )r  mat1mat2r5   s       rJ   use_cpp_bmm_templaterE  
  sF     dkk6**** 	fDN 	(KK%%'rg   c                   SSK Jn  SSKJn  SSKJn	  SSKJn
  [        U 5      (       a  [        S5      (       d  g[        R                  R                  (       d  gUR                  5       [        R                  [        R                   4;   n[        R"                  [        R$                  [        R&                  [        R                  /nU
" UUU(       a  U R(                  OS UUS9u  ppp[+        X45      (       a  g[-        X'R.                  5      (       a  UR1                  5       nU	" UR                  5       5      u  nnU" S	UUUUR                  5       UR                  5       U[3        5       U(       + US
9
nSS jnU R(                  U;   =(       aT    US L=(       aI    U" U5      =(       a:    [-        X'R4                  5      =(       a    UR7                  5       =(       d    U(       + $ )Nr+   r  )create_micro_gemm)*get_gemm_template_output_and_compute_dtype)mm_argsCPPF)	out_dtypemat2_transposeduse_4x2_dim
micro_gemm)input_dtypeinput2_dtypeoutput_dtypenum_threadsuse_refq_group_sizec                N    U R                  5         U R                  5       S   S:H  $ )Nr   r+   )freeze_layout
get_striderG   s    rJ   is_last_dim_stride12use_cpp_gemm_template.<locals>.is_last_dim_stride1J  s"    	||~b!Q&&rg   r  )r  r  codegen.cpp_micro_gemmrG  codegen.cpp_utilsrH  kernel.mm_commonrI  r?  r  r[   cppweight_prepackr  rC   r?  r4  r0  r.  halfr   has_free_symbolsri   BaseViewunwrap_viewparallel_num_threadsr  is_module_buffer)r  rC  rD  rL  rA  is_woq_int4rT  r  rG  rH  rI  	int8_gemmr  r  r  r`  rQ  r   rN  rY  s                       rJ   rB  rB    s    9M) ((0Ee0L0L::$$ U[[%**$==I]]ENNEJJLM")"+&,,'#A!T $$$!@AQROL!"			NN$^^%!(*!J'
 	% 	Cd"	C%	C t]]+	C ""$A,A(Arg   c                 ~    [         R                  =(       d    [         R                  (       + =(       d    [        S5      $ )NATEN)r[   r  r  r  r   rg   rJ   use_aten_gemm_kernelsrj  W  s-    7v77 '	v	&'rg   c                  b    \ rS rSr% \R
                  " S5      rS\S'   S
S jrS
S jr	SS jr
Srg	)DebugDirManageri]  r   r   prev_debug_namec                @    [        [        R                  5      U l        g rx   )r{  rl  counterr   r
  s    rJ   r  DebugDirManager.__init__a  s    ../rg   c                    [         R                  R                  R                  U l        U R                   SU R
                   3U l        U R                  [         R                  R                  l        g )N_tmp_)rC   _dynamor[   debug_dir_rootrm  r   new_namer
  s    rJ   	__enter__DebugDirManager.__enter__d  sM    $}}33BB//0dggY?.2mm+rg   c                    [         R                  " U R                  5        U R                  [        R
                  R                  l        g rx   )r  r  ru  rm  rC   rs  r[   rt  )r  rp   s     rJ   __exit__DebugDirManager.__exit__i  s*    dmm$.2.B.B+rg   )r   ru  rm  NrS  )rp   r   r   r  )r   r   r   r   r  r  ro  r   r  rv  ry  r   r   rg   rJ   rl  rl  ]  s&    ooa G0<
Crg   rl  c                   ^ SSK Jn  / mSU4S jjn[        R                  R	                  USU5         [
        R                  R                  5         U " U0 UD6nS S S 5        UT4$ ! , (       d  f       WT4$ = f)Nr+   r/   c                (   > TR                  U 5        g rx   r}  codesource_codess    rJ   save_output_code*run_and_get_code.<locals>.save_output_codew      D!rg   r  r  r   r   r  rT  r0   r   r  r^  rC   rs  reset)r   rp   r  r0   r  r  r  s         @rJ   run_and_get_coder  n  su    
 % L" 
		=*<>N	OT$V$ 
P < 
P	O <s   'A&&
A7c                    [        U /UQ70 UD6u  p4/ nU H8  nUR                  [        R                  " SU[        R                  5      5        M:     X54$ )Nz	'''.*?''')r  r  refindallDOTALL)r   rp   r  r  r  kernelsr  s          rJ   run_and_get_kernelsr    sO     ,B@@@FGrzz,bii@A ?rg   c                *   ^  SU 4S jjn[        U5      $ )Nc                 R   > T" 5       n U R                  5       R                  5         U $ rx   )r   backward)r  r   s    rJ   run_with_backward1run_fw_bw_and_get_code.<locals>.run_with_backward  s!    

rg   )r   r   )r  )r   r  s   ` rJ   run_fw_bw_and_get_coder    s    
 -..rg   c                t  ^^ SSK Jn  / mSU4S jjmS	U4S jjn[        R                  R	                  USU5         [        R                  R	                  UST5         [
        R                  R                  5         U " U0 UD6nSSS5        SSS5        T$ ! , (       d  f       N= f! , (       d  f       T$ = f)
zLGet the inductor-generated code, but skip any actual compilation or running.r+   r/   c                (   > TR                  U 5        g rx   r}  r~  s    rJ   r  "get_code.<locals>.save_output_code  r  rg   c                   >  " S S5      nU R                   (       a  U R                  5       OU R                  5       u  p#T" UR                  5        U(       a  T" UR                  5        U" 5       $ )Nc                  ,    \ rS rSrSrSS jrSS jrSrg)	@get_code.<locals>.patched_compile_to_module.<locals>.DummyModulei  z4This is empty to replace the generated triton modulec                    g rx   r   r
  s    rJ   r  Iget_code.<locals>.patched_compile_to_module.<locals>.DummyModule.__init__  s    rg   c                    g rx   r   r  s      rJ   callEget_code.<locals>.patched_compile_to_module.<locals>.DummyModule.call  s    rg   r   NrS  rp   r   r  r   r   r  )r   r   r   r   r   r  r  r   r   rg   rJ   DummyModuler    s    Frg   r  )r  codegen_with_cpp_wrappercodegenr|   )r  r  wrapper_codekernel_coder  s       rJ   patched_compile_to_module+get_code.<locals>.patched_compile_to_module  s[    	 	 04/?/?D))+T\\^ 	"
 	++,[../}rg   compile_to_moduler  Nr  )r  r0   r   r   r  )r   rp   r  r0   r  r   r  r  s         @@rJ   get_coder    s    $ L". 	

.0I	
 	

-);=MN	 	O	
  	ON	
 	
 s#   "B('BB(
B%	!B((
B7c                    [        U /UQ70 UD6nS[        U5      s=::  a  S::  d  O   S[        U5       35       eUS   $ Nr+   r&  z%expected one or two code outputs got r   )r  rE   )r   rp   r  r  s       rJ   get_triton_coder    sQ    B000LL!&Q& 
/L0A/BC& ?rg   c                    [        U /UQ70 UD6u  p4S[        U5      s=::  a  S::  d  O   S[        U5       35       eUS   $ r  )r  rE   )r   rp   r  r   r  s        rJ   run_and_get_triton_coder    sU     'r;D;F;OAL!&Q& 
/L0A/BC& ?rg   c                   ^^^ SSK Jm  SSKJn  UR                  m/ mSUUU4S jjn[
        R                  R                  USU5         U " U0 UD6nS S S 5        UT4$ ! , (       d  f       WT4$ = f)Nr   r/   r8   c                 h   > T" U 0 UD6  U S   n[        UT5      (       d   eTR                  U5        g )Nr&  )ri   r}  )rp   r  rT  r0   graph_lowerings	real_inits      rJ   	fake_init-run_and_get_graph_lowering.<locals>.fake_init  s:    4"6"Q%////u%rg   r  r  )torch._inductor.graphr0   torch._inductor.output_coder9   r  r   r  r^  )	r   rp   r  r9   r  r  r0   r  r  s	         @@@rJ   run_and_get_graph_loweringr    sv     4;((IO& & 
		?J		BT$V$ 
C ?"" 
C	B ?""s   		A
A/c              #     #    SSK Jn  UR                  U    n [        R                  " X5      UR                  U '   Sv   X2R                  U '   g! X2R                  U '   f = f7f)zs
Override the lowering of aten_op with override_fn.
The first argument of override_fn is the original lowering fn.
r   )loweringN)torch._inductorr  	loweringsr  partial)aten_opoverride_fnr  orig_fns       rJ   override_loweringr    sY      )  )G.&/&7&7&M7#&-7#g7#s   A"'A  A"AA"c                   ^ ^^ SSK Jn  UR                  mSUUU 4S jjn[        R                  R
                  R                  USU5      $ )zf
Add hook functions to be called at the beginning and end of Scheduler.__init__.
Used for unit tests.
r   )	Schedulerc                F   > T" X5        T" X5      nT(       a  T" X5        U$ rx   r   )r  r  outr  post_fnpre_fns      rJ   r  (add_scheduler_init_hook.<locals>.wrapper  s%    y i'I%
rg   r  )r  r   r  r   r   r   )torch._inductor.schedulerr  r  unittestr   r  r^  )r  r  r  r  r  s   ``  @rJ   add_scheduler_init_hookr    s>     4  G  ==%%iWEErg   c                    [         R                  (       a  [        R                  U 5        g[        R	                  U 5        g)z
Warnings that will be actionable for PyTorch developers, but not
end users.  Allows us to easily disable them in stable releases but
keep them on for nightly builds.
N)r[   developer_warningsr   r  info)msgs    rJ   developer_warningr    s$       Crg   c                     [         R                  R                  S5      n U S-   [        [         R                  5      :  aV  [        [         R                  U S-      5      S:  a3  [         R                  U S-      S   S:w  a  [         R                  U S-      $ [         R                   H)  nUR                  S5      (       d  M  U[        S5      S s  $    g! [         a     NJf = f)a  
An experimental API used only when config.benchmark_kernel is true.

The benchmark name is only available at codegen time. So we can not
directly call it in benchmark_all_kernels which is run after codegen.

The function assumes the argument after --only is the benchmark name.
It works for torchbench.py/hugginface.py/timm_models.py. But for ad-hoc
scripts, this function may return None.

There are 2 flavors of --only argument we need handle:
1. --only model_name
2. --only=model_name
z--onlyr+   r   r0  z--only=N)r  argvr   rE   
ValueErrorr  )r  r|  s     rJ   get_benchmark_namer    s    	hhnnX&!Gc#((m#CHHS1W%&*q!!$+88C!G$$ xx>>)$$s9~'((    s   BC 
C"!C"c                &    [        S U  5       5      $ )Nc              3  *   #    U  H	  oS :H  v   M     g7fr+   Nr   rf  s     rJ   r   is_ones.<locals>.<genexpr>=       %u!Avu   rm   r  s    rJ   is_onesr  <      %u%%%rg   c                &    [        S U  5       5      $ )Nc              3  *   #    U  H	  oS :H  v   M     g7f)r   Nr   rf  s     rJ   r   is_zeros.<locals>.<genexpr>A  r  r  r  r  s    rJ   is_zerosr  @  r  rg   c                &    [        S U  5       5      $ )Nc              3     #    U  HI  n[        U[        R                  5      (       d  M$  UR                  [        R                  " S 5      :H  v   MK     g7f)r   N)ri   rC   r  r   )r   r   s     rJ   r    is_cpu_device.<locals>.<genexpr>E  s9      DdELL) 	+u||E**s
   #A*Ar  )inputss    rJ   is_cpu_devicer  D  s       rg   c                    [        U [        R                  5      (       d   S5       eU R                  (       a  [        R
                  $ [        R                  $ )Nz8only support sympy.Expr as input to get_sympy_Expr_dtype)ri   rj   r  r   rC   r:  r2  )r   s    rJ   get_sympy_Expr_dtyper  L  s@    c5::&& B& ~~{{}}rg   c              /     #    U (       a.  [         R                  R                  " U0 UD6 nUv   S S S 5        g S v   g ! , (       d  f       g = f7frx   )rC   r   r   )should_profilerp   r  r   s       rJ   maybe_profiler  V  s;     ^^##T4V4G 54 	 54s   (A=A
AAc                 p    [         R                  R                  n U S:  a  [        R                  " 5       n U $ Nr+   )r[   r^  threadsrC   get_num_threads)r  s    rJ   rd  rd  _  s+    jj  G{'')Nrg   c                     SSK Jn   U " 5       nUR                  S[        R                  R
                  (       a  S5      $ S5      $ )Nr+   )get_backend_options
num_stagesr&     )runtime.triton_helpersr  rw  rC   r  r  )r  optionss     rJ   get_backend_num_stagesr  f  s2    ;!#G;;|%--*;*;QCCCCrg   c                .   SSK JnJn  U [        R                  [        R
                  [        R                  4;   d   e[        R                  " U5      R                  R                  S5      (       a  SSKJn  U" 5       nU [        R                  [        R
                  4;   a  U" X5      $ [        R                  R                  R                  R                   (       a  U" [        R                  U5      $ U" [        R                  U5      $ U [        R                  [        R
                  4;   a  U" U 5      $ [        R                  R                  R                  R                   (       a  U" [        R                  5      $ U" [        R                  5      $ )Nr   )get_max_simd_tflopsget_max_tensorcore_tflops
clock_rate)max_clock_rate)triton.testingr  r  rC   r   r.  r0  inspect	signature
parametersrw  torch._utils_internalr  backendsr<   matmul
allow_tf32)r   r  r  r  sm_clocks        rJ   get_device_tflopsr  n  s   MU]]ENNEMMBBBB,-88<<\JJ8!#U]]ENN33,U==>>%%00,U]]HEE&u}}h??U]]ENN33,U33>>%%00,U]];;&u}}55rg   c                     SSK Jn   U " 5       $ )Nr   get_dram_gbps)r  r  r  s    rJ   get_gpu_dram_gbpsr    s    ,?rg   c                 x    SSK Jn   U R                  R                  R	                  S5      R                  SS5      $ )Nr   r  max_shared_mem)triton.runtimer  r  r  r  rw  r
  s    rJ   get_gpu_shared_memoryr    s.    %==44Q7;;<LaPPrg   c                $    U R                  S5      $ )Nwelford)r  reduction_types    rJ   is_welford_reductionr    s    $$Y//rg   c                4    [        U 5      (       a  gU S:X  a  gg)Nr  online_softmax_reducer&  r+   )r  r  s    rJ   reduction_num_outputsr    s    N++	2	2rg   c                 2    [         R                  " 5       S:H  $ )NLinux)platformsystemr   rg   rJ   is_linuxr    s    ??''rg   c                 (    [         R                  S:H  $ )Nr]   )r  r  r   rg   rJ   r  r    s    <<7""rg   c                &    [        S U  5       5      $ )Nc              3     #    U  H7  n[        U[        R                  5      =(       a    UR                  (       + v   M9     g 7frx   )ri   rj   r  r  rf  s     rJ   r   #has_free_symbols.<locals>.<genexpr>  s)     Jcz!UZZ(<_<cs   ?Arl  )itrs    rJ   ra  ra    s    JcJJJrg   c            	        SSK Jn  U  H  n[        X!R                  UR                  UR
                  UR                  UR                  45      (       aR  [        UR                  5       =(       d    S5      (       d'  [        UR                  5       =(       d    S5      (       a    gM  [        X!R                  5      (       d  M  [        S[        U5       35      e   g)Nr+   r  r   Tzunexpected type for is_dynamic F)r  r  ri   r  r  rb  ComputedBufferr1   ra  maybe_get_sizemaybe_get_strider4   	TypeErrorr   )rp   r  ts      rJ   
is_dynamicr&    s    bmmR[[":K:KRYYW
 
   0 0 2 8b99=M""$*> > > Ayy))=d1gYGHH  rg   c                      \ rS rSrSrSrSrg)Placeholderi  KERNEL_NAMEDESCRIPTIVE_NAMEr   N)r   r   r   r   r)  r*  r   r   rg   rJ   r(  r(    s      K *rg   r(  c                x   SSK Jn  [        R                  " SSSS9 n[        R
                  " 5       n[        R
                  " 5       n[        U[        U5      S9R                  " U6   [        SUR                   3US	9  [        UR                  US	9  [        R                  " 5       n[        X5         U " UR                  5        S S S 5        [        R                  " 5       U-
  n	U" UR                  5        UR                  R                  5         UR                  5         [        S
UR                   3US	9  [        UR                  US	9  UR!                  5       UR!                  5       :H  n
["        R%                  SUUR&                  U
U	5        S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f)Nr+   )stable_topological_sortwzutf-8F)modeencodingr  )r  	fake_modezBefore:
)filezAfter:
zZ%s, save before/after graph to %s, graph before/after are the same = %s, time elapsed = %s)pattern_matcherr,  r  NamedTemporaryFileior
   rQ   rM   	propagater  rT  r	   nowrP   lint	recompiler  r   r  r   )r7  r  inpr  r,  r  	before_ioafter_io
start_timetime_elapsedr%  s              rJ   pass_execution_and_saver>    sH    9		$	$
 
KKM	;;=R#3C#89CCSI	"(($1-bhhY'\\^
#B,N -||~
2)


#!,bhhX& H$5$5$77hFF	
-
 
 -,
 
s%   BF+3FCF+
F(	$F++
F9c                    SSK Jn  [        XR                  5      =(       a     [        U R                  UR
                  5      $ )z:
Check if input buffer is a multi-outputs template buffer
r+   r  )r  r  ri   CppTemplateBufferr  MultiOutputLayout	input_bufr  s     rJ   is_multi_outputs_templaterD    s7     i!5!56 :"..< rg   c                    SSK Jn  [        XR                  5      =(       a7    [	        U R
                  5      S:H  =(       a    [        U R
                  S   5      $ )zD
Check if input buffer is a output of multi-outputs template buffer
r+   r  r   )r  r  ri   MultiOutputrE   r  rD  rB  s     rJ   #is_output_of_multi_outputs_templaterG    sJ      	9nn- 	;	  !Q&	;%i&6&6q&9:rg   c                V   U c  gSSK Jn  [        U 5      UR                  :H  =(       a    US L =(       d    U R                  UL =(       Gd`    [        U 5      UR
                  :H  =(       Ga@    [        [        R                  R                  S5      =(       a;    U R                  [        R                  R                  R                  R                  :H  =(       d    [        [        R                  R                  S5      =(       a;    U R                  [        R                  R                  R                  R                  :H  =(       df    [        [        R                  R                  S5      =(       a;    U R                  [        R                  R                  R                  R                  :H  $ )NFr+   r  all_to_all_singleall_gather_into_tensorreduce_scatter_tensor)r  r  r   _CollectiveKernelop_overloadFallbackKernelr   rC   r   torchrecrI  defaultrJ  rK  r  rd  r  s      rJ   is_collectiverR  	  s;    | 	T
b***Ud
0Td>N>NRT>T  	T
b''' 	
 	

 		**,?@ U$$		(:(:(L(L(T(TT
 		**,DE E$$99%%<<DDE 		**,CD Y$$		(:(:(P(P(X(XX+rg   c                >    SSK Jn  [        U 5      UR                  :H  $ Nr+   r  )r  r  r   _WaitKernelr  r  s     rJ   is_waitrW  0	  s    :''rg   c                    SSK Jn  [        X5      (       a  [        S U R                   5       5      $ [        U R                  5      $ )Nr   GroupedSchedulerNodec              3  8   #    U  H  n[        U5      v   M     g 7frx   )contains_collectiverf  s     rJ   r   &contains_collective.<locals>.<genexpr>:	  s     @<a&q))<r   )r  rZ  ri   r`  snodesrR  r  snoderZ  s     rJ   r\  r\  6	  s4    >%..@5<<@@@$$rg   c                    SSK Jn  [        X5      (       a  [        S U R                   5       5      $ [        U R                  5      $ )Nr   rY  c              3  8   #    U  H  n[        U5      v   M     g 7frx   )contains_waitrf  s     rJ   r    contains_wait.<locals>.<genexpr>C	  s     :\=##\r   )r  rZ  ri   r`  r^  rW  r  r_  s     rJ   rc  rc  ?	  s4    >%..:U\\:::uzz""rg   c                    SSK Jn  [        U[        R                  R
                  5      (       a  U/n[        XR                  5      =(       a    U R                  U;   $ rT  )r  r  ri   rC   rq  rr  rN  rM  rQ  s      rJ   is_fallback_oprf  H	  sF     "ejj++,,Td--.I43C3Cr3IIrg   c                @    X!U    R                   R                  5          $ rx   )defining_opr  )buf_namename_to_bufname_to_fused_nodes      rJ   buf_name_to_fused_snoderl  S	  s!     (3??HHJKKrg   c                    grg  r   r`  s    rJ   rh  rh  ^	      urg   c           	         U" U 5      (       a  g UR                  U 5        U R                   H-  n[        UR                  X#5      nXa;   a  M   [	        UUUUUS9  M/     g )Ncriteria_cb)r  unmet_dependenciesrl  r   find_recursive_deps_of_node)r`  collected_node_setrj  rk  rr  depdefining_op_for_deps          rJ   rt  rt  Y	  sf     55!''5HHk
 4##	
 (rg   c                    grg  r   rn  s    rJ   rh  rh  w	  ro  rg   c           
        U" U 5      (       a  g UR                  U 5        U R                  5        H  nUR                   H  nUR                  c   eUR                  R	                  5       S:X  a  M2  UR                  R	                  5       U;  a  MR  X6R                  R	                  5          nXq;   a  Mu  [        UUUUUS9  M     M     g )NOUTPUTrq  )r  get_outputsrt  r  r  find_recursive_users_of_node)r`  ru  rj  rk  rr  or  user_ops           rJ   r|  r|  r	  s     55! GGD99(((yy!!#x/yy!!#+==(););)=>G,(""'  !rg   c                j    [         R                  R                  R                  (       a  SOSnX-
  U-
  $ )zaComputes the number of inputs to the aot fw graph which have fixed addresses (params and buffers)r&  r   )rC   
_functorchr[   functionalize_rng_ops)dynamo_gm_num_inputsaot_fw_gm_num_inputsnum_rng_seed_offset_inputss      rJ   num_fw_fixed_argumentsr  	  s3     $$::   69SSSrg   c                   SS jnSn/ nU R                   R                   H8  nUR                  S:X  d  M  U" U5      (       a  UR                  U5        US-  nM:     U[	        [        [        U5      5      5      :X  d   e[        U5      $ )z6
Infers which inputs are static for a backwards graph
c                    SU R                   ;  =(       a;    SU R                   ;  =(       a%    SU R                   ;  =(       a    SU R                   ;  $ )Ntangentsbwd_seedbwd_base_offsetbwd_rng_staterT  rX  s    rJ   is_saved_tensor'count_tangents.<locals>.is_saved_tensor	  sH    aff$ .!&&(.!/.  qvv-		
rg   r   r~  r+   )rG   r*   r   r  )rT  r  rd  r}  rF  r   rE   )fx_gr  	arg_countstatic_arg_idxsr  s        rJ   count_tangentsr  	  s    

 IOZZ44= q!!&&y1NI	  d5_)=#>????rg   c                  >    \ rS rSr% S\S'   SS jr\S	S j5       rSrg)
	BoxedBooli	  r  r|   c                    U R                   $ rx   )r|   r
  s    rJ   r  BoxedBool.__bool__	  s    zzrg   c                @    [        U [        5      (       a	  SU l        U $ grg  )ri   r  r|   r  s    rJ   disableBoxedBool.disable	  s    c9%%CIJrg   r   NrT  )r  r   r   zUnion[BoxedBool, bool])	r   r   r   r   r   r  r  r  r   r   rg   rJ   r  r  	  s     K  rg   r  c              #     ^ ^#    SSK Jn  UR                  m   S             SU U4S jjjn[        R                  R                  USU5         S v   S S S 5        g ! , (       d  f       g = f7f)Nr+   r-   c                :   > TR                  U5        T" XX#XE5      $ rx   r}  )r  kernel_namer  r  gpucpp_definitionkernel_listorig_define_kernels         rJ   define_kernel.collect_defined_kernels.<locals>.define_kernel	  s'     	;'!{c
 	
rg   r  )NTN)r  r.   r  r   r  r   r  Optional[str]r  r  r  r  r   r   )codegen.wrapperr.   r  r   r  r^  )r  r.   r  r  s   `  @rJ   collect_defined_kernelsr  	  s     5-;; #'(,
"

 
  	

 
 &
 

 
 
		/-	P 
Q	P	Ps   AA2A!	A2!
A/+A2c                    U S-   $ )N__original__r   rT  s    rJ    get_cloned_parameter_buffer_namer  	  s    .  rg   c                    U [         ;   $ rx   )rA   r  s    rJ   r  r  	  s    Yrg   c                0    U S:g  =(       a    [        U 5      $ )Nr=   )r  r  s    rJ   device_need_guardr  	  s    U?-vf~-rg   c                   [         R                  " 5       (       ao  U [        R                  :X  a[  [        R                  R                  5       (       a8  [        R                  R                  5       S:  a  [         R                  (       a  gU [        [        R                  [        R                  [        R                  /5      ;   $ )N)r  r   F)r[   r,  rC   r.  r<   rD   get_device_capabilitybfloat16_atomic_adds_enabledr   r:  r  r  s    rJ   ,needs_fallback_due_to_atomic_add_limitationsr  	  sv    
 	U^^#JJ##%%JJ,,.&8//
EKKU^^#LMMMrg   c                   U R                   [        R                  R                  R                  [        R                  R                  R
                  4;   a  Uc  gU R                   [        R                  R                  R                  :X  a  SOSnUS U4;  =(       Gd&    U=(       a    [        U5      =(       a    [        U5      =(       d    U R                   [        R                  R                  R                  :H  =(       ap    US:H  =(       ad    U=(       a[    US:H  =(       aO    [        R                  R                  =(       a.    [        R                  R                  =(       d    [        5       S:g  =(       dJ    X:H  =(       a#    U[        R                  [        R                  4;   =(       d    [        R                   " 5       $ )NFr  r   r   r+   )overloadpacketrC   r   atenscatter_reduce_scatter_reducescatter_r  r  r[   r^  fallback_scatter_reduce_sumdynamic_threadsrd  r  r:  rs  )rM  r  
self_dtype	src_dtypesrc_device_typesrc_is_tensor	reduce_tys          rJ   use_scatter_fallbackr  	  s]    	""IINN**EIINN,I,IJ	K" ++uyy~~/F/FFE 
 	tY// 	8 	8 H'H<YG		8 &&%))..*H*HH L%'LL  5(L 

66	L
 ++J/C/E/J	8 'SJ5::u{{:S,S	8 557!rg   c                   SSK JnJn  SSKJn  [        S[        U 5       S35        [        U 5       GH.  u  pE[        SUS S35        XRL a  [        S	5        M'  XQL a  [        S
5        M8  [        XS5      (       a  UR                  5       n[        U(       a  SOS S35        U(       a;  UR                  c   e[        SUR                  R                  R                   35        [        S5        UR                  R                   H  n[        U5        M     [        S5        UR                  R                   H  n[        U5        M     GM  [!        S[#        U5       35      e   g)z
An API that can be used in pdb to dump a node_schedule.
Right mainly dump the read/write dependencies but can add more as needed.
r   DisableReductionEnableReduction)SchedulerNodezNode schedule with z nodesr  3r(  zenable reductionzdisable reductionredpwz scheduler nodeNzoriginal reduction hint zReadDep:z	WriteDep:zUnrecognized node type: )torch._inductor.codegen.simdr  r  r  r  r  rE   r   ri   is_reductionr  r  reduction_hintread_writesreadswritesr   r   )r  r  r  r  r  r  is_redrv  s           rJ   dump_node_scheduler  
  s&   
 O7	M 236
:;}-	#al"$%%%&,,&&(FfU$/?@yy,,,01N1N0OPQ*''--c
 .+''..c
 / !9$t*FGG' .rg   c                z    SSK Jn  U" U R                  5       [        U R                  5      -  [
        -  S:H  5      $ )Nr   )r  )rr  r  storage_offsetr  r   GPU_ALIGN_BYTES)r   r  s     rJ   tensor_is_alignedr  ;
  s:     L 				 >&,,#?	??RVWW rg   c                    [        U R                  R                  5      (       d  g[        R                  =(       d    [        U 5      $ rg  )r  r   r   r[   assume_aligned_inputsr  )example_inputs    rJ   should_assume_input_alignedr  I
  s5     -&&++,,''K+<]+KKrg   c                    [         R                  R                  R                  5       n U (       d  [        R
                  " 5       $ U R                  R                  nU(       d  [        R
                  " 5       $ UR                  5       $ rx   )	rC   _guardsTracingContexttry_getr/  nullcontextr0  rV  suppress_guards)tracing_contextrV  s     rJ   #maybe_get_suppress_shape_guards_ctxr  R
  sb    
 mm22::<O%%''  ))33I%%''$$&&rg   c                "   [         R                  R                  R                  [        SS5         [
        R                  R                  5         SS KnSS K	nUR                  " 5       nUR                  " U5      nSSKJn  UR                  U5        UR                  nUR!                  UR"                  5        U " U0 UD6n	UR%                  5       n
UR!                  U5        UR'                  U5        S S S 5        X4$ ! , (       d  f       W	W
4$ = f)Nr   Tr   )output_code_log)r  r   r  r^  r[   rC   rs  r  r4  loggingr
   StreamHandlertorch._inductor.codecacher  
addHandlerlevelsetLevelDEBUGr  removeHandler)r   rp   r  r4  r  log_capture_stringchr  
prev_levelr  r   s              rJ   run_and_get_cpp_coder  c
  s     
			#	#FGT	:[[]""#56=""2&$**
  /T$V$'')  ,%%b) 
;  9! 
;	:  19s   CC==
Dc                    [        U 5      nUb  UR                  $ U  H:  n[        U[        R                  5      (       d  M$  UR
                  R                  s  $    g rx   )rM   rV  ri   rC   r&   r  )r  r0  inputs      rJ   shape_env_from_inputsr  |
  sR     (I """ eU\\**::''' 
 rg   c                B   ^ ^^ [        T5      S:X  a  T $ SUU U4S jjnU$ )Nr   c                   > [        U TT5      u  pT" U 5      n[        U5      (       a  [        R                  " X5        U$ rx   )copy_misaligned_inputsrE   rC   _foreach_copy_)
new_inputsold_tensorsnew_tensorsr  inputs_to_checkr  mutated_input_idxss       rJ   r  )align_inputs_from_check_idxs.<locals>.run
  sD    #9);$
  J {  :
rg   )r  list[InputType]r   r   )rE   )r  r  r  r  s   ``` rJ   align_inputs_from_check_idxsr  
  s(    
 ?q   Jrg   c                X   SU R                  5       ;   a  SnO;[        S [        U R                  5       U R                  5       5       5       5      S-   n[        R
                  " X4S5      R                  5       n[        R
                  " X R                  5       U R                  5       5      $ )Nr   c              3  6   #    U  H  u  pUS -
  U-  v   M     g7fr  r   )r   shaperh  s      rJ   r   )clone_preserve_strides.<locals>.<genexpr>
  s     T:Sf$:Ss   r+   rv   )r  r   r   rh  rC   
as_stridedclone)rG   needed_sizebuffers      rJ   clone_preserve_stridesr  
  s    AFFH} T#affh
:STTWXX 	 a6<<>FFFFHahhj99rg   c                T   / n/ nUSLnU H  nX   n[        U[        R                  5      (       d   S[        U5       35       eUR	                  5       [
        -  (       d  MW  [        U5      X'   U(       d  Mm  Xb;   d  Mt  UR                  U5        UR                  X   5        M     X44$ )z
Clones misaligned tensors which we inferred were aligned. Returns a tuple of [old_tensors], [new_tensors] for every
cloned tensor which is in `return_pair_idxs`.
Nz Expected tensors only, but got: )ri   rC   r  r   data_ptr	ALIGNMENTr  r}  )r  check_inputs_idxsreturn_pair_idxsr  r  ret_pair_definedr   _inps           rJ   r  r  
  s     ')K&(K (t3}$-- 	
.tDzl;	
- ==?Y&&248JMA$9""4("":=1  ##rg   c                    / nU HV  nX   n[        U[        R                  5      (       d  M(  UR                  5       [        -  S:X  d  ME  UR                  U5        MX     [        U5      [        U5      :w  a  U$ U$ )zO
We require all inputs to be aligned, so introduce a copy for any
that aren't.
r   )ri   rC   r  r  r  r}  rE   )r  static_input_idxsaligned_static_input_idxsr  r  s        rJ   remove_unaligned_input_idxsr  
  sp     !# eU\\**0@90LQR/R%,,S1 ! $%->)??((rg   c                   SSK Jn  [        R                  " [        R                  5      R
                  nUR                  R                  R                  nUR                  R                  R                  R                  nUR                  R                  R                  X:*  5      (       a  gU" U 5      =(       a    U" U 5      U:*  $ )Nr+   rP  T)rS  rQ  rC   iinfor8  r   rT  rU  r  rV  has_hintr  )r   rQ  int_maxr  r  s        rJ   expr_fits_within_32bitr  
  s    kk%++&**G  **Iww))22H 	ww--al;;A;29Q<722rg   c                6  ^^^ [         R                  R                  R                  5       nUb  UR                  b  [        UR                  5      S:X  d   e[        U 5      mUR                  c   eUR                   H  nUc  UR                  R                  S 5        M#  Sm[         R                  R                  R                  5       =n(       a  UR                  mSUU4S jjmUR                  R                  [        U4S jU 5       5      5        M     g g g )Nr   Fc                r   > Tc  [        U 5      $ T(       a  TR                  U 5      $ TR                  U 5      $ rx   )ry   deserialize_symexprevaluate_symexpr)r   fakify_first_callrV  s    rJ   map_expr4set_tracing_context_output_strides.<locals>.map_expr  s7     ("1v((<<Q??$55a88rg   c              3  4   >#    U  H  nT" U5      v   M     g 7frx   r   )r   r   r  s     rJ   r   5set_tracing_context_output_strides.<locals>.<genexpr>  s     5u!(1++ur  )r   r   r   z,Union[float, int, SymInt, SymFloat, SymBool])
rC   r  r  r  output_stridesrE   r  r}  r  r  )r  compiled_graphr  r  r  r  r  rV  s        @@@rJ   "set_tracing_context_output_stridesr  
  s     mm**224Gw55A7))*a///).9	,,888#22E}&&--d3$)!--66>>@@3@(+(=(=%9 9 &&--5u55 3	  Brg   c                 4   [         R                  b  [         R                  $ [         R                  " 5       (       d  g[        R                  R                  5       (       a  g SSKJn   U [        R                  R                  S5      :  $ ! [         a     gf = f)NFr   REMOTE_CACHE_VERSIONz.pytorch/remote_cache:fx_graph_memcache_version)
r[   fx_graph_remote_cacher,  rC   _utils_internalis_fb_unit_testtorch._inductor.fb.remote_cacher!  ModuleNotFoundErrorjustknobs_getval_intr   s    rJ    should_use_remote_fx_graph_cacher(    s    ##/+++,,..H  5#8#8#M#M8$    s   "B
 

BBc                2    [         R                  " SSU 5      $ )Nz[^a-zA-Z0-9_]r   )r  subrT  s    rJ   normalize_namer+  #  s    66"C..rg   ztl.int1ztl.float8e4nvztl.float8e5ztl.float8e4b8ztl.float8e5b16ztl.uint8)ztl.boolztl.float8_e4m3fnztl.float8_e5m2ztl.float8_e4m3fnuzztl.float8_e5m2fnuzztl.float8_e8m0fnuztl.float4_e2m1fn_x2z^.*[.]c                j    [         R                  S[        U 5      5      n[        R	                  X5      $ )z"Convert torch.dtype to triton typetl.)_triton_type_rer*  r   _triton_type_mappingrw  )r   triton_type_names     rJ   triton_typer1  9  s+    &**5#e*=##$4GGrg   c                    [         R                  X 5      nUR                  SS5      n[        [        U5      n[        U[        R                  5      (       d   eU$ )Nr-  r  )_torch_triton_mappingrw  r  rB   rC   ri   r   )r   adjusted_type	type_namerK  s       rJ   triton_type_to_torchr6  ?  sM    )--e;M%%eR0Iy)Ii----rg   c                   U R                   (       + =(       a    U R                  5       UR                  5       :H  =(       a    U R                  5       UR                  5       :H  =(       a    U R                  UR                  :H  =(       a    U R                  UR                  :H  =(       ae    U R                  5       R                  5       UR                  5       R                  5       :H  =(       a!    U R                  5       UR                  5       :H  $ rx   )	is_mkldnnr  rh  r   r   untyped_storager  r  r  r|   s     rJ   is_same_tensorr;  G  s    NN 	<IIK5::<'	<KKMU\\^+	< JJ%++%	< KK5<<'		<
   "++-1F1F1H1Q1Q1SS	< !U%9%9%;;rg   c                   U R                   =(       a    U R                  5       UR                  5       :H  =(       a    U R                  UR                  :H  =(       as    U R                  UR                  :H  =(       aS    [        R
                  R                  R                  U 5      [        R
                  R                  R                  U5      :H  $ rx   )r8  r  r   r   rC   r   mkldnnr  r:  s     rJ   is_same_mkldnn_tensorr>  S  s     	PIIK5::<'	PJJ%++%	P KK5<<'	P II%%d+uyy/?/?/H/H/OOrg   c                     g)N)r@  isnanlogical_notlogical_andsignbitand_leltgegteqner  xorr   r   rg   rJ   boolean_opsrL  ]  s    rg   c                  *    \ rS rSr% S\S'   S\S'   Srg)OpDtypeRuleiq  r'   type_promotion_kindOptional[torch.dtype]override_return_dtyper   Nr  r   rg   rJ   rN  rN  q  s    8800rg   rN  zdict[str, OpDtypeRule]op_dtype_propagation_rulesc                (    [        X5      [        U '   g rx   )rN  rR  )r   rO  rQ  s      rJ   #register_op_dtype_propagation_rulesrT  z  s    
 (3(t$rg   zOrderedSet[str]op_requires_libdevice_fp64c                .    [         R                  U 5        g rx   )rU  r  rT  s    rJ   #register_op_requires_libdevice_fp64rW    s    ""4(rg   c                     SSK Jn   U R                  R                  5       R                  nUS:X  a  [
        R                  $ US:X  a  g[
        R                  $ )Nr   rP  r   r=   )r  rQ  rT  get_current_device_or_throwr   r[   cpu_backendcuda_backend)rQ  
device_strs     rJ   get_current_backendr]    sH    -446;;JU!!!	u	"""rg   c                    U [         R                  [         R                  4;   a=  [        R                  R
                  (       a  [        5       S:X  a  [         R                  $ U $ )z"Maybe upcast [b]float16 to float32r  )rC   r   r.  r[   r  codegen_upcast_to_fp32r]  r0  r  s    rJ   upcast_compute_typer`    s@     	%--00MM00!X-}}Lrg   KeyTypeValTypec                  v    \ rS rSrSrSS jrSS jrSS jrSS jrSSS jjr	SS	 jr
SS
 jrSS jrSS jrSrg)
ScopedDicti  z
A dictionary-like object that allows for scoped updates. It maintains
an original dictionary and a set of new items that can override
the original items within the scope.  The original dictionary is
unmodified.
c                    Xl         0 U l        g rx   original_dict	new_items)r  rg  s     rJ   r  ScopedDict.__init__  s    *13rg   c                \    XR                   ;   a  U R                   U   $ U R                  U   $ rx   rh  rg  r  s     rJ   r  ScopedDict.__getitem__  s,    .. >>#&&!!#&&rg   c                     X R                   U'   g rx   )rh  )r  rG  r|   s      rJ   __setitem__ScopedDict.__setitem__  s    #srg   c                H    XR                   ;   =(       d    XR                  ;   $ rx   rk  r  s     rJ   __contains__ScopedDict.__contains__  s    nn$A/A/A(AArg   Nc                t    XR                   ;   a  U R                   U   $ U R                  R                  X5      $ rx   )rh  rg  rw  )r  rG  rP  s      rJ   rw  ScopedDict.get  s2    .. >>#&&!!%%c33rg   c                    [        U R                  5      nU R                   H  nX R                  ;  d  M  US-  nM     U$ r  )rE   rg  rh  )r  r  r`  s      rJ   r}  ScopedDict.__len__  s<    ""#A***Q   rg   c              #     #    U R                    S h  vN   U R                   H  nXR                   ;  d  M  Uv   M     g  N-7frx   rf  )r  r`  s     rJ   __iter__ScopedDict.__iter__  s8     %%%%A***   	&s   AA  A
Ac                R    [        U R                  =(       d    U R                  5      $ rx   )r  rg  rh  r
  s    rJ   r  ScopedDict.__bool__  s    D&&8$..99rg   c                    [         erx   rm  r  s     rJ   __delitem__ScopedDict.__delitem__  s    !!rg   rk  )rg  zMapping[KeyType, ValType])rG  ra  r   rb  )rG  ra  r|   rb  r   r  )rG  r^  r   r  rx   )rG  ra  rP  Optional[ValType]r   r  r  )r   zIterator[KeyType]rT  )rG  ra  r   r  )r   r   r   r   r   r  r  rn  rq  rw  r}  rx  r  r}  r   r   rg   rJ   rd  rd    s5    4'
$B4
:"rg   rd  )frozen_defaultc              .   ^ SU4S jjnU c  U$ U" U 5      $ )Nc                   > [         R                  S:  a  [        R                  " U STS9$ [        R                  " U TS9$ )N)r  r  T)kw_onlyr   r   )r  version_infodataclasses	dataclass)r{   r   s    rJ   wrapir_dataclass.<locals>.wrap  s;    w&((d6JJ ((V<<rg   )r{   r^   r   r^   r   )r{   r   r  s    ` rJ   ir_dataclassr    s    = {9rg   c                     [         R                  R                  R                  5       n U b'  U R                  (       a  U R                  R
                  $ g rx   )rC   r  r  r  fw_metadatabw_donated_idxs)r  s    rJ   get_donated_idxsr    s=    mm22::<O"'B'B**:::rg   c                  ^ SSK JnJn  SSKJn  SSKJn  U(       a^  [        X5      (       d   eUR                  R                  R                  U/ 5      mTR                  U4S jU R                   5       5        g [        U [        5      (       d   eU  Hp  nXtU4;  d  M  UR                  c  M  UR                  R                  R                  U/ 5      mTR                  U4S jUR                  R                   5       5        Mr     g )Nr+   r  )r3   rP  c              3  `   >#    U  H#  nUR                   T;  d  M  UR                   v   M%     g 7frx   rT  r   r  curr_node_infos     rJ   r   :set_kernel_post_grad_provenance_tracing.<locals>.<genexpr>  s*      
/{{.0 FKK/   ..c              3  `   >#    U  H#  nUR                   T;  d  M  UR                   v   M%     g 7frx   rT  r  s     rJ   r   r  	  s*      *&8F!;;n< $&8r  )codegen.simd_kernel_featuresr  r  r  r3   rS  rQ  ri   r   ._inductor_triton_kernel_to_post_grad_node_info
setdefaultr  r  rF  r  )	r  r  	is_externr  r  r3   rQ  r`  r  s	           @rJ   'set_kernel_post_grad_provenance_tracingr    s    
 P#-9999GGBBMMR 	
 	 
'//
 	
 -...."E.>??::)%&WW%[%[%f%f#R&N #)) *&+jj&8&8*  #rg   c                  (    \ rS rSrSrSrSrSrSrSr	g)	TritonAttrsDescriptorVersioni  r   r+   r&  r  r  r   N)
r   r   r   r   V0_NO_TRITONV1_COMPILERV2_BACKENDSV3_BACKENDS_TUPLEV4_DICTr   r   rg   rJ   r  r    s     LKK	  Grg   r  c                 f   [         R                  R                  S5      c  [        R                  $ SS Kn SS Kn [        U R                  R                  S5      (       a  [        R                  $ [        U R                  R                  S5      (       a  [        R                  $ [        R                  $ )Nr  r   AttrsDescriptor)	importlibutil	find_specr  r  triton.backends.compilertriton.compiler.compilerr   r   compilerr  r  r  )r  s    rJ   #get_triton_attrs_descriptor_versionr    s    ~~)1+888##v''):;; ,777	))+<	=	=+777 ,333rg   c                 8    [        5       [        R                  :H  $ rx   )r  r  r  r   rg   rJ   triton_version_uses_attrs_dictr  4  s    .04P4X4XXXrg   c                &   SSK Jn  [        XR                  5      (       d  g[        U R                  [
        R                  R                  5      (       a=  [
        R                  R                  R                  U R                  R                  ;   a  gg)ze
Returns True if the node is an op that is not cudagraphable.
Usually only custom ops have this tag.
r+   r  FT)r  r  ri   rN  rM  rC   rq  rr  r   ru  rv  rw  rV  s     rJ   is_cudagraph_unsafe_opr  8  sb    
 d--.. 	4##UZZ%:%:;;HHLL))T-=-=-B-BBrg   c                 6   [         R                  R                  SS5      n [        R                  " 5       (       a^  SSKJn  U" 5       nU(       aJ  [         R                  R                  USS5      nU (       a   [         R                  R                  X0/5      OUn U $ )NLD_LIBRARY_PATHr  r   )get_runtime_pathr  lib)
r  r  rw  r[   r,  libfb.py.parutilr  r  r  pathsep)r  r  runtime_pathlib_paths       rJ   get_ld_library_pathr  K  sh    ::>>+R0D5')ww||L)UCH8<2::??H#34(DKrg   c                N    SSK Jn  [        X5      =(       a    U R                  S L$ )Nr   )SubgraphPythonWrapperCodegen)torch._inductor.codegen.wrapperr  ri   partition_signatures)r  r  s     rJ   #is_codegen_graph_partition_subgraphr  X  s'    L 	79 	5((4rg   c                    SSK Jn  UR                  R                  R	                  U S5      (       a;  UR                  R                  R                  U S5      (       a  [        R                  $ [        R                  $ )Nr+   rP  l        i   )	rS  rQ  rT  rU  r  statically_known_geqrC   r8  r:  )r  rQ  s     rJ   dtype_from_sizer  a  sX    ww++e 
''


/
/h
?
?{{{{rg   )r   r>   c                n    U S:X  a(  [         R                  R                  R                  5       $ SU ;   a  gg)z3
Returns True if the device supports MKL-DNN BF16.
r   r>   TF)rC   r   r=  _is_mkldnn_bf16_supportedr   s    rJ   is_mkldnn_bf16_supportedr  o  3     eyy99;;	+	rg   c                n    U S:X  a(  [         R                  R                  R                  5       $ SU ;   a  gg)z3
Returns True if the device supports MKL-DNN FP16.
r   r>   TF)rC   r   r=  _is_mkldnn_fp16_supportedr  s    rJ   is_mkldnn_fp16_supportedr  {  r  rg   rR  )re   ry   r   ry   )rs   r   r   r  )   d   )r   zCallable[[], Any]r   ry   r   ry   r   r=  rT  )r   z"Union[Optional[torch.device], str]r   torch.device)r  zIterable[sympy.Expr]r   r   )r  Sequence[sympy.Expr]r  r  r   r   )r  zIterable[_T]r   zValuesView[_T])r  Union[int, sympy.Expr]r  r  r   r  )rG  rP  r   r   )rM  z"Iterable[Union[int, torch.SymInt]]r   zlist[sympy.Expr])r   r  r   zUnion[int, torch.SymInt])rM  z Iterable[Union[int, sympy.Expr]]r   zlist[Union[int, torch.SymInt]])rd  torch._ops.OpOverloadr   r  )rx  r*   rn  z'Callable[[torch._ops.OpOverload], bool]r   r  )rp  r   rp   ry  r  dict[str, Any]r   z&tuple[GraphModule, list[torch.Tensor]])r<   )r   r   r   r  )r+   r<   )
r  Callable[..., Any]r  Sequence[Any]r   ry   r   r   r   r=  )r   r  r  g      ?r<   )r  r  r  r  r   ry   r  ry   r  r=  r   r   r   r=  )r  r   r  r   r   r  )r  r   r  r   r   r  )r  ry   r  ry   r   ry   )rG   zUnion[int, Sequence[int]]r  ry   r   Sequence[int])rG   ztuple[_T, ...]r   zlist[_T])r   z!Callable[Concatenate[Any, P], RV]r   zCachedMethod[P, RV])r  0Union[Sequence[BaseSchedulerNode], ExternKernel]r   zOrderedSet[Node])r  Sequence[BaseSchedulerNode]r  z8Literal[True, 'torch', 'original_aten', 'inductor_node']r   r   )r  r  r  r.   r   ztuple[str, str]rx   )r  zIterable[torch.fx.Node]r  zOptional[Callable[[Any], bool]]r   zOrderedSet[torch.fx.Node])rp   zSequence[IRNode]r  zdict[str, IRNode]r   zOrderedSet[IRNode]r:  )r   r   r   zValueRanges[Any])rE  r   r   r  )rE  rX   r  ry   r   r]  )rQ  r  r   r  )r   r   r   r]  )r(  r   r_  zdict[sympy.Expr, Any]r   r   )r  r   r   z,TypeGuard[Union[torch.SymInt, torch.Tensor]])rp   r   r   r  )r  torch.fx.GraphModuler   zOptional[torch.fx.Node])r  r  r   r*   )r  r  r   zOrderedSet[torch.device]rS  )r  r   r   r   )NNT)r  zOptional[dict[str, Any]]r  r  r  r  r   r.  )r  r  r   	list[int])rV  r)   r  z.Sequence[Union[int, torch.SymInt, sympy.Expr]]r   r  )r   torch.dtyper   ry   r-  rQ  )r  zUnion[int, torch.device]r   r  r  )r  ry   r   r  r  Optional[int]r   r,   )r  r5   r  zlist[torch.dtype]r   r  )r  r   r   r  )r  r5   r  r  r  r  r   r  )r  r4   r   r  )
r  r5   r  ry   r  ry   r`  ry   r   r  )r  r   r   r  )r  r  r  r  r`  r  r   r  )r  r  r  r  r`  r  r   r  )r   r   r   r   )r   zQtuple[Optional[str], Callable[[], list[Any]], Callable[[], list[Any]], type[Any]])r  r5   r   r  )r  r5   rC  zUnion[ReinterpretView, Buffer]rD  r4   r   r  )FTFN)r  r5   rC  r4   rD  r4   rL  r  rA  r  rf  r  rT  r  r   r  )r   Callable[P, _T]rp   r  r  r  r   ztuple[_T, list[str]])r   r  r   ztuple[Any, list[str]])r   r  rp   r  r  r  r   r   )r   r  rp   r  r  r  r   r   )r   r  rp   r  r  r  r   ztuple[Any, list[GraphLowering]])r  r  r  r  r   r.  )r  r  r  zOptional[Callable[..., Any]]r   r   )r  r   r   r  )r   r  )r  r  r   r  )r  zSequence[torch.Tensor]r   r  )r   r   r   r  )r  r  rp   r   r  r   r   zIterator[Any])r  r   r   r  )r  r   r   ry   )r  zIterable[Any]r   r  )
r7  r  r  r(   r9  r  r  r   r   r  )rC  z"Optional[Union[Buffer, Operation]]r   r  )r  z Optional[Union[Node, Operation]]rd  z!Optional[torch._ops.OperatorBase]r   r  )r  z"Optional[Union[IRNode, Operation]]r   r  )r`  r:   r   r  )r  zOptional[Operation]rd  z?Union[torch._ops.OpOverload, Collection[torch._ops.OpOverload]]r   r  )ri  r   rj  r  rk  r  r   r   )r`  r:   ru  zMutableSet[BaseSchedulerNode]rj  zdict[str, SchedulerBuffer]rk  zdict[str, BaseSchedulerNode]rr  zCallable[[Any], bool]r   r  )r  ry   r  ry   r   ry   )r  r  r   ry   )r  r   r   r.  )r   r   r   r   )r   r  r   r  )r   r   r   r  )r   r  r   r  )rM  r  r  r  r  r  r  r  r  r   r  r  r   r  )r  r  r   r  )r   r  r   r  )r  r  r   r  )r   rU  )r   r  rp   r  r  r  r   ztuple[_T, str])r  Sequence[InputType]r   zOptional[ShapeEnv])r  Callable[[list[InputType]], _T]r  r  r  zOrderedSet[int]r   r  )rG   r  r   r  )r  r  r  r  r  zOptional[OrderedSet[int]]r   z-tuple[list[torch.Tensor], list[torch.Tensor]])r  r  r  r  r   r  )r   r   r   r  )r  r  r  r9   r   r  )r   r  r   r   )r   r   r   r  )r  r  r|   r  r   r  )r   ztuple[str, ...])r   r   rO  r'   rQ  rP  r   r  )r   r   r   r  )r   r  r   r  )r{   zOptional[type[Any]]r   r  r   r   )r   zOptional[list[int]]rV  )r  z3Union[Sequence[BaseSchedulerNode], ExternKernelOut]r  r   r  r  r   r  )r   r  )r  r6   r   r  )r  r.   r   r  )r  ry   r   r  )r   r   r   r  (S  
__future__r   r  r/  r  enumr  r  r  r4  r  r  r?  r  r  r  r  r  r   r  r  rA  r  r  collections.abcr   r   r   r   r   r	   r
   typingr   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   r   rj   rC   torch._inductor.runtime.hintsr   torch.utils._ordered_setr   torch.utils._pytreer   OPTIMUS_EXCLUDE_POST_GRADr!   r"   r#   r$   r%   r&   torch._prims_commonr'   torch.fxr(   rr  r)   torch.fx.noder*   r  r,   r  r.   rT  r0   r  r1   r2   r3   r4   r5   r6   r7   output_coder9   r  r:   r;   rA   r?   r   rK   torch._dynamo.device_interfacerL   torch._dynamo.utilsrM   torch.autogradrN   torch.autograd.profiler_utilrO   (torch.fx.passes.graph_transform_observerrP   torch.fx.passes.shape_proprQ   torch.utils._sympy.functionsrR   rS   rT   rU   rV   torch.utils._sympy.symbolrW   rX   torch.utils._sympy.value_rangesrY   rZ   r  r[   runtime.runtime_utilsr\   r  _IS_WINDOWS	getLoggerr   r   r^   r  r  	VarRangesr  ry   	InputTypeGPU_KERNEL_BIN_EXTSr  r  r  r  rd   rf   ro   Functionrq   r  r   r   r   r   r   r  r  r  rJ  rN  rX  rZ  re  rl  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#  r8  rB  rF  rM  rR  rU  ra  re  rm  ry  r  r  r  r  r  r   r  r  r0  r  clear_on_fresh_inductor_cacheclear_inductor_cachesfresh_inductor_cacher  r  	lru_cacher  r  r  r  rX  rh  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r&  r2  r7  r:  r=  r?  rE  rB  rj  rl  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rd  r  r  r  r  r  r  r  r  ra  r&  Enumr(  r>  rD  rG  rR  rW  r\  rc  rf  rl  rt  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r(  r+  r/  r  r3  compiler.  r1  r6  r;  r>  rL  rN  rR  rT  rU  rW  r]  r`  ra  rb  rd  r  r  r  r  r  r  r  r  r  r  SUPPORTED_MKLDNN_DEVICESr  r  )r`  rs   s   00rJ   <module>r     s   "        	     	  	   
     U U          : / - (  >>//C$>",5$   -= #	CL
   D 0 % 2 K 0  8 D  = llg%!T]UZZ'(	U5<<ell:;<	'7 	 {Q'A-+2B XDX XB5
LENN  d#  $"D DP 9<SS#&S25S
Sl  ;@
+*"*+A**#AL+	+++"/	)/#/G @OI	I<I 
I0 *8+0' ' 	!  	
 ( %'!  	
    )'#$  cNTT"E8WQU^ E:C*!).!)O!) 	!)H62C62!62 62v 48*0 (E
E$5EE&,^%	DU	>2-888v'& 
: !# I "	 .24+4	4 4 	4 4p !5 $ " A!!L!!H Q7 7*  , , ,
O Od
 
 @ @ @?' ?  8 J JI #'   	(+<	  -2%)BF	0&R@J    * CO,) ,  3, 3,l @ @ R R:+\H&

8
@F
	
" ""&"&==
= = 	=
  = =  = 
=@'C C"      	 $ &2:/)X &2:## &#2:#$#* ...@.. .$ IMFF)EFF*	B&&   D D 6 66  Q0(#K(*$)) *!

!
"-!
4A!
HK!
	!
H1	" -1!
*!)! 
!H(%#J
JGJ 
JLL .LDRLL *=

5
 ,
 5	

 '
 

< *=5 , 5	
 ' 
:T 2     ,!.N $&$!$ $ 	$
 $ $ 
$NH>L'" &2:2(*" ( %	0	: 37$$$$ 0$ 3	$<$ $3!3B	:&/ '#)* $%
  +?*D*D*FG*F$!*FG  **Y'H	  & 1 1 1
 68 2 7
8 1 
	 /9l O :)	# )

)
-" 01 -"` D)t   * !F!! ! 
	!H499  4 42Y&
 * 		 	Q
 Hs   i7