
    7h                   
   S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKrS SKJrJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJrJrJrJrJrJr  S S
KJ r J!r!J"r"J#r#J$r$J%r%  S SK&J'r'  S SK(r)S SK*r)S SK+J,s  J-r.  S SK/J0r0  S SK)J1r1  S SK2J3r3  S SK4J5r5J6r7Jr8J,r9  S SK:J;r;  S SK<J=r=  S SK>J?r?J@r@JArAJBrBJCrCJDrDJErEJFrFJGrG  S SKHJ6rI  S SKJJKrK  S SKLJMrMJNrNJOrO  S SKPJQrQJRrRJSrS  S SKTJUrUJVrVJWrWJXrX  S SKYJZrZ  S SK[J\r\J]r]J^r^J_r_J`r`Jara  S SKbJcrc  S SKdJereJfrfJgrgJhrhJiriJjrjJkrkJlrlJmrm  S SKnJoro  S SKpJqrq  S SKrJsrs  S SK*Jtrt  S S KuJvrvJwrw  S S!KxJyry  S S"KzJ{r{  S S#K|J}r}  S$S%K~Jr  S$S&KJrJr  S$S'KJr  S$S(KJr  S$S)KJr  S*S+KJ6r6Jr  S*S,KJrJr  S*S-KJr  S*S.KJr  S*S/KJr  S*S0KJr  S*S1KJrJr  S*S2KJr  S*S3KJr  S*S4KJrJr  S*S5KJr  S*S6KJr  S*S7K,JrJrJrJrJrJrJrJrJr  S*S8KJr  \(       a   S S9KJrJr  S S:K[Jr  S S;KJr  S S<KJr  S*S=KJr  \"" S>5      r\" S?5      r\(       d  \6GRt                  " 5       (       d  SS@ jrSSA jrOS SBKJrJr  \(       a
  S SCKJrJrJr   " SD SE\GR                  5      rSSF jr\" 5       u  rr\GR                  " \5      r\)GR                  GR                  \SG5      r\)GR                  GR                  \SH5      r\)GR                  GR                  \SI5      r\)GR                  GR                  \SJ5      r\)GR                  GR                  \SK5      rSSL jrSSM jrSSN jrSSO jr\GR                  " S5      SSP j5       r\GR                  SSQ j5       rSSR jr        SSS jr S     SSU jjr      SSV jr S     SSW jjrSSSX jjr   S         SSY jjrSSZ jr    SS[ jr      SS\ jr S       SS] jjr S   SS^ jjr\GR                  SS_ j5       r " S` Sa\$STSb9r " Sc Sd\#5      r        SSe jr\" SfSg9        SSh j5       r " Si Sj5      r " Sk Sl\5      r " Sm Sn\5      r          SSo jr      SSp jr SSqSqSqSr.                   SSs jjjrSSt jr        SSu jr S       SSv jjr\S4         SSw jjr\" S 5      r                  SSx jrSSy jrSSz jr\SSST4             SS{ jjrSS| jr        SS} jr        SS~ jrSS jr SSS.         SS jjjrg)    )annotationsN)ABCabstractmethod)defaultdict)AbstractContextManager)currentframe)count)
attrgetter)AnyCallableOptionalTYPE_CHECKINGTypeVarUnion)Neveroverride	ParamSpecProtocol	TypedDictUnpack)mock)#min_cut_rematerialization_partition)fx)enable_python_dispatcher)compiled_autogradconfigloggingutils)get_interface_for_device)wrap_compiler_debug)	chromium_event_timedCompileEventLoggercountersdetect_fake_modedynamo_timedflatten_graph_inputsget_metrics_contextlazy_format_graph_codeset_feature_use)r   )!unwrap_tensor_subclass_parameters)aot_export_modulemake_boxed_funcSerializableAOTDispatchCompiler)	code_hashFxGraphCacheoutput_code_log)BoxedDeviceIndexformat_default_skip_message#log_cudagraph_skip_and_bump_counterPlaceholderInfo)save_args_for_compile_fx_inner)CompiledAOTICompiledFxGraphCompiledFxGraphConstantsWithGmget_expanded_dimsindex_expanded_dims
OutputCode)	cache_dir)		BoxedBoolcount_tangentsfresh_cacheget_all_devices	InputTypeis_gpushould_assume_input_aligned should_use_remote_fx_graph_cachetensor_is_aligned)FakeScriptObject)trace_structured)compile_time_strobelight_meta)GraphModule)free_unbacked_symbolsSymExprPrinter)FakeTensorProp)_WaitCounter)
OrderedSet   )aot_autograd)ShortenTraceback	SkipFrame)_use_lazy_graph_module)_PyTreeCodeGen)
has_triton   )r   metrics)get_wrapper_codegen_for_deviceinit_backend_registration)DebugContext)select_decomp_table)InductorError)joint_graph_passes)post_grad_passesview_to_reshape)pre_grad_passes)GraphLowering)get_device_typeIRNode)complex_memory_overlap)TritonBundler)	align_inputs_from_check_idxsclone_preserve_stridescopy_misaligned_inputs get_cloned_parameter_buffer_name%get_first_incompatible_cudagraph_node#maybe_get_suppress_shape_guards_ctxoutput_noderemove_unaligned_input_idxsshape_env_from_inputs)V)	GeneratorSequence)_StrideExprStr)
OpOverload)Weights)ExternKernelNode_P_Tc                "    [         R                  $ N)dynamo_utilsidentityattrs    T/var/www/fran/franai/venv/lib/python3.13/site-packages/torch/_inductor/compile_fx.pytime_and_logr      s    $$$    c                     g ry    )argskwargss     r~   log_optimus_to_scubar      s    r   )r   r   )FQNGraphInputNameGraphSignaturec                       \ rS rSrSrSrSrSrg)FxCompileMode   r   rV   rO   r   N)__name__
__module____qualname____firstlineno__NORMAL	SERIALIZE
SUBPROCESS__static_attributes__r   r   r~   r   r      s    F IJr   r   c                 T   Sn [         R                  R                  U 5      nUc  [        R                  S4$ SnUR                  5       R                  S5      (       a  SnUSS  n UR                  5       n[        U   U4$ ! [         a    SS K	nUR                  " [        5      nUR                  SUU SR                  [        S	 [        R                  R!                  5        5       5      5      5        [         R                  R#                  U 5        [        R                  S4s $ f = f)
NTORCHINDUCTOR_FX_COMPILE_MODEFzasync+T   r   z>Invalid value of %s for %s. Expected one of %s. Using default.z, c              3  8   #    U  H  n[        U5      v   M     g 7fry   )repr.0xs     r~   	<genexpr>+_fx_compile_mode_default.<locals>.<genexpr>   s     O.NT!WW.N   )osenvirongetr   r   lower
startswithupperKeyErrorr   	getLoggerr   errorjoinsorted__members__keyspop)namevalue	use_asyncr   logs        r~   _fx_compile_mode_defaultr      s    *DJJNN4 E}##U**I{{}))	ab	+U#Y.. +)		LIIfOm.G.G.L.L.NOOP		
 	

t##U**+s   %B   B$D'&D'
perf_hintspre_grad_graphspost_grad_graphscudagraph_static_inputsinductor_metricsc                    [         R                  R                  R                  5       n[	        [        U 5      5      nU(       a  UR                  (       d  U$ UR                  R                  $ ry   )torch_guardsTracingContexttry_getlistrangefw_metadatastatic_input_indices)	num_fixedcontextfixeds      r~   get_static_input_idxsr      sM    
 mm**224Gy!"E'--333r   c                B   U R                   R                  SS9S   n/ n[        UR                  S   [        R
                  R                  5      (       d  UR                  S   nOUR                  nU H  n[        U[        R
                  R                  5      (       a^  UR                  R                  S5      =nb@  [        U[        R                  5      (       a!  UR                  UR                  5       5        M  UR                  S 5        M     X!R                  S'   g )Noutputopr   valoriginal_output_strides)graph
find_nodes
isinstancer   r   r   Nodemetar   Tensorappendstride)gmrl   output_stridesoutput_node_argsr   r   s         r~   record_original_output_stridesr      s    ((%%%215KNk&&q)588==99&++A.&++"vuxx}}--..;3--!!#**,/!!$' # 3A./r   c                    U R                   R                  S[        R                  R                  R
                  S9 H0  n[        XR                  S   R                  5      n[        U5        M2     [        U 5        g )Ncall_functionr   targetr   )r   r   r   opshigher_orderinvoke_subgraphgetattrr   r   )_recursive_record_original_output_stridesr   )r   nodesubgraphs      r~   r   r      sa    ##599#9#9#I#I $  2yy|2231(;	 #2&r   c           	     "   U R                   R                  S[        R                  R                  R
                  S9 H  n[        XR                  S   R                  5      nUR                   R                  SS9 H}  n[        [        UR                  S   5      5       Vs/ s H@  n[        UR                  S   U   [        R                  R                  5      (       d  M>  UPMB     snUR                  S'   M     [        U5        M     g s  snf )Nr   r   r   r   r   user_visible_output_idxs)r   r   r   r   r   r   r   r   r   r   lenr   r   r   r   *_recursive_record_user_visible_output_idxs)r   r   r   idxs       r~   r   r      s    ##599#9#9#I#I $  2yy|223NN---:D !TYYq\!2353Cdiil3/? 35DII01 ; 	38<5s   !=D"Dc                 6    [         R                  " [        5      $ ry   )dynamo_loggingget_step_loggerr   r   r   r~   _step_loggerr     s    ))#..r   c                 &   [         R                  R                  5       (       an  [         R                  R                  R                  R
                  (       d:  [         R                  R                  5       S:  a  [        R                  " S5        g g g g )N)   r   zTensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.)	r   cudais_availablebackendsmatmul
allow_tf32get_device_capabilitywarningswarnr   r   r~   _warn_tf32_disabledr     sf     	

!!##**55JJ,,.&8d	
 9 6 	"r   c           
     X  ^ [        U R                  SS9 VVs/ s H  u  p#UPM	     snn5      mTR                  [        U R                  SS9 VVs/ s H  u  p#UPM	     snn5      5        SU4S jjnUR                  R
                   GH  nUR                  S:X  d  M  UR                  nUR                  S5      (       d  UR                  S5      (       d  MP  [        X5      (       d  Mb  [        U5      " U5      n[        U5      " U 5      n[        R                  " Xx5      (       a  UR                  UR                  :X  a  M  UR                  S5      (       a  SOSn	U" UR                  U	5      n
U	 U
 3nXl        [        XU5        TR                  U5        GM     gs  snnf s  snnf )	a  
In aot_export_module (make_fx), we create get_attr nodes with name prefix
"_tensor_constant" and "_torchbind_obj". See Tracer.create_arg() in
torch/fx/_symbolic_trace.py

However, this might result in name collision if the original mod already
has a different buffer with the same name.

We resolve this potential name collision here by changing the target name
with a new number post fix.
Fremove_duplicatec                r  > SnU R                    H  nUR                  S:X  d  M  UR                  R                  U5      (       d  M7  [	        UR                  5      [	        U5      :  d  M[  UR                  R                  U5      S   nUR                  5       (       d  M  [        U[        U5      5      nM     T Hu  nUR                  U5      (       d  M  [	        U5      [	        U5      :  d  M5  UR                  U5      S   nUR                  5       (       d  M`  [        U[        U5      5      nMw     US-   $ )Nr   get_attrrV   )	nodesr   r   r   r   splitisdigitmaxint)r   prefixir   post_fixkeyexisting_keyss         r~   find_smallest_i0_resolve_name_collision.<locals>.find_smallest_i4  s    KKDww*$)?)?)G)Gt{{#c&k1#{{008<H''))3x=1   !C~~f%%s8c&k)"yy04H''))3x=1 ! 1ur   r   _tensor_constant_torchbind_objN)r   zfx.Graphr   strreturnr   )rN   named_parametersupdatenamed_buffersr   r   r   r   r   hasattrr
   r   equaldtypesetattradd)modr   r   r   r  r   target_name	gm_targetmodel_targetr   new_idnew_target_namer  s               @r~   _resolve_name_collisionr     s    "33U3KLK)$KLM #*;*;U*;*ST*SYTD*STU  77j ++K))" !,,-=>>3,,";/3I%k237LI44OO|'9'99 ))*<== #% 
 %RXXv6F!'1O)KB3o.7 - 	M Us   F 
F&c           	        SSK JnJn  [        X5        0 nU R	                  SS9 H  u  pgXuU'   U" UUUUR
                  S9  M     U R                  SS9 H  u  phXU'   U" UUUUR                  S9  M     UR                  R                  SS9n	/ n
U	 H  nUR                  nXR                  ;   a"  UR                  U   nU
R                  U5        M@  XR                  ;   aE  UR                  U   nU
R                  U5        [        X^   5      UR                  [!        U5      '   M  XR"                  ;   d   eU
R                  S 5        M     SSKJn  [)        UR                  R*                  5      S	   R,                  S   n/ nUR.                  nUR0                  nUR2                  n[5        U5       H  u  nnS nU[7        U5      [7        U5      -   [7        U5      -   :  a?  UR                  U;   a  UUR                     nOUR                  U;   a  UUR                     nUR                  U5        M     U" UU
U[8        R:                  " 5       S U0 5      nU$ )
Nr   )_assign_attr	_AttrKindFr   )	attr_kindplaceholderr   )_unliftr   )torch.export.unflattenr  r  r  r  	PARAMETERr  BUFFERr   r   r   inputs_to_parametersr   inputs_to_buffersrg   r   ri   user_inputstorch.export._unliftr  r   r   r   buffers_to_mutateuser_inputs_to_mutateoutput_tokens	enumerater   pytreeLeafSpec)r  r   graph_signaturer  r  
state_dictr   parambufferplaceholder_nodeslifted_inputsr   	node_nameparameter_namebuffer_namer  outputsmutated_outputsbuffer_mutationsuser_input_mutationsr)  r   outr   unlifted_gms                            r~   _unlift_graphr<  b  sW    ?C$OQJ++U+C 4))		
 D ))5)A!4&&		
 B ++}+=)+M "II	<<<,AA)LN  0;;;);;IFK  -&z'>? GG4[AB  ; ;;;;  & " -288>>"2&++A.GO&88*@@#11Mg&S6:%&-A)BBSEWWWxx++(211,SXX6u% ' 

K r   Fc              #  &  #    [        S U R                  R                  SS9 5       5      n[        5       nU R                  5        HH  u  pEXB;   d  M  [	        U[
        R                  R                  5      (       d  M7  UR                  U5        MJ     U(       ak  U R                  R                  S[
        R                  R                  R                  S9 H+  nUR                  UR                  S   R                  5        M-     U S h  vN   g  N7f)Nc              3  8   #    U  H  oR                   v   M     g 7fry   )r   r   s     r~   r   &_get_subgraph_names.<locals>.<genexpr>  s      5<Q<r   r   r   r   r   r   )rN   r   r   named_childrenr   r   r   rI   r  r   r   r   discardr   r   )r   skip_invoke_subgraphall_subgraph_namesfx_subgraph_names
child_namechild_moduler   s          r~   _get_subgraph_namesrG    s      +5 5((---<5 + *4$&$5$5$7 
 +
%((..1
 1
 !!*- %8 HH''uyy'='='M'M ( 
D %%diil&9&9:

 !  s   AD'D=BD	D
Dc                   [        SSSS9   [        R                  n[        R                  n[	        U 5       H&  n[        X5      n[        US5      n[        XU5        M(     [        XX#5      sS S S 5        $ ! , (       d  f       g = f)N_recursive_pre_grad_passesTpre_grad_pass_time_uslog_pt2_compile_eventdynamo_compile_column_usr   )	r%   r   add_pre_grad_passesremove_pre_grad_passesrG  r   rI  r  r`   )r   example_inputs
add_passesremove_passessubgraph_namer   new_subgraphs          r~   rI  rI    sx     
$"!8

 //
5504Mr1H5hCLB|4	 5
 r:M
 
 
s   A!A77
Bc                    [        SSSS9   [        X5       H  n[        X5      n[        X15        M     [	        U 5        S S S 5        g ! , (       d  f       g = f)N_recursive_joint_graph_passesTjoint_graph_pass_time_usrK  )r%   rG  r   rV  r]   )r   rB  rS  r   s       r~   rV  rV    sS     
'"!;
 1JMr1H)(I K 	2
 
 
   4A		
Ac                    [        SSSS9   [        U 5       H  n[        X5      n[        X15        M     [	        X5        S S S 5        g ! , (       d  f       g = f)N_recursive_post_grad_passesTpost_grad_pass_time_usrK  )r%   rG  r   rZ  r^   )r   is_inferencerS  r   s       r~   rZ  rZ    sP    	%"!9

 14Mr1H'? 5 	*
 
 
rX  c                   SSK JnJnJnJnJn  U" XX#5      n	Uc  U	" 5       OSn
[        [        U	R                  R                  5      S   R                  S   5       VVs0 s H  u  pUR                  U_M     nnn/ n/ n0 nU R                  R                   H^  nUR                  U;   a  UR                  U5        M&  UR                  U   U:X  d  M;  UR                  S:w  d  MM  UR                  U5        M`     U HC  nSUR                  -   nU" U UUc  XUR                        OSU5        UUR                     UU'   ME     USSS2    Hb  nUR                  (       a3  UR                   H!  nUR                  U   U:X  a  M   SU S35       e   MG  U R                  R!                  U5        Md     U R#                  5         U	U4$ s  snnf )	a  
This function takes an GraphModule input "gm".
The gm will be split into 2 components,
  1) const_gm, which consists the subgraph of gm that can be constant folded.
  2) gm (being inplace modified,) which returns the graph after constant folding.

If an additional "lifted_constants" argument is passed in, we will assume the gm has
been lifted and run the transformation accordingly.

When a "skip_folding_node_fn" callback is passed, we will skip constant folding on
the nodes for which the callback returns True.

const_output_index is a mapping of corresponding node name from gm to the
output index of const_gm.
Returns (const_gm, const_output_index)
r   )CONST_MODULE_TAGMETA_TAG
MODULE_TAGreplace_node_with_constantrun_and_get_constant_graphNr   r  _FOLDED_CONST_znode: z user not empty.) torch._inductor.constant_foldingr^  r_  r`  ra  rb  r*  tupler   r   r   r   r   r   r   users
erase_node	recompile)r   skip_constructorlifted_constant_namesskip_folding_node_fnr^  r_  r`  ra  rb  const_gmconst_resultr   r   const_outputsto_erase_nodeto_replace_nodeconst_output_indexr   new_const_namens                       r~   split_const_gmrt    s   ,  *
3H "7!>8:DL #,E(..2F2F,G,K,P,PQR,S"T"T"T   MO99%""4(YYx $44M9Q  &	   )DII5" )0 49956		
 .;499-E>*   dd#::ZZvvh':5VvEU7VV5   HH% $ LLN'''Es   Gc                n   [         R                  R                  n[        UR                  R
                  UR                  R
                  UR                  R
                  UR                  R
                  /5      nU H  nU R                  R                  SUS9 H  n[        UR                  R                  SS 5      [         R                  5      (       d  M>  UR                  S   R                  [         R                   :X  d  Mk  UR                  S   R"                  R$                  S:X  d  M      g   M     g)Nr   r   r   r   TF)r   r   atenrN   mmdefaultaddmmbmmbaddbmmr   r   r   r   r   r   r  float32devicetype)r   rv  tf32_opsr   r   s        r~   is_tf32_warning_applicabler  C  s    99>>DGGOOJJHHLL  		
H HH''?6'JD499==5u||DDIIe$**emm;IIe$++00F: K  r   c                   [        S U  5       5      n[        R                  (       aE  [        R                  (       a0  U(       d)  [        R                  S5        [        R                  " SS9$ [        R                  R                  (       a)  [        R                  S5        [        R                  " SS9$ [        R                  " 5       $ )z
For CPU backend, enable comprehensive padding causes some unit tests
fail due to changing number of generated kernels. Skip for now.
c              3     #    U  HE  n[        U[        R                  5      (       d  M$  [        UR                  R
                  5      v   MG     g 7fry   )r   r   r   rB   r}  r~  )r   ts     r~   r   6maybe_disable_comprehensive_padding.<locals>.<genexpr>_  s3      '5!Au||9Tqxx}}~s
   #A&Az!Skip comprehensive padding on CPUF)comprehensive_paddingz;Skip comprehensive padding for use_runtime_constant_folding)anyr   disable_padding_cpur  perf_hint_loginfopatchaot_inductoruse_runtime_constant_folding
contextlibnullcontext)rP  has_gpus     r~   #maybe_disable_comprehensive_paddingr  X  s      '5 G !!f&B&B7>?||%88				9	9I	
 ||%88%%''r   c                p    U (       d  U(       a  [         R                  " SS9$ [        R                  " 5       $ )z@
graph partition does not support cpp_wrapper and aot_mode yet.
F)graph_partition)r   r  r  r  )cpp_wrapperaot_modes     r~   maybe_disable_graph_partitionr  o  s'     h||E22%%''r   c                   [        5          [        U5      nU(       d4  [        R                  R	                  SS9n[        XS9R                  " U6   O^U(       d  [        R                  " 5       O [        R                  R                  USS5      nU   [        XS9R                  " U6   SSS5        SSS5        U$ ! , (       d  f       N= f! , (       d  f       W$ = f)zq
If we can not detect fake mode from the context of inputs, create one.

The created fake mode will be returned.
Tallow_non_fake_inputs)moder  N)r   r$   r   _subclassesFakeTensorModerL   	propagater  r  r   r  objectpropagate_dont_convert_inputs)r   rP  force_allow_non_fake_inputs	fake_modectxs        r~   fake_tensor_propr  {  s     
"	#$^4	))88t8TI2.88.I 3 &&(ZZ&&y2I4P 
 r2PP#  
$    
$	#  s$   BCB:(C:
C	C
Cc                    [         R                  " U 5         [         R                  " 5       sS S S 5        $ ! , (       d  f       g = fry   )r   r  get_config_copy)config_patchess    r~   get_patched_config_dictr    s(     
n	%%%' 
&	%	%s	   6
Ac               #     #    [         R                  (       a   [        [        5       SS9   S v   S S S 5        g S v   g ! , (       d  f       g = f7f)NF)dirdelete)r   force_disable_cachesr?   r<   r   r   r~   with_fresh_cache_if_configr    s4     "" Y[7 87 	 87s   (A=A
AAc                  z    \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'   S\S'   S\S'   S\S'   Srg)_CompileFxKwargsi  zOptional[BoxedBool]
cudagraphsSequence[int]static_input_idxsboolis_backwardzOptional[int]graph_idr  r  r\  zOptional[bool]
layout_optz1Optional[Callable[[list[ExternKernelNode]], Any]]extern_node_serializerzOptional[BoxedDeviceIndex]boxed_forward_device_indexr   N)r   r   r   r   __annotations__r   r   r   r~   r  r    s=    ##$$NMM ::r   r  )totalc                  .    \ rS rSr        SS jrSrg)_CompileFxCallablei  c                    g ry   r   )selfr   rP  r   s       r~   __call___CompileFxCallable.__call__  s    
 r   r   Nr   rI   rP  Sequence[InputType]r   Unpack[_CompileFxKwargs]r
  r;   )r   r   r   r   r  r   r   r   r~   r  r    s-     , +	
 
r   r  c                t   UR                  SS 5        UR                  SS5        UR                  SS5        UR                  SS 5        UR                  SS5        UR                  SS5        UR                  S	S 5        UR                  S
S 5        UR                  SS 5        [        R                  " 5        nUR                  [        R
                  R                  R                  5       5        UR                  [        [        R                  5      5        UR                  [        R                  " SSSSSSS95        UR                  [        5       5        UR                  [        5       5        [        R                   " SUS   S9  [#        [$        SS9" U U40 UD6sS S S 5        $ ! , (       d  f       g = f)Nr  r  r   r  Fr  r  r\  r  r  r  compile_fx_innerinductor_compileTcompile_inductor#inductor_cumulative_compile_time_us)
phase_namerL  log_waitcounterwaitcounter_name_overriderM  )r  inductor)compiler_name)
setdefaultr  	ExitStackenter_contextr   r   _python_dispatch_disable_current_modesrS   dynamo_configuse_lazy_graph_modulerz   r%   r  rZ   r"   pt2_compiler    _compile_fx_inner)r   rP  r   stacks       r~   r  r    s{   
 lD)
)2.
mU+
j$'
mU+
ne,
2D9
lD)
.5 
			5EKK88OOQR2=3V3VWX%%"-&* $*<)N		
 	689LN+&&}-	
 ##4JO
 
' 
 		s   8C'F))
F7zcompilation time (in seconds)r|   c                f  ^(^)^*^+ [         R                  n[        R                  R                  R
                  R                  5         [        R                  " U R                  5      S:X  a  U(       d|  SSK
Jn  SSKJn  UR                  U 5        [        R                  R                   R#                  5       n[$        R&                  " SSU0UR(                  S9  [+        U R,                  5      $ UR/                  SS5      n[0        R3                  S	U5        [5        X5      n[7        [9        [;        [=        U R                  R>                  5      5      5      R@                  S   [B        [D        45      (       d   S
U R                   35       eURG                  S5      c&  [I        [J        RL                  RN                  5      US'   [J        RP                  (       a  [S        U U40 UD6  [T        RT                  " 5       n	[W        5       n
[Y        5         [[        S S []        U 5       5        5       5      n[_        SSSS9   [J        R`                  (       + =(       a0    [J        Rb                  =(       d    U
=(       a    U(       + =(       a    Un[J        Rb                  nU
n[e        SU5        [f        R3                  SUUUU[J        R`                  5        [i        U5       H[  u  nn[7        U[        Rj                  5      (       d  M'  [m        URn                  Rp                  5      (       d  MM  X;   d  MT  SUl9        M]     SnSnSm(Sn[u        U 5      n[T        Rv                  " 5       nU(       a  [x        Rz                  " XX(U5      u  nm(Ubz  Uu  nn[f        R3                  SU5        U(       a*  [x        R|                  " 5       n[f        R3                  S5        [x        R~                  " UUUUUURG                  SS5      US9u  nm(O[f        R3                  S5        T(b	  T(S   S:X  a?  Ub   e[f        R3                  ST(b  T(RG                  SS5      OS5        [        XU40 UD6nGOOT(S   S :X  Ga  Ub   eUc   e[f        R3                  S!5        [        R                  " 5          [        XU40 UD6nUc   e[T        Rv                  " 5       U-
  UlC        Uu  nnUUlD        UUlE        [        R                  " 5       u  nnUR                  U5         [        R                  " 5         Ub  [        U5      T(S"'   UR                  T(S#'   [f        R3                  S$U5        [x        R                  " UUUUU5        O>T(S   S%:X  d   eUc   eUc   eUu  nn[f        R3                  S&U5        UUlD        UUlE        Uc   eUnT(b  T(S   OS'm)[$        R                  " S(T) 3T(=(       d    0 US)9  [$        R                  " S*T)UT((       a  T(RG                  S+5      OST((       a  T(RG                  S,5      OST((       a  T(RG                  S5      OS-UUS.9  T(b  [        S/U)4S0 jU(4S1 jS29  UR                  UUU5        SSS5        [f        R3                  S3[T        RT                  " 5       U	-
  5        [         R2                  R                  5       nU(       a%  Uu  m*m+[        S/S4 U*4S5 jS29  [        S/S6 U+4S7 jS29  [f        R                  [        R                  5      (       Ga  / n[        S8   R                  5        GH	  u  nnUR                  S95      n [        U 5      S::  a  UR                  US;S<S<S<U/5        M@  [        U 5      S=:  a  S9R                  U SS> 5      OS9R                  U SS? 5      n!U!R                  S@5      n"U"(       aF  [        U 5      S=:  a7  U S>S u  n#n$n%n&S9R                  U SS> 5      n!UR                  U!U#U$U%U&U/5        M  U S?S u  n$n%n&S9R                  U SS? 5      n!UR                  U!S;U$U%U&U/5        GM     [f        R                  SA5        [f        R                  SBR                  SCSDSESFSGSH5      5        [f        R                  SI5        U H:  n'[f        R                  SBR                  " U'6 5        [f        R                  SI5        M<     [        R                  R                  R
                  R                  5         [        5       " [        R                  SJUS   (       a  SKOSL SMUSN    35        W$ ! [        [        4 a    e [         a3  n[        U[        5       5      R                  UR                  5      SeSnAff = f! [        R                  " 5         f = f! , (       d  f       GN:= f)Oz
Inductor API that compiles a single graph.

If you change the argument list for this function, make sure you
also update the call to save_args_for_compile_fx_inner below accordingly.
r   )CompileEventLogLevel)_LazyGraphModulezbackward no-op
compile_id)metadata	log_levelr  r   z&static input idxs compile_fx_inner: %szGinductor can only compile FX graphs which return a tuple/list, but got r  Nc              3  D   #    U  H  nUc  M  UR                   v   M     g 7fry   )supports_caching)r   backends     r~   r   $_compile_fx_inner.<locals>.<genexpr>)  s(      #
G  	!  
s     c              3  j   #    U  H)  n[        UR                  [        R                  5      v   M+     g 7fry   )rX   r~  r   r  r   r}  s     r~   r   r  +  s+      
- +6;;8J8JKK-s   13fx_codegen_and_compileT)rL  r  fx_cachezXFX cache status: use_cache=%s, local=%s, remote=%s, aot_mode=%s, force_disable_caches=%szFX cache key generated: %szUsing remote FX cacher  F)r  	constantszFailed to generate FX cache keycache_statebypasszFX cache bypass reason: %scache_bypass_reasonunknownz*FX cache disabled or key generation failedmissz,FX cache miss, compiling and saving to cachetriton_bundler_metatime_taken_nsz.Saving compiled graph to FX cache with key: %shitzFX cache hit with key: %sdisabledfx_graph_cache_)r  time_nsr  r  
componentszcache not enabled)r  cache_event_timer  r  r  remote_cache_enabledlocal_cache_enabledartifactc                    > ST  3SS.$ )Nr  jsonr   encodingr   )r  s   r~   <lambda>#_compile_fx_inner.<locals>.<lambda>  s    -k]; &%r   c                 0   > [         R                  " T 5      $ ry   r  dumps)
cache_infos   r~   r  r    s    4::j#9r   metadata_fn
payload_fnz%FX codegen and compilation took %.3fsc                     SSS.$ )N,inductor_generated_kernel_to_post_grad_nodesr  r  r   r   r   r~   r  r    s    F"!r   c                 0   > [         R                  " T 5      $ ry   r  )
debug_infos   r~   r  r    s    tzz*5r   c                     SSS.$ )N*inductor_provenance_tracking_node_mappingsr  r  r   r   r   r~   r  r    s    D"!r   c                 0   > [         R                  " T 5      $ ry   r  )node_mappingss   r~   r  r     s    tzz-8r   aten_mm_info_   -?   )rz  r{  z$Overview info of inductor aten mms: z3{:<30} | {:<20} | {:<20} | {:<20} | {:<20} | {:<20}NameBMNKCountz----------------------------------------------------------------------------------------------------------------------------------ztorchinductor done compiling 	BACKWARDSFORWARDS graph r  )dro   aot_compilationr   	_inductorasync_compileCompiledTritonKernelscache_clearrz   count_callsr   torch._dynamo.utilsr  torch.fx._lazy_graph_moduler  force_recompiler   CompileContextcurrent_compile_idr"   log_instant_eventPT2_COMPILEr,   forwardr  static_inputs_logdebugget_input_idxs_to_checkr   nextiterreversedr   r   re  r   r   r=   r   tritonr  	save_argsr5   timerD   rY   allr@   r%   r  fx_graph_cacher)   r   r*  r   rB   r}  r~  _is_inductor_staticr8   r  r/   prepare_keyget_remote_cacheload_with_keyr  re   begin_compile_time_taken_ns_fx_graph_cache_key_fx_graph_cache_debug_linescollectset_triton_bundlerQ   rR   	Exceptionr\   r   with_traceback__traceback__end_compiler	  _save_graphinstantr  rG   post_compile1log_inductor_triton_kernel_to_post_grad_node_infoisEnabledForr   INFOr#   itemsr   r   r   r   endswithr  formatr   ),r   rP  graph_kwargsr  r  r  r  r  inputs_to_checkstartfx_graph_remote_cachebackends_support_caching	use_cachelocalremoter  inputmb_compiled_graphkey_inforemote_cacher  
start_timer  debug_lines	cache_keytriton_bundler  ecompiled_graphprovenance_infomm_table_datar   partsr   
is_batchedbatchmrs  krowr  r  r  r	  s,                                           @@@@r~   r  r    s	    &&H 
OO!!77CCE)Q.x 	=@((,]]11DDF
,,"J/*66	
 rzz**'3'>'>?RTV'WDFWX-nPOd4 89:??BUDMRR 
QRTRZRZQ[\R %-%.v}}/G/G%H\"&	
 	
 IIKE<> " #
)"-
#   
 d
 +++ )&&?*?)) )	 	 %%&
I.		f''	
 ".1HAu5%,,//5<<,,--*,0) 2 37
226	 \\^
%1%=%=L6&"Xz
 ##+ [		6<#/#@#@#BLII560<0J0J"  , 0 0 F'1-!: 		;<
 M!:h!F$,,,II, "- NN#8)DE !7O!7C!
 &&0$,,,'''IIDE''),$:%;G%! )44437<<>J3N!0)1&	;8A!5@K!= "))+!'!33MB ))+".478K4L
01*;*J*JJ'IIF	R$$! m,555$000''''/$YII19=4=1<G9 ,,,* *4)?J}%Z 	 	""k]+%2	
 	&&#')3
u%7Az~~l3t  45(!' %	
  ! : 	##NI|Li
l II5tyy{U7JK ggOOQO 	
 6	
 	 9	
 %%">288:JCIIcNE5zA~$$c3S#u%EF ,/u:?388E#2J'sPR@TD'9:Jc%jAo!&rsq!Qxxcr
+$$dE1aE%BC  *1axxcr
+$$dCAq%%@A) ;, 	78AHHS#sG	

 	 CHHJQQSVWXHHY ! 
OO!!77CCEN'&}5;:
F Gj)*	, s %i0  #A|~6EEOO
 ))+[
 
sR   <Cf!"f!'f!.E0f!A+d7E#f!7f.e??ffff!!
f0c                  .    \ rS rSr% SrS\S'   SS jrSrg)	_FxCompileStati4  r   r   codegen_and_compilec                     SU R                    3$ )Nzcodegen_and_compile: )rg  )r  s    r~   __repr___FxCompileStat.__repr__8  s    &t'?'?&@AAr   r   N)r
  r	  )r   r   r   r   rg  r  ri  r   r   r   r~   rf  rf  4  s      Br   rf  c                  p    \ rS rSr% Sr\" \5      rS\S'   \	          S	S j5       r
\S
S j5       rSrg)	FxCompilei<  zU
An FxCompile represents a mechanism that can turn a GraphModule into an
OutputCode.
z%dict[type[FxCompile], _FxCompileStat]_compile_statsc                    g ry   r   )r  r   rP  rL  rK  s        r~   rg  FxCompile.codegen_and_compileH  s     r   c                8    U R                   R                  5         g ry   )rm  clear)clss    r~   _reset_statsFxCompile._reset_statsQ  s      "r   r   N
r   rI   rP  r  rL  r  rK  r  r
  r;   r
  None)r   r   r   r   __doc__r   rf  rm  r  r   rg  classmethodrs  r   r   r   r~   rl  rl  <  sr     =H<WN9W
  , '	
 ' 
  # #r   rl  c                  <    \ rS rSr\          SS j5       rSrg)_InProcessFxCompileiV  c                  ^^/^0^1^2 SU;   a  US   c   eUS   nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      n	[        R                  n
UR                  S	S5      nUR                  S
S5      n[        S5      R	                  5          [
        R                  " 5          [        R                  =nb,  SSK	n[        R                  SU5        UR                  " U5        [        T5      (       a
  [        5         [        S   R!                  5       n["        R$                  " ['        ["        R(                  " 5       S5      5        [+        5       " [,        R.                  SU(       a  SOS SU 35        [0        R2                  " 5       n[4        R6                  R8                  R:                  R=                  UTUSSS9  UR?                  5       m2[A        SS U24S jS9  [        RB                  RE                  TU5        [G        U5      n[I        T5        [K        SSS9   [4        RL                  " 5          [O        TU5      nSSS5        SSS5        [Q        T5        [A        SS U4S jS9  [        RR                  " W5         [U        T5      nU   [W        TUS9  SSS5        [        RB                  RY                  TU5        [Z        RC                  S[]        S TSSSS!95        TR_                  SSSSS"9m/[A        SS# U/4S$ jS9  [        R`                  Rb                  (       ab  [4        Rd                  Rf                  Ri                  TRj                  5      m1[A        SS% U14S& jS9  T1[4        Rl                  RB                  l7        [q        5       nURs                  5       (       aa  ["        Rt                  S':  a!  [w        [        S(   Ry                  5       5      nO[        S(   R{                  5       n[|        R~                  " SUS)9  [        R                  " 5       (       a   [        S*[        [        5       5      0S+9  SSS5        [        RR                  " U5         [        U5         [        X5         SnSnSnSnU
(       a  [        R                  R                  (       ap  [        TS- S.9u  nn[        U/ UUU	U
UUUSS/9
n[        R                  " U5         U	(       d   S05       eUR                  5         UR                  5       u  nnSSS5        [        TUUUU	U
UUUUU(       a  UR                  OSU(       a  UR                  OSUUS19n[        R                  " 5       nUR                  5         [        R                  " U5         UR                  " U6   / nUR                  b  [        5       m0UR                   H  n[        U[        5      (       ay  UR                  5       (       ad  [        [        UR                  5       5      5      S:X  a>  UR                  [        U04S2 jUR                  5       R                   5       5      5        M  UR                  S5        M     [        U5        Sn[K        S3SS9   UR                  (       GaX  S4S5K`Jan   UR                  (       d   S05       eUR                  5       u  n!n"[        RB                  " S6U!R                  5        U"R                  (       a!  [        RB                  " S7U"R                  5        Sn#UR                  (       a2  UR                  UR                  5      n#[        RB                  " S8U#5        [K        S9SS9   U R                  UU!R                  U"R                  U#UR                  / [        R                  UR                  R                  U(       a  UR                  R                  O/ -   5      QS:9n$SSS5        O)UR                  5       n%U%R                  n$[        U%S;S5      nSSS5        [        R                  [,        R.                  5      (       ay  UR                  5       u  n&n'n([        =R                  U&-  slr        [        =R                  U(-  sls        [        =R                  U'-  slt        [        R                  S<U&U'U(S=.5        U(       GaW  [        R                  R                  (       Ga7  [        Rj                  R                  (       Gd  [4        Rl                  R                  R                  " U6 (       a  Sn)TRj                  R                   H  n*U*R                  R                  S>S5      n+U*R                  S?:X  dM  [        U+[4        R                  5      (       a.  [4        Rl                  R                  R                  U+5      (       d  M~  U*R                  R                  S@S5      =n)(       d  M    O   SAn,U)(       a	  U, SBU) SC3n,OU, SC3n,U,[        Rj                  lx        U(       a  [        Rj                  R                  (       db  [        T5      n-U-(       aP  SDU-GR                    3n,U-R                  R                  S@S5      =n)(       a  U, SBU) SC3n,U,[        Rj                  lx        [        R                  (       a`  [        W$[        G[        45      (       d   eG[        U$5      sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ U(       aY  [        Rj                  R                  (       d:  SSEKJn.  U." [        Rj                  GR
                  5      [        Rj                  lx        U GR                  G[        U 5         =GR                  S4-  sl        G[        W$UTU[        Rj                  R                  UGR                  5       [        S   U-
  UUUUUT2T/U5      sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ ! , (       d  f       G	N= f! , (       d  f       G	N= f! , (       d  f       G	N= f! [         a    [        R                  S,5         GN>f = f! , (       d  f       GNI= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       O= f SSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        g! , (       d  f       g= f)FzC
Generates the OutputCode from the GraphModule and example_inputs.
r  Nr  r   r  Fr  r  r\  r  z/pytorch.wait_counter.actual_codegen_and_compiler   z3Sleeping for %s since sleep_sec_TESTING_ONLY is setr  i  ztorchinductor compiling r  r  r  )save_dirr  c                     SSS.$ )Nfx_graph_runnablestringr  r   r   r   r~   r  9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    / (%r   c                    > T $ ry   r   )runnable_graph_strs   r~   r  r    s    #5r   r  additional_fake_tensor_propTrL  c                     SSS.$ )Nbefore_post_grad_graphr  r  r   r   r   r~   r  r    s    4 (%r   c                 &   > T R                  SSSS9$ )NFTprint_outputinclude_strideinclude_device)print_readable)r   s   r~   r  r    s    2#4#4!&tD $5 $r   r\  %szAFTER POST GRADr  r  colored)r  r  r  fast_sympy_printc                     SSS.$ )Nafter_post_grad_graphr  r  r   r   r   r~   r  r    s     7$,)r   c                    > T $ ry   r   )inductor_post_grad_graph_strs   r~   r  r    s    'Cr   c                     SSS.$ )Ninductor_post_to_pre_grad_nodesr  r  r   r   r   r~   r  r    s    $E(.-r   c                 0   > [         R                  " T 5      $ ry   r  )provenance_tracking_jsons   r~   r  r    s    4::6N+Or   )r  
   graph_break)	overwritenum_graph_breakspt2_configs)extra_loggingzfailed to log pt2_configsc                   U R                   S:H  =(       am    [        U R                  [        5      =(       aL    U R                  R	                  S5      =(       d*    [        U R
                  R                  SS 5      [        5      $ )Nr   r  r   )r   r   r   r	  r   r   r   rF   )r   s    r~   r  r  )  s_    $''Z:O ;&t{{C8; !KK223CD X)$))--t*DFVW	;r   )rk  )	rP  	shape_envr  r  r  r  r\  r  is_const_graphz"AOT mode only supports C++ wrapper)rP  r  r  r  r  r  r\  r  rq  const_wrapper_codeconst_kernel_codeconst_modulerL  c              3  F   >#    U  H  nTR                  U5      v   M     g 7fry   )doprint)r   sps     r~   r   :_InProcessFxCompile.codegen_and_compile.<locals>.<genexpr>o  s     )X@W1!))A,,@Ws   !zGraphLowering.compile_to_fnrV   )AotCodeCompilerzOutput wrapper code: 
%szOutput kernel code:
%sz#Serialized Extern Kernel Nodes: 
%szAotCodeCompiler.compile)device_typeadditional_filesrunnerzGraph Metrics:
%s)num_bytes_accessednodes_num_elemnode_runtimesr   r  stack_tracezWgraph with symbolic shapes inputs and config.triton.cudagraph_skip_dynamic_graphs=True.z Found from 
z,disabling cudagraphs due to incompatible op ) check_lowering_disable_cudagraph)r   ro   r  rM   guardrz   preserve_rng_stater   sleep_sec_TESTING_ONLYr1  r   warningsleepr  r   r#   copysyssetrecursionlimitr   getrecursionlimitr   r   rG  ioStringIOr   _dynamorepro	after_aotsave_graph_reprogetvaluerG   r*  fx_graphrn   r_   r%   no_gradr  r   set_fake_modeget_cuda_device_contextrZ  fx_graph_transformedpost_grad_graphs_logr(   r  traceenabledr   	tracebackget_graph_provenance_jsonr   r   _inductor_post_to_pre_grad_nodesr'   in_progressversion_infosumvaluesr  r"   compilation_metric	is_fbcoder   r	  r  r>  r  r  r  r  rt  ra   set_graph_handlerruncodegen_with_cpp_wrapperr   rW   CachedMetricsHelperfreeze_runtime_assertsgraph_outputsrK   r   rc   has_tensor_outputr   rJ   
get_strider   re  
get_layoutr   _check_triton_bf16_supportr  	codecacher  r  r0   extern_kernel_nodesr  compiler  dictfromkeyswrapper_coder  compile_to_modulecallr   inductor_metrics_logrF  count_bytesr  r  r  r  r/  cudagraph_skip_dynamic_graphsdisable_cudagraphs_reasonr   any_is_symbolicr   r   r   r   rj   r   r   r6   torch._inductor.cudagraph_utilsr  device_node_mappingrm  r~  rg  r7   
get_deltas)3r  r   rP  rL  rK  r  r  r  r  r  r  r\  r  	sleep_secr1  inductor_countersfdr  r  cuda_contextmetrics_contextr  rq  const_graphr  r  rl  r   metrics_helperr   r:  compiled_fn_runnerr  r  kernel_codeserialized_extern_kernel_nodescompiled_fncompiled_module	num_bytesr  r  r  r   meta_valdisablemaybe_incompat_noder  r  r  r  r  s3    `                                             @@@@r~   rg  '_InProcessFxCompile.codegen_and_compileW  sc     |+\0J0VVV ,\ :
+7+;+;<OQS+T(,,]EB"."2"2:t"D(,,]EB**)--neD5t< 	
 JKQQS++-#:::	GI9 

9%)"--#% ( 4 9 9 ; !!#c&;&;&=t"DEN*"-;:> ?!
$ BMM))::B
T ;  "$ 6 GGR0
 .n=I$ B-T ]]_ 0^ DI % 6b9
 	 +6r:!/N ",,R@$***)'+'+ $	 02/@/@!&#'#'%)	 0A 0, !!  D <<''**DDRXXN - %"% $P 1 OO))J #6"7"..00'''1+.x/F/M/M/O+P(+3M+B+H+H+J(&99"&9I ##%%	A, -s3J3L/M+A ,V 	*3NC-kD%)""%)"$(! 3 3 P P 4B.40H0 #0 ')"+!)$/!)/E%1$/'+#K ,,[9*P,PP{#)'@@B >*,= : & $2'% +%+A!- +'94F*00D 4E)//$!,$3+. ")!<!<!> ,,.((/II~.QSN**6 +,#(#6#6C *3 7 7$'$9$9$;$;$'(=cnn>N(O$PTU$U !/ 5 5$))X@P@W@W)X$X!" !/ 5 5d ; $7 /u5 *.&%5T !>>>B#(#4#4  D#4 9>8V8V8X5L++11 ;\=O=O  +00 / 5 5$={?P?P!" >B:$88$)$@$@(-(A(A%& !?
 !0 5 5$J$B!"
 ". 9QU" />.E.E$)$0$6$6$/$5$5$B050A0A	6&)-,1,>,>,O,O 4? 1<0H0H0Y0Y57	-.**	6& /F /"	" ", /4.E.E.GO*9*>*>K18 /42.mt ,88FFCHCTCTCV@	>=22i?2-->-...@.,1106?2@1> #"MMGGG ! A A A!OO11AA>R&*$&HHNND'+yy}}UD'AH $= 8'1(ELL'I'I','<'<'L'LX'V'V (.2iimmM4.PP{P % %3 #|&)0	k]"&MG)0	nG<C9!!''*K*K.STV.W+.(TUhUoUoTp&qG.A.F.F.J.J -t/  {   .5I\+b*Q@GAGG=(()+T{CCCC+K8S 0/G ED DC +*M .- TSp "!''*K*K
 = ! ; ; 9 ''T
3GG1LG*#&99&113 ,/@@"&)$'*4*s 0/G ED DC +*M .- TST %_ 8 "\D % A $?@AK ,+Z :9z" "= 9 0//G EDD DCC +**M .-- TSSs  .u-E<u qq	#q+9u$r"2q*	=Fr"q< u>t9
t	A%t;2r4-A?t,C2s*C&sA<s 2s2Gs*
C6s* 	t		t		t9	u$	u-7Cs*	t	t	!	t9*	u3	u-
qq
q'"u*
q94r"<r	r"r	r""
r1,u4
s>t
ss
s'"s**
s84t<	t	
tt		t9
t-)t90	u9
uu
	u-
u!	u--
u;r   Nru  )r   r   r   r   r   rg  r   r   r   r~   r{  r{  V  sK    qq ,q '	q
 'q 
q qr   r{  c                p   [         [        R                  :X  a  [        5       nOK[         [        R                  :X  a  SSKJn  U" 5       nO%[         [        R                  :X  a  SSKJ	n  U" 5       n[        (       a,  SSKJn  SSKJn  [        WU5      (       d   S5       eU" U5      nWR                  XX#5      $ )NrV   )_DebugSerdeFxCompile)_SubprocessFxCompile)_AsyncFxCompile)_OutOfProcessFxCompilez7async is only valid with an out-of-process compile mode)fx_compile_moder   r   r{  r   compile_fx_extr  r   compile_fx_subprocr  fx_compile_asynccompile_fx_asyncr  r  r   rg  )	r   rP  rL  rK  schemer  r  r  r  s	            r~   r  r    s     -...$&	M33	38%'	M44	4<%'5:&"899 	
E	
9 !(%%b/XXr   c                   / n[        U 5       H  u  p4[        U[        R                  5      (       d  M&  [	        UR
                  R                  5      (       d  ML  [        5          X1;   a  [        U5      (       a   SSS5        Mw  [        U5      (       d   SSS5        M   SSS5        UR                  U5        M     U$ ! , (       d  f       N%= f)z
This function runs at compile time, and generates a list of indices for which we
might need to do a copy to preserve alignment requirements.
N)r*  r   r   r   rB   r}  r~  rk   rE   rC   r   )inputsr  ids_to_checkr  rS  s        r~   r+  r+  -  s     Lf%%..ell''((02 %*;E*B*B	 32
 /u55 32
 6 3 	A) &,  32s   $C C  
C	r   )r  placeholdersmutated_input_idxsc                 ^ ^^^ SSK Jn	  [        R                  R                  (       aC  [
        R                  " U	UUUUUUU[        R                  R                  R                  5       S9	mO[        mS mSUUU U4S jjn
U
$ )Nr   )cudagraphify_impl)device_indexstack_tracesr  r\  r  r  r  r  c                   > Tc(  [         R                  " 5          T" TU T5      mS S S 5        T" U 5      $ ! , (       d  f       N= fry   )rz   r  )
new_inputsr  cudagraphify_fnmodelr  s    r~   r  cudagraphify.<locals>.runr  s=    002-eZARS 3:&& 32s	   5
A)r  r  r
  r   )torch._inductor.cudagraph_treesr  r   r/  cudagraph_trees	functoolspartialr   r   r$  r%  )r  r  r  r  r  r\  r  r  r  new_cudagraphify_implr  r  r  s   ``         @@r~   cudagraphifyr   P  sr    
 }}$$#++!%%#%%1}}33FFH

 ,K' ' Jr   c                    [         R                  " U R                  5       U R                  5       U R                  U R
                  S9$ )z)
Copy and input while preserving strides
)r  r}  )r   empty_stridedsizer   r  r}  )r   s    r~   static_inputr$  |  s/     qvvx177188TTr   c                R    [        X5      n [        X5      nU R                  U5        g)z=Index into expanded dimensions of both dst and src then copy_N)r:   copy_)dstsrcexpanded_dimss      r~   index_expanded_dims_and_copy_r*    s#     c
1C
c
1CIIcNr   c                  ^^	^
^^^ [        UT5      n[        [        UT5      5      m[        X5        [	        U[
        5      (       d   e[        U5       VVs/ s H  u  pEUT;  a  [        U5      O/ PM     snnm[        U5       VVs/ s HG  u  pE[	        U[        R                  5      (       d  UO UT;  a  [        U5      OUR                  5       PMI     snnm[        [        UT5      5       H@  u  nu  pV[	        U[        R                  5      (       d  M)  UT;  d  M1  [        TU   XV5        MB     [        R                  R                  5         [        R                  R!                  5       nUR#                  [        R                  R%                  5       5        [        R                  R'                  U5         U " [        T5      5        SSS5        UR                  5         [        R                  R%                  5       R#                  U5        [        R                  R                  5         [        R                  R)                  5       m
[        R                  R+                  T
USS9   U " [        T5      5      mSSS5        [	        T[
        [,        45      (       d  T4m[.        R0                  (       a  SU
UUUU4S jjnO8[3        [5        T5      5       Vs/ s H  oDT;  d  M
  UPM     snm	SU	U
UUU4S jjn[7        X[        5       5      $ s  snnf s  snnf ! , (       d  f       GNY= f! , (       d  f       N= fs  snf )zI
Assumes inputs[static_input_idxs[i]] are always the same memory address
Nthread_local)streamcapture_error_modec                  > [        T5      [        U 5      :X  d   e[        [        TU T5      5       H  u  nu  p#n[        U[        R
                  5      (       d  M*  [        U[        R
                  5      (       d   eUT;   a&  UR                  5       UR                  5       :X  d   eMw  [        X#U5        M     U R                  5         TR                  5         T	$ ry   )
r   r*  zipr   r   r   data_ptrr*  rq  replay)
r  r   r'  r(  r)  r   inps_expanded_dimsr  static_inputsstatic_outputss
        r~   r  cudagraphify_impl.<locals>.run  s    }%Z8882;M:/AB3..c "#u||44!#u||4444++<<>S\\^;;;
 2#MJ3 LLN!!r   c                   > T H<  nTU   nX   n[        U[        R                  5      (       d   e[        TU   X25        M>     U R	                  5         TR                  5         T$ ry   )r   r   r   r*  rq  r2  )	r  r   r)  r(  copy_indicesr   r3  r4  r5  s	       r~   r  r6    sa    # 23 7 o!#u||4444-mC.@#U	 $
 LLN!!r   )r  list[InputType]r
   Callable[[list[InputType]], Any])r+  rN   rm   rh   r   r   r*  r9   r   r   r$  detachr0  r*  r   synchronizeStreamwait_streamcurrent_streamr-  	CUDAGraphr   re  r   size_assertsr   r   rf   )r  r  r  check_input_idxsr   r   r)  r-  r  r8  r   r3  r4  r5  s     `      @@@@@r~   r  r    s    /v7HI)3#F,=>* 64fd####  ''FC !$+< <!"D'  '	 (FC a..  ++ a	 (	M $-S9K-L#Maa&&36G+G)-*<aO $N
 
JJZZ F
uzz0023			6	"d=!" 
#
	JJ++F3	JJ JJ  "E			%>		RtM23 
SntUm44(*	" 	"* !]!34
4CCT8TC4
	" 	" (z|LLY	* 
#	" 
S	R6
s1   L	AL<L .L2	M+M 
L/2
M c                   [        U [        5      (       d   U 5       e[        U 5        Uc  SS0O0 UESS0EnUR                  S[        R
                  R                  5      nU(       a  UR                  S5      (       a   S5       eO0 UES[        U R                  5      0EnUR                  SS 5      nU R                  R                  SS 5      n[        R                  R                  U5      n[        R                   " S5         [        R                  R#                  U5         [%        SSSS	9   ['        5          [)        U U[*        R,                  " UUS
9US9n[        U[.        5      (       d   eUR0                  sS S S 5        sS S S 5        sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        O! , (       d  f       O= fS S S 5        O! , (       d  f       O= fS S S 5        g ! , (       d  f       g = f)Nr  Tzaot_inductor.output_pathz.pt2a
  The output path for aot_compile should not have an extension with .pt2 this is for specifying the output path for the .so in AOTInductor. If you would like to package the AOTInductor generated files into a pt2, please call `torch._inductor.aoti_compile_and_package`.r  dynamo_compile_idcompile_fx_aot)rL  reset_event_log_on_exit)r  )inner_compiler  )r   rI   r*   r   r   r  output_pathrI  r.   coder   r   r   r   r$  ro   set_aot_compilationcompile_contextr!   r'   
compile_fxr  r  r6   filename)	model_example_inputs_rG  r  rH  r  saved_compile_idsaved_compile_contextcompiled_artifactss	            r~   rE  rE    s    fk**2F2* &f- ! 
44t4  !$$"F$7$7$C$CK ''// 	
R	
//

&	&++(>

 ,//0H$O{{':DA!MM889IJ	d#%%&;<"&$(	

 	'#++'= *
 ,l;;;;!** 		
 	
 	=< 	$# 		
 	
 	
 	=<< 	$##sa    G1$G0F=;AF"	=	F=	G	G1"
F0,F=4	G=
GG	G1
G%	!G11
G?c                  ^^^^ SSK JnJn	  [        U 5        [        R
                  " U SS9n
U
(       a  [        XS5        U" U 5        U	" UU U5      u  nmT Vs/ s H  oU   PM	     nn[        U5      nUR                  R                  Gt pUR                  S   n[        U5       VVs/ s H3  u  nn[        U[        R                  R                  5      (       d  M1  UPM5     snnUR                   S'   / n[        R"                  R$                  R'                  5       nS/mSmUGbA  UR(                  c   eUR(                  n[+        S[-        U5      S-
  5      m[.        [0           " 5       nUR2                  nUc   eSn[-        U5      S:  a  / m[5        [-        U5      5       HN  nUT;  a   S UU'   US:  a  UU   UUS-
     :X  a  US-  nOUR7                  UU   5        TR9                  U5        MP     UR:                  c   e[5        [-        UR:                  5      5       H  nUU;  d  M  S UR:                  U'   M     UR<                  (       a  UR<                  R>                  n[@        RB                  RE                  USS5         U" UUUUUSUU
S9mS S S 5        [F        RH                  (       a  T$ S
UUUU4S	 jjnSUl%        U$ s  snf s  snnf ! , (       d  f       ND= f)Nr   )%convert_conv_weights_to_channels_lastfreezeTr  r   rV   r  )r  r  r  r\  r  r  c           
        > T Vs/ s H  nXT[        UT5         -
     PM     nnU R                  5         T" U5      $ s  snf ry   )minrq  )r   r  args_newmax_offset_idxoptimized_functionpreserved_arg_indicesunwrapped_args_offsetss      r~   wrapper%fw_compiler_freezing.<locals>.wrapper  sU     +
* +C>,BCCD* 	 
 	

!(++
s   >)r   zlist[object]r
  zSequence[torch.Tensor])&torch._inductor.freezingrT  rU  rV  ra   decide_layout_optr  r$   r   r   r   r*  r   r   r   r   r   r   r   r   params_flat_unwrap_subclassesr   r   rN   r   params_unwrapped_to_flat_indexr   r  r   params_flatr   r   r   r  r  ro   r  _boxed_call)aot_autograd_modelaot_example_inputsdynamo_modelnum_example_inputsrG  r  r  forward_devicerT  rU  r  	opt_modelindr  r  model_outputs_nodemodel_outputsr   rs  r  tracing_contextparams_flat_unwrappreserved_indices_params_flatunwrapped_idxscurrent_offsetr  r]  rY  rZ  r[  r\  s                              @@@@r~   fw_compiler_freezingrs  .  s    W ""45001CRVWJ+F-.@A'-($I$ >SS=RcS1=RS !34I '__22Q&++A.M#M2;2QjEHHMM6R2;67 mm22::<OSN"<<HHH,JJQ$6 7! ;<(23(9%(GG)))!"Q&%'"s-./A--(,"1%q5^A..Q2GG"a'N-11.2CD")).9 0 **666s?6678A5515++A. 9 && / ; ; P P			9&=t	D*/!'5!	
 
E 	!!, , GNQ T;L 
E	Ds   K
)0KK	K
K#c                     [         R                  R                  (       a  [        [	        S5      5        [         R                  R
                  b  [         R                  R
                  O	[        5       SSSS.$ )Nzcpp wrapper enabledFT)ztriton.autotune_at_compile_timeztriton.autotune_cublasLtztriton.cudagraphsztriton.store_cubin)r   r/  r  r3   r2   autotune_at_compile_timerU   r   r   r~   get_cpp_wrapper_configrv    sY    }}+'(=>	
 }}55A MM22$)""
 
r   c                R   [         R                  R                  5       (       d  [        R                  " 5       $ [        S [        U 5       5       5      n[        U5      S:X  a1  [         R                  R                  [        [        U5      5      5      $ [        R                  " 5       $ )zP
Returns a cuda device context manager if there is a single device in the graph
c              3  H   #    U  H  oR                   S :X  d  M  Uv   M     g7f)r   N)r~  r  s     r~   r   *get_cuda_device_context.<locals>.<genexpr>  s      806KK64I0s   "	"rV   )r   r   r   r  r  rN   r@   r   r}  r,  r-  )r   cuda_devicess     r~   r  r    s     ::""$$%%''-7 8,R08 .L |! 	

$tL123 ##%r   c                R  ^ ^^!^"^#^$ U(       aG  [         R                  " U5         [        T U[         R                  " U5      " T5      UUS9sSSS5        $ [         R                  (       Ga  [         R                  " SS0[	        5       E5         [
        R                  " U5         Un[        T [        5      (       Ga$  T R                  R                   Vs/ s H0  nUR                  S:X  d  M  UR                  R                  S5      PM2     nnU V	s/ s H&  n	[        U	[        R                  5      (       a  U	OSPM(     nn	[!        S U 5       5      (       a  [#        [%        5       X5       Ho  u  pnUc  M  [        U[        R                  5      (       d   eUR&                  UR&                  :w  d  MH  [)        SU
 S	UR&                   S
UR&                   S35      e   UnSSKJn  [/        U5      nU" T U0 U5       u  nnnnn[        UU[0        R2                  " TSS9UUS9sSSS5        sSSS5        sSSS5        $ [0        R2                  " [        TUUS9n[5        T 5      (       d  [7        T UU5      $ [        T [        5      (       a6  [        T R                  R8                  [:        5      (       a  [=        T UU5      $ [?        [@        RB                  5         [E        5          [        RF                  RH                  RK                  [         RL                  RN                  5         [        T [        5      (       a  [Q        SS U 4S jS9  [R        RU                  S[W        ST SSSS95        [Y        T R                  5      [        RZ                  RT                  l.        [_        T U5      m [Q        SS U 4S jS9  [!        S U 5       5      (       a(  [a        T UU5      sSSS5        sSSS5        sSSS5        $ [         Rb                  (       a   e[e        U5      m$[g        [         Rh                  Rj                  5      m![m        S5      m"[o        [p        5      m#Ub  UO	[s        5       n        S*U!U"U#UU U$4S jjn[0        R2                  " USS9n[u        [v        U5      n[         Rx                  (       a9  [        Rz                  " 5       (       d  [0        R2                  " [|        T T$TT!T#T"S9nO%[0        R2                  " USS9n[u        [v        U5      n        S+S jn[        SS9      S,U!U"U#U4S  jj5       n[u        [v        U5      n[/        U5      =(       d    [        R                  R                  SS!9n[        R                  R                  R                  5       =(       d    [        R                  R                  U5      n[
        R                  (       Ga{  [        R                  " SS"9   [        T USUS#9u  nnSS$KHJIn  U" U5      nUR                  R                   H  nUR                  S%:X  d  M  SUR                  ;  d  M'  [        UR                  5      " U5      n[        U[        R                  5      (       a  UR                  USS&9UR                  S'   M  [        U[        R                  5      (       a9  [        R                  R                  R                  UU5      UR                  S'   M  [        U[        5      (       d  M  UUR                  S'   GM     SSS5        [        T WW5      nS'T R                  ;   a  T R                  S'   UR                  S''   S(T R                  ;   a  T R                  S(   UR                  S('   [        R                  R                  5       nU(       a  [        R                  R                  O[        R                  n[
        R                  " U5         [        R                  " 5          U" 5          U" UU5      sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ [
        R                  " U5         [        R                  R                  U5         [        R                  " 5          [        R                  " SS"9    [        UUUUUST!T"US)9	" T U5      sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        sSSS5        $ ! , (       d  f       GN= fs  snf s  sn	f ! , (       d  f       O= f SSS5        O! , (       d  f       O= fSSS5        GM-  ! , (       d  f       GN<= f! , (       d  f       GNV= f! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        GMg  ! , (       d  f       GNv= f! [         a  n U R                  5       SeSn A ff = f! , (       d  f       O= f SSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        O! , (       d  f       O= fSSS5        g! , (       d  f       g= f)-a  
Main entry point for compiling given FX graph.  Despite the fact that this
lives in :mod:`torch._inductor`, this function is responsible for calling
into AOT Autograd (and we will eventually get a callback to
``inner_compile`` to perform actual compilation.  In other words, this
function orchestrates end-to-end compilation for the inductor backend when
you use :func:`torch.compile`.

NB: This function TAKES OWNERSHIP of the input ``model_`` and can potentially
mutate it!  Make a copy if you need to preserve the original GraphModule.
)rG  decompositionsignore_shape_envNr  Fr  r   c              3  (   #    U  H  oS Lv   M
     g 7fry   r   )r   vs     r~   r   compile_fx.<locals>.<genexpr>  s     :k}ks   zBDevice mismatch between fake input and example input at position #z: z vs zx. If the model was exported via torch.export(), make sure torch.export() and torch.aot_compile() run on the same device.r   )_fakify_script_objectsT)r  r  c                     SSS.$ )Nbefore_pre_grad_graphr  r  r   r   r   r~   r  compile_fx.<locals>.<lambda><  s    3 (%r   c                 X   > T R                  SSSS9S[        T R                  5       3-   $ NFTr  z

 # graph id: r  idr   rN  s   r~   r  r  @  7    6#8#8!&tD $9 $ &b&6%78$9r   r  r  zBEFORE PRE GRADr  c                     SSS.$ )Nafter_pre_grad_graphr  r  r   r   r   r~   r  r  T  s    2 (%r   c                 X   > T R                  SSSS9S[        T R                  5       3-   $ r  r  r  s   r~   r  r  X  r  r   c              3  X   #    U  H   n[        U[        [        [        45      v   M"     g 7fry   )r   r   re  r  r   s     r~   r   r  a  s!     K?az!dE4011?s   (*c                R  > [         R                  " S5         U(       a  [        U 5        [        R                  R
                  R                  T[        U5      5      n[        U 5      n[        R                  (       Ga\  [        R                  " UR                  6 n[        U5      n[        R                  R                  R!                  5       nUb/  UR"                  (       a  U(       d  UR"                  R$                  nOSn['        T[(        5      (       aX  TR*                  R,                  Gt pU
R.                  S:X  d   e[        R0                  " U
R                  5      u  p[        U5      nOUnX::  d   eX-   nX::  d   e[3        X5       Vs/ s H2  n['        X^   [        R4                  R6                  5      (       d  M0  UPM4     snUR8                  S'   O/ UR8                  S'   [;        U 5        T" U U[=        U5      TTUTS9sS S S 5        $ s  snf ! , (       d  f       g = f)Nz$compile_fx.<locals>.fw_compiler_baser   r   r   )r  r  r  r\  r  )rz   r%   rV  r   r  r   num_fw_fixed_argumentsr   rl   r   keep_output_strider+  arg_tree_leavesr   r   r   r   r   num_mutated_inp_runtime_indicesr   rI   r   r   r   tree_flattenr   r   r   r   r   r   )r   rP  r\  r   rl  rm  num_model_outputsr   original_output_start_indexr  orig_model_outputs_nodeorig_model_outputsnum_orig_model_outputsorig_output_end_idxr   r  ri  r  rG  rN  rh  s                  r~   fw_compiler_base$compile_fx.<locals>.fw_compiler_base  s   
 **+QR1"5--DD&N(; &1_",,,$*$:$:<N<S<S$TM(+M(:%#mm::BBDG*w/B/B<#//OO 4 783!&+666<ll6H6H3699XEEE060C0C3881-* 255G1H.1B.1FFF  4L (
 /CCC $)7$K$C &m&8%((--H	 $K&++,FG KM&++,FG
 ;2>$"&;E&B)%!-/=M SRnKo SRs$   E9H/HH
?HH
H&r  )rg  rh  rG  r  r  ri  c                   [        U 5      nU   [        U SS9  S S S 5        UR                  SS 5      n[        R                  " SSS9   [        U U4SUS.UD6sS S S 5        $ ! , (       d  f       NO= f! , (       d  f       g = f)NT)rB  static_lifetime_input_indicesr   r  r  )compilerr  )r  rV  r   rz   r%   r   )r   joint_inputsr   r  r  s        r~   partition_fn compile_fx.<locals>.partition_fn  s    
 326L .btL	  BH/B) **5T ;  (2O	
    s   A"A3"
A03
Bbackward)r  c                @  > SSK Jn  [        R                  " S5         U   [	        U 5      n[
        R                  (       av  [        R                  " UR                  6 n[        U5       VVs/ s H2  u  pV[        U[        R                  R                  5      (       d  M0  UPM4     snnUR                  S'   O/ UR                  S'   [!        U 5      n[
        R"                  (       a  [
        R$                  " ['        5       5      O[(        R*                  " 5          T" U U[-        [/        U5      5      TST
T	S9sS S S 5        sS S S 5        sS S S 5        $ s  snnf ! , (       d  f       O= f S S S 5        O! , (       d  f       O= fS S S 5        g ! , (       d  f       g = f)Nr   )compile_lockzcompile_fx.<locals>.bw_compilerr   T)r  r  r  r  r  )torch._dynamo.convert_framer  rz   r%   rl   r   bw_outputs_user_visibler+  r  r   r*  r   r   r   r   r   r>   r  r  rv  r  r  r   r   )r   rP  r  rl  rm  r   rs  r   r  ri  r  rG  s           r~   bw_compilercompile_fx.<locals>.bw_compiler	  sK    A ))*KL%0_"11$*$:$:<N<S<S$TM '0&>K&>FC%a7 &>K&++,FG KM&++,FG&r* )) LL!7!9:#//12 )&*.uU|*<#-$(!)3A	2 2  MLK2 2 2  MLLsZ   FAE5-/E
 E
&A4E5E8	E5	FE5
E($E5,	F5
F	?F
Fr  )unlift_effect_tokens)trace_jointr|  )_detect_fake_mode_from_gmr   )static_shapes dynamo_flat_name_to_original_fqnrD  )	fw_compilerr  inference_compilerr|  r  keep_inference_input_mutationsr  r  r}  )r   rI   rP  r  r\  r  r
  r;   )r   rI   r  zSequence[object]r   r  r
  ztuple[GraphModule, GraphModule])r   rI   rP  r  r
  r;   )_r   r  rL  r  rv  ro   set_real_inputsr   rI   r   r   r   r   r   r   r   r  r0  r	   r}  
ValueErrortorch._export.non_strict_utilsr  r$   r  r  graph_returns_tuplemake_graph_return_tuple_codegenrT   handle_dynamo_export_graphrS   r  r  r   r   r  preserve_node_metar  r  rG   pre_grad_graphs_logr*  r(   r  r  _pre_grad_graph_idrI  r&   _raise_error_for_testingr   r=   r/  r  r1   r,  _graph_counterr[   r-   r;   freezingis_grad_enabledrs  rH   r  r  r   r   r   r  functorch_configr+   torch._export.utilsr  r
   r   from_tensorScriptObject_libraryfake_class_registrymaybe_to_fake_objrF   r<  _C_is_any_autocast_enabled_DisableAutocastr  r  r  r   _disabletracingrP   rQ   remove_dynamo_frames)%rN  rO  rG  r  r|  r}  inputs_r   fake_inputsinpr   fir  r  r  patched_mod	fake_argsr  recursive_compile_fxr  r  r  r  r  rn  r   r-  r  r   r;  disable_ampr   r[  r  ri  r  rh  s%   ` `                              @@@@r~   rL  rL    s,	   . \\.)$ll>:=I-!1 *) LL!5,. o.+:G&+.. !' 2 2 2ww-/ )DIIMM%( 2    +* &c5<<88CdB*  
 :k:::&)%';&H
>#-a#>#>>#>!yyAHH4&0&hilhmmo')yykahhZ @o%o'" !"	 'I *GM(1I'YG L!"+"3"3Mt"T#1%5 HGA /. j %,,#%)	 v&&& 
 	
 &+&&:~, , * 
 	
 	}BBC "--fll.B.BC fk** 9
  %%&%#'#' 	 8:&,,7GEOO!!4/HF9
 K?KKK'$e 	DC 	#" 	DCt 2222 1 v}}778
 *$/ ' -8N>Q>S 	S	S	/S	 S	 	S	 S	l .UC 	 6j+N??5#8#8#:#:5>5F5F$##5+%!-6 "+!2!23CRV!W!@."		*	 	 -		6 
'*	="	"	-@"	"	 "	 
>"	H 6j+N$
 J--D-I 	 MM((002 7}}++I6 	
 !''TB&7# %#1	'#O J5b9	 HHNNDww*,dii1G!+DKK!8!<%fell;;/8/D/D &d 0E 0DIIe, (0B0BCC % B B T T$-v!" !IIe,
 (0@AA/5DIIe, +# CB (ODK1V[[@GM{{6H  !CD #fkk18>DW8X  !45  ((;;=K-8))j>T>T  +->-G-G-I79)+G LU9-I-I++q	 	DC 	#" 	DC|	 OOI&MM!!/2&&(""=9# + +'9#1!-37)/=%5
 /
+ >= )( 32 '&y	 	DC 	#" 	DC{ *)0( HGGA /..   l
 CBh LU9-I-I-I++++* $ 9 ,,.D89 >== )(( 322 '&&y	 	DCC 	#"" 	DCCs  'de,$1ed+-d+e-d0?4e79e4A	e=&d5#	e,	e,j&Ai>(B8i$ 	i>)	j<G<i$8Ae>	e>	Ce>	e>	/Ci$g	f*	f(	f*1	g	:	i$	i>	ji$5 i
	h0+h g;g	h"	h0+	i
	4	i$=	i>	j
d(+
e5
e?e	e,
e	e,,
e;>
fi$
ff*!	g	*
f84g	;i$
gi$
g8"g33g88g;;
h	h	h0
h$ h0'	i
	0
h>:i
		i$

ii$	i>$
i2.i>5	j>
j	j
j&c                   [        U [        5      (       d  g[        U 5      R                  u  n[        U[        [
        45      (       a  g[        U[        R                  R                  R                  5      (       a~  [        UR                  S5      (       ac  [        UR                  R                  R                  5      S:  a6  [        S UR                  R                  R                   5       5      (       a  gg)z"True if a FX graph returns a tupleT_schemarV   c              3  R   #    U  H  n[        UR                  5      S :H  v   M     g7f)r   N)r	  r~  )r   rets     r~   r   &graph_returns_tuple.<locals>.<genexpr>	  s     O5NcCHH)5Ns   %'F)r   rI   rl   r   r   re  r   r   r   r   r  r   r   r  returnsr2  )r   rvs     r~   r  r  	  s    b+&&O  ER"tUm$$2uxx}}))**BIIy))		!!))*Q.ORYY5F5F5N5NOOO r   c                  ^^ [        U 5      nUR                  u  n[        R                  " U5      u  nmU R                  R                  U5         U R                  R                  U5        SSS5        U R                  R                  U5        [        U 5      (       d   eU" X5      m[        R                  " T5      SUU4S jj5       nU$ ! , (       d  f       Ng= f)zu
Mutate gm so it returns a tuple.  This is only needed for graphs
not created by torchdynamo that return non-tuples.
Nc                 >   > [         R                  " T" U 0 UD6T5      $ ry   )r+  tree_unflatten)r   r   r  specs     r~   r]  (make_graph_return_tuple.<locals>.wrapper	  s     $$[$%A&%A4HHr   )r   r   r   r   r
  r   )rl   r   r+  r  r   inserting_beforer   rg  r  r  wraps)r   r  
compile_gmr   r  r]  r  r  s         @@r~   r  r  	  s     r?DIIER""2&HB		"	"4	(
 
)HHr""""R(K__[!I "I N 
)	(s   C
Cc                4  ^^ U R                   R                  m[        R                  R                   R	                  5       U R                   l        U R                  5         U" U TR                  " U6 5      m[        R                  " T5      SUU4S jj5       nU$ )z
`torch._dynamo.export` embeds pytrees in the FX graph codegen object,
convert that to a normal FX graph so inductor can compile it.
c                 F   > TR                  T" TR                  " U 6 6 5      $ ry   )process_outputsprocess_inputs)r   codegenr  s    r~   r]  +handle_dynamo_export_graph.<locals>.wrapper	  s$    &&{G4J4JD4Q'RSSr   )r   r   r
  r   )	r   r  r   r   CodeGenrh  r  r  r  )r   r  r  r]  r  r  s       @@r~   r  r  	  su     hhG..0BHHLLNR!7!7!@AK__[!T "T Nr   c                   SS jn[         R                  " U R                  R                  5       U R                  5       H  n[        U[        5      (       d  M  [        U5      nU(       a2  [        U5      (       a"  UR                  5       [        R                  :w  a  M`  [        U5      nUR                  SS9(       a    g U" UR                  5       5        M     g )Nc                    SSK Jn  U c   e[        U R                  5      nUR	                  U 5      n[
        R                  " UR                   S35        U" S5      e)Nr   )rR   z9 does not support bfloat16 compilation natively, skippingzBF16 is not supported)torch._dynamo.excrR   r   r~  get_device_propertiesr   r   r   )r}  rR   device_interfacedevice_propss       r~   warn_and_skip1_check_triton_bf16_support.<locals>.warn_and_skip	  s\    /!!!3FKK@'==fE  !!Z[	
 /00r   F)including_emulation)r}  zOptional[torch.device]r
  r   )	itertoolschaingraph_inputsr  r  r   rc   rb   rB   	get_dtyper   bfloat16r   is_bf16_supported
get_device)r   r  r   r  r  s        r~   r  r  	  s    
1  2 2 9 9 ;U=P=PQ$''%d++&&~~5>>1 4K@--%-Hdoo'( Rr   )optionsc                  SSK Jn  U" U 5      (       d   S5       eSnSn[        U R                  R                  [
        R                  R                  R                  5      (       a  U R                  R                  n[
        R                  R                  R                  5       U R                  l        U R                  5         UR                  R                  b  UR                  R                  nUR                  R                  b  UR                  R                  nO:[        U S5      (       a  U R                  n[        U S5      (       a  U R                  nUb  [         R"                  " U5      OSnUb  [         R"                  " U5      OSn	[         R$                  " X=(       d    0 45      u  p['        S U
 5       5      (       a  S	S
KJnJn  U" UR.                  S5      eU
 Vs/ s H,  n[        US   [
        R0                  5      (       a  US   OSPM.     nnUb  X:w  a  [3        SU SU 35      eUc  UU	S.O0 UEUU	S.EnX4$ s  snf )z
Flatten the inputs to the graph module and return the flat inputs and options.
Add "aot_inductor.serialized_in_spec" and "aot_inductor.serialized_out_spec" to the options.
rV   )r  zGraph output must be a tuple(). This is so that we can avoid pytree processing of the outputs. Please change the module to have tuple outputs.N_in_spec	_out_spec c              3  \   #    U  H"  n[        US    [        R                  5      v   M$     g7f)rV   N)r   r   r  r   s     r~   r   '_aoti_flatten_inputs.<locals>.<genexpr>
  s&     
M9LA:adE..//9Ls   *,r   )	UserErrorUserErrorTypezTorchBind objects found in inputs. TorchBind object inputs are not supported in AOTInductor. TorchBind objects can only be attributes.z>Trying to flatten user inputs with exported input tree spec: 
z-
but actually got inputs with tree spec of: 
)zaot_inductor.serialized_in_specz aot_inductor.serialized_out_spec)rL  r  r   r   r  r   r   rT   r  rh  pytree_infoin_specout_specr  r  r  r+  treespec_dumpstree_flatten_with_pathr  r  r  r  INVALID_INPUTr   r  )r   r   r   r  r  r  r  r  serialized_in_specserialized_out_specflat_args_with_pathreceived_specr  r  r   flat_example_inputss                   r~   _aoti_flatten_inputsr  	  s5    0r"" 	" GH"((##UXX^^%B%BCC((##!HHNN224
&&2))11G''3**33H 2z""kkG2{##||H;B;N..w7TV+3+?h'R  *0)F)F	|*& 
M9L
MMM>''8
 	
 CVBUQ
1Q4..!D8BU   }7Mi <o
 	
 ? 0B0C	



/A0C
  ''1s   &3I)r}   r	  r
  z.Callable[[Callable[_P, _T]], Callable[_P, _T]])r   r  r   r  r
  rw  )r
  ztuple[FxCompileMode, bool])r   r   r
  	list[int])r   rI   r
  rw  )r
  zCallable[..., None]rv  )r  rI   r   rI   r
  rw  )r  rI   r   rI   r-  r   r
  rI   )F)r   rI   rB  r  r
  zGenerator[str, None, None])r   rI   rP  r  r
  rI   )r   rI   rB  r  r
  rw  )r   rI   r\  r  r
  rw  )TNN)
r   rI   ri  r  rj  zOptional[list[str]]rk  z)Optional[Callable[[torch.fx.Node], bool]]r
  z"tuple[GraphModule, dict[str, int]])r   rI   r
  r  )rP  r  r
  "AbstractContextManager[None, None])r  r  r  r  r
  r  )r   rI   rP  r  r  r  r
  z torch._subclasses.FakeTensorModery   )r  z$Optional[Union[str, dict[str, Any]]]r
  zdict[str, Any])r
  zGenerator[None, None, None]r  )r   rI   rP  r  rK  r  r
  r;   )
r   rI   rP  r  rL  r  rK  r  r
  r;   )r  r  r  r  r
  r  )r   )r  Callable[..., Any]r  r  r  r   r  zlist[Optional[str]]r  r  r\  r  r  ztuple[torch.Tensor, ...]r  zSequence[PlaceholderInfo]r  ztuple[int, ...]r
  r  )r   torch.Tensorr
  r  )r'  r  r(  r  r)  r  r
  rw  )r  r  r  zlist[torch.Tensor]r  r  r
  r:  )
rN  rI   rO  r9  rG  r  r  zOptional[dict[str, str]]r
  z%Union[list[Union[str, Weights]], str])re  rI   rf  r  rg  rI   rh  r   rG  r  r  r=   r  r   ri  r1   r
  z0Callable[[list[object]], Sequence[torch.Tensor]])r
  zdict[str, object])r   torch.fx.GraphModuler
  zAbstractContextManager[None])rN  rI   rO  r  rG  zCallable[..., OutputCode]r  Optional[dict[str, Any]]r|  z.Optional[dict[OpOverload, Callable[..., Any]]]r}  r  r
  zPUnion[Callable[[list[object]], Sequence[torch.Tensor]], str, list[str], Weights])r   rI   r  r  r  r  r
  r  )r   ra   r
  rw  )
r   r  r   z!Union[list[Any], tuple[Any, ...]]r   r  r  r  r
  z tuple[list[Any], dict[str, Any]])
__future__r   r  enumr  r  r  r  r   r   r  r1  r   abcr   r   collectionsr   r   inspectr   r	   operatorr
   typingr   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   unittestr   torch._inductor.async_compiler   torch.fxtorch.utils._pytreer   _pytreer+  functorch.compiler   r   torch._dispatch.pythonr   torch._dynamor   r   r  r   rz   torch._dynamo.device_interfacer   torch._dynamo.repro.after_aotr    r!  r!   r"   r#   r$   r%   r&   r'   r(   r)   torch._functorchr  7torch._functorch._aot_autograd.subclass_parametrizationr*   torch._functorch.aot_autogradr+   r,   r-   torch._inductor.codecacher.   r/   r0   r  r1   r2   r3   r4   torch._inductor.debugr5   torch._inductor.output_coder6   r7   r8   r9   r:   r;   'torch._inductor.runtime.cache_dir_utilsr<   torch._inductor.utilsr=   r>   r?   r@   rA   rB   rC   rD   rE   "torch._library.fake_class_registryrF   torch._loggingrG   torch._utils_internalrH   rI   %torch.fx.experimental.symbolic_shapesrJ   rK    torch.fx.passes.fake_tensor_proprL   torch.monitorrM   torch.utils._ordered_setrN   _dynamo.backends.commonrP   _dynamo.excrQ   rR   fx._lazy_graph_modulerS   fx.graphrT   utils._tritonrU   r  rW   codegen.commonrX   rY   r*  rZ   decompositionr[   excr\   fx_passes.joint_graphr]   fx_passes.post_gradr^   r_   fx_passes.pre_gradr`   r   ra   irrb   rc   output_coderd   triton_bundlerre   rf   rg   rh   ri   rj   rk   rl   rm   rn   virtualizedro   collections.abcrp   rq   rr   
torch._opsrs   )torch.export.pt2_archive._package_weightsrt   ru   rv   rw   r  r   r   torch._inductor.fb.utils&torch._functorch._aot_autograd.schemasr   r   r   Enumr   r   r  r
  r   r   r   _logginggetArtifactLoggerr  r  r  r)  r  r   r   r   r   	lru_cacher   cacher   r  r<  rG  rI  rV  rZ  rt  r  r  r  r  r  contextmanagerr  r  r  r  r  rf  rl  r{  r  r+  r   r$  r*  r  rE  r  rs  rv  r  rL  r  r  r  r  r  r   r   r~   <module>rQ     s   "    	    	 
   # # -     I I U U  $  $ $ A  ;  D =
 
 
 8 
 O N  A  >
 
 
 @ + ?   W ; & / 2 5 : % &  U  .  5 B /   ' I )
 
 
  3:%A$ t_T]((**% L DII +: %=$> !!!00<Hnn66xARS ~~77BTU NN44'  ~~77BTU 
4A*'=  T/ / 	
 	
?/DK	K%K8FKK^ 38!!+/!!0NN'N N( 38+/	(	+ "15FJ	E(E(E( /E( D	E(
 (E(P*('('(.	(	(!%	('	( ).' "& &	@ <@(8((  
;y 
; *
*
'*
 '*
 	*
Z 23@@'@ -@ 	@ 4@F
B B# #4s) slYY'Y
 #Y -Y YB  $   J (*) +-.0*,))$) 	)
 &) ) ) () ,) () )XU		  
	 (*\M\M\M %\M &	\MD )9/3	;+;+$;+ &;+ -	;+
 +;+| qc#c+c c 	c
 &c c c %c 6cL&* 0@/3EI"G9G9(G9 -G9 -	G9
 CG9 G9 VG9T$ # 	4 # 	,)D (,R(
 )-R(R(
+R( %R(
 &R( &R(r   