
    7hv                      % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKrS SKrS SKJrJr  S SKJrJr  S SKJr  S SKJrJrJrJrJrJrJrJrJr  S SKJrJ r   S SK!r!S SK"r"S SK#r"S SK$J%r%  S S	K&J'r(  S S
K)J*r*  S SK+J,r,  S SK-J.r/  S SK0J1r1J2r2J3r3  S SK4J5r5J6r6  SSK7J8r8J9r9  SSK:J;r;  SSK<J=r=J>r>  SSK?J@r@JArAJBrBJCrCJDrDJErEJFrFJGrGJHrHJIrIJJrJJKrK  SSKLJMrMJNrNJOrOJPrPJQrQJRrR  \(       ac  S SKSJTrTJUrUJVrV  S SK#JWrW  SSKXJYrY  SSKZJ[r[J\r\J]r]J^r^  SSK_J`r`  SSKaJbrbJcrcJdrd  SSKeJfrf  \ " S5      rg\\\c   /\b4   rh\i\f   rj\\k\!R                  4   rm\krn\"R                  R                  \qS5      rr\R                  " \q5      rtGSS jru\R                   " S  S!5      5       rw " S" S#\R$                  5      rx " S$ S%\5      ry\E" S&S'9 " S( S)\y5      5       rz " S* S+5      r{\R                   " S, S-5      5       r|\R                   " S. S/5      5       r}\R                   " S0 S15      5       r~\R                   " S2 S35      5       r\R                   " S4 S55      5       r\\z\|\}\\~4   r0 rS6\S7'    " S8 S95      r0 rS:\S;'   0 rS<\S='     GS           GSS> jjr " S? S@\5      r    GSSA jr      GSSB jrGSSC jr GS	     GS
SD jjrGSSE jr\GR                  GSSF j5       r        GSSG jr      GSSH jrGSSI jr\"GR&                  \"GR(                  \"GR*                  \"GR(                  0\"GR,                  \"GR.                  \"GR0                  \"GR2                  \"GR4                  \"GR6                  \"GR8                  \"GR:                  \"GR<                  \"GR>                  \"GR@                  4 V s0 s H  n X _M     sn ErSJ\SK'           GSSL jr        GSSN jr " SO SP5      r " SQ SR\/5      r. " SS ST5      r\GRL                  " SU\GRN                  SV9rGSSW jr " SX SY\=\\N\   5      r\R                   " SZ S[5      5       r\" GS0 S\\" \%GRZ                  S] S^S_9_S`\" \%GRZ                  Sa Sb ScSd9_Se\" \%GRZ                  Sf Sg ShSd9_Si\" \%GRZ                  Sj Sk SlSd9_Sm\" \%GRZ                  Sn So SpSd9_Sq\" \%GRZ                  Sr Ss SqSt9_Su\" \%GRZ                  Sv Sw SxSd9_Sy\" \%GRZ                  Sz S{ S| SyS}9_S~\" \%GRZ                  S S~S_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S S S SS9_S\" \%GRZ                  S S SSt9_S\" \%GRZ                  S S SSd9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S S SSd9_S\" \%GRZ                  S S SSd9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_S\" \%GRZ                  S SS_9_6rS\S'   GSS jr " S S\A5      r " S S\D5      r " S S\5      r\R                   " S S5      5       r " S S5      r\" 5       r " S S5      r " S S5      r\ " S\kS9r\ " SM\\S9r\(       a  \\"GRv                  \P\\\\S4   4   4   r " S S\\\4   5      r " S S5      r " S S\\\   5      r\R                   " S S5      5       r\GR                  GSS j5       r " S GS 5      r " GS GS\>5      rgs  sn f (      )annotationsN)ABCabstractmethod)autoEnum)chain)	AnyCallablecastClassVarGeneric
NamedTupleOptionalTYPE_CHECKINGUnion)SelfTypeVar)ELEMENTWISE_TYPE_PROMOTION_KIND)_pytree)
OrderedSet)int_oo)PythonPrinter)free_symbol_is_typesymbol_is_typeSymT)bound_sympyValueRanges   )configmetrics)DtypePropagationOpsHandler)BasicMathOpsMixinDefaultHandler)boolean_opsDeferredLineBasegenerate_assertget_current_backendIndentedBufferir_dataclass
ScopedDict	sympy_dotsympy_index_symbol
sympy_substriton_typeunique)ops
OpsHandlerOpsValueReductionType	StoreModeV)IteratorMutableMappingSequence)GraphModule)CustomGraphModulePass)BufferChoiceCallerFixedLayoutIRNodeLoopBody)BaseScheduling	SchedulerSchedulerNode   PythonWrapperCodegen_Tschedulec                    [         R                  [        R                  5      (       a  [         R	                  SU 5        g g )NzData type propagation: %s)schedule_logisEnabledForloggingDEBUGdebug)msgs    X/var/www/fran/franai/venv/lib/python3.13/site-packages/torch/_inductor/codegen/common.pydata_type_loggerrQ   W   s-      //6< 0    c                  j    \ rS rSr% SrS\S'   S\S'   SS jr\SS j5       rSS	 jr	\SS
 j5       r
Srg)FileBackedGraphModule\   z
Output of FX wrapper codegen. Exposes the same methods as ModuleType, but these
map back to a GraphModule instead of Python source.
r9   gmzCallable[..., Any]compiled_fnc                .   [         R                  " SSSS9U l         [        R                  " [        R
                  U R                   R                  5        U R                    nUR                  U R                  5        S S S 5        g ! , (       d  f       g = f)Nzw+z.pyF)modesuffixdelete)	tempfileNamedTemporaryFileatexitregisterosremovenamewritevalue)selffs     rP   __post_init__#FileBackedGraphModule.__post_init__f   s^     !33eE
 			4==#5#56]]aGGDJJ ]]s   !B
Bc                .    U R                   R                  $ N)r\   rb   re   s    rP   __file__FileBackedGraphModule.__file__p   s    }}!!!rR   c                     U R                   " U6 $ rj   )rW   re   argss     rP   callFileBackedGraphModule.callt   s    &&rR   c                .    U R                   R                  $ rj   )rV   coderk   s    rP   rd   FileBackedGraphModule.valuew   s    ww||rR   )r\   NreturnNonerw   str)rp   	list[Any]rw   r	   )__name__
__module____qualname____firstlineno____doc____annotations__rg   propertyrl   rq   rd   __static_attributes__ rR   rP   rT   rT   \   sF    
 	O##  " "'  rR   rT   c                  H    \ rS rSrSrSrSr\S	S j5       r\S
S j5       r	Sr
g)WorkspaceZeroMode|   r   rD   r   c                    X:X  d  U[         R                  :X  a  U $ U [         R                  :X  a  U$ [        SU < SU< S35      e)NzWorkspaceZeroMode.combine(, ))r   UNINITIALIZEDNotImplementedErrorabs     rP   combineWorkspaceZeroMode.combine   sI    6Q+999H!///H!$>qe2aU!"LMMrR   c                P    U (       a  [         R                  $ [         R                  $ rj   )r   ZERO_ON_CALLr   )	zero_fills    rP   	from_boolWorkspaceZeroMode.from_bool   s    $111 ...rR   r   N)r   r   r   r   rw   r   )r   boolrw   r   )r|   r}   r~   r   r   r   ZERO_PER_GRAPHstaticmethodr   r   r   r   rR   rP   r   r   |   s9    MLNN N / /rR   r   c                  @    \ rS rSrSr\SS j5       r\SS j5       rSrg)	CodegenSymbol   zH
An IR object possibly corresponding to a variable in the wrapper code.
c                    g rj   r   rk   s    rP   get_nameCodegenSymbol.get_name       rR   c                    g rj   r   rk   s    rP   get_exampleCodegenSymbol.get_example   r   rR   r   Nry   rw   z!Union[torch.Tensor, sympy.Symbol])	r|   r}   r~   r   r   r   r   r   r   r   rR   rP   r   r      s/        rR   r   T)frozenc                  D   \ rS rSr% SrS\S'   S\S'   S\S'   S	\S
'   SrS	\S'   \R                  r	S\S'   \
SS S jj5       r\
S!S j5       r\
S"S j5       r\
S"S j5       rS#S jr\rS$S jrS%S jrS&S jr\S&S j5       r\r\r\rS'S jrS(S jrS(S jrS)S jrS*S jrSrg)+WorkspaceArg   a  A temporary buffer used for a single kernel, then discarded.

Not registered as a traditional buffer since there are no users,
so it would be dead code eliminated.

Args:
    nbytes: The size of the buffer in bytes.
    zero_fill: Whether the buffer should be initialized to zero.


sympy.Exprcountr   	zero_modetorch.devicedevicerz   
outer_namews_ptr
inner_nametorch.dtypedtypec                P    U  [        [        R                  R                  5       3$ rj   )nextr5   graphworkspace_id)prefixs    rP   unique_nameWorkspaceArg.unique_name   s!    $qww334566rR   c                    U R                   UR                   :H  =(       a9    U R                  UR                  :H  =(       a    U R                  UR                  :H  $ rj   )r   r   r   r   s     rP   can_joinWorkspaceArg.can_join   s@     LLALL(XQWW-?XAHHPQPXPXDX	
rR   c           	         [        U R                  UR                  -   [        R                  U R                  UR                  5      U R
                  U R                  U R                  U R                  S9$ N)r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   r   s     rP   joinWorkspaceArg.join   sS    ''AGG#'//Q[[I''88||||
 	
rR   c           	        U R                   UR                   :X  a4  U R                  UR                  :X  a  U R                  UR                  :X  d   e[        [        R
                  " U R                  UR                  5      [        R                  U R                  UR                  5      U R                   U R                  U R                  U R                  S9$ r   )r   r   r   r   sympyMaxr   r   r   r   r   r   s     rP   maximumWorkspaceArg.maximum   s     GGqww188qxx#7ALLALL<X	
X))AGGQWW-'//Q[[I''88||||
 	
rR   c                    U R                   $ rj   r   rk   s    rP   
get_deviceWorkspaceArg.get_device   s    {{rR   c                    U R                   $ rj   r   rk   s    rP   	get_dtypeWorkspaceArg.get_dtype   s    zzrR   c                >    U R                  5       R                  5       $ rj   )
get_layoutr   rk   s    rP   r   WorkspaceArg.get_example   s     ,,..rR   c                `    SSK Jn  U" U R                  U R                  U R                  /S/S9$ )Nr   )r=   rD   )r   r   sizestride)irr=   r   r   r   )re   r=   s     rP   r   WorkspaceArg.get_layout   s.    $;;****3	
 	
rR   c                "    U R                  5       $ rj   )r   rk   s    rP   layoutWorkspaceArg.layout   s      rR   c                6    [         R                  R                  $ rj   )r   SZerork   s    rP   
get_offsetWorkspaceArg.get_offset   s    ww||rR   c                    U R                   /$ rj   )r   rk   s    rP   get_sizeWorkspaceArg.get_size   s    

|rR   c                8    [         R                  R                  /$ rj   )r   r   Onerk   s    rP   
get_strideWorkspaceArg.get_stride   s    }rR   c                    U R                   $ rj   )r   rk   s    rP   r   WorkspaceArg.get_name   s    rR   c                    / $ rj   r   rk   s    rP   get_inputs_that_alias_output)WorkspaceArg.get_inputs_that_alias_output   s    	rR   r   N)
workspace_)r   rz   rw   rz   )r   r   r   r   rw   r   )r   r   r   r   rw   r   )rw   r   )rw   r   r   )rw   r=   )rw   r   )rw   list[sympy.Expr]ry   )rw   	list[str])r|   r}   r~   r   r   r   r   torchuint8r   r   r   r   r   r   r   get_device_or_errorr   r   r   r   r   get_output_specmaybe_get_output_specmaybe_get_layoutr   r   r   r   r   r   r   rR   rP   r   r      s    	   OJE;$7 7 
 

 
 
 
 
 %/
 ! ! !O&!rR   r   c                  (    \ rS rSrSS jrSS jrSrg)TritonScratchWorkspacei  c                    Xl         X l        g rj   )r   _generate_dtype_str)re   r   generate_dtype_strs      rP   __init__TritonScratchWorkspace.__init__  s    	#5 rR   c                "    U R                  5       $ rj   )r   rk   s    rP   r   )TritonScratchWorkspace.generate_dtype_str  s    ''))rR   )r   r   N)r   intr   Callable[..., str]ry   )r|   r}   r~   r   r   r   r   r   rR   rP   r   r     s    6*rR   r   c                  x    \ rS rSr% S\S'   S\S'   S\S'   \R                  R                  rS\S'   S	r	S
\S'   Sr
g	)	TensorArgi  rz   rb   bufferr   r   r   offsetNOptional[str]alias_ofr   )r|   r}   r~   r   r   r   r   r   r  r  r   r   rR   rP   r   r     s.    
IKFJ%"Hm"rR   r   c                  >    \ rS rSr% S\S'   S\S'   \S	S j5       rSrg)
SizeArgi  rz   rb   r   exprc                    g rj   r   rk   s    rP   r  SizeArg.alias_of  s    rR   r   Nrw   r  )r|   r}   r~   r   r   r   r  r   r   rR   rP   r  r    s    
I
 rR   r  c                       \ rS rSr% S\S'   Srg)ConstexprArgi  rz   rb   r   Nr|   r}   r~   r   r   r   r   rR   rP   r  r    s    
IrR   r  c                  >    \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	rg
)TMADescriptorArgi$  rz   rb   api_typezOptional[list[sympy.Expr]]block_shapeOptional[torch.dtype]r   r   Nr  r   rR   rP   r  r  $  s    
IM++  rR   r  c                  8    \ rS rSr% S\S'   S\S'   SrS\S'   S	rg)
DeviceCodegeni,  SchedulingConstructor
schedulingWrapperConstructorwrapper_codegenNOptional[WrapperConstructor]cpp_wrapper_codegenr   )r|   r}   r~   r   r   r  r   r   rR   rP   r  r  ,  s    %%''8<5<rR   r  zdict[str, DeviceCodegen]device_codegensc                      \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrSS jrSS jrSS jrSS jrSS jr      SS jrSrg)DeviceOpOverridesi8  c                    [         erj   r   re   rb   s     rP   import_get_raw_stream_as*DeviceOpOverrides.import_get_raw_stream_as9      !!rR   c                    [         erj   r  re   
device_idxs     rP   
set_deviceDeviceOpOverrides.set_device<  r"  rR   c                    [         erj   r  rk   s    rP   synchronizeDeviceOpOverrides.synchronize?  r"  rR   c                    [         erj   r  r$  s     rP   device_guardDeviceOpOverrides.device_guardB  r"  rR   c                    [         erj   r  rk   s    rP   cpp_device_guard"DeviceOpOverrides.cpp_device_guardE  r"  rR   c                    [         erj   r  rk   s    rP   cpp_aoti_device_guard'DeviceOpOverrides.cpp_aoti_device_guardH  r"  rR   c                    [         erj   r  rk   s    rP   cpp_stream_guard"DeviceOpOverrides.cpp_stream_guardK  r"  rR   c                    [         erj   r  rk   s    rP   cpp_aoti_stream_guard'DeviceOpOverrides.cpp_aoti_stream_guardN  r"  rR   c                    [         erj   r  rk   s    rP   cpp_getStreamFromExternal+DeviceOpOverrides.cpp_getStreamFromExternalQ  r"  rR   c                    [         erj   r  rk   s    rP   kernel_headerDeviceOpOverrides.kernel_headerT  r"  rR   c                    [         erj   r  rk   s    rP   kernel_driverDeviceOpOverrides.kernel_driverW  r"  rR   c                    [         erj   r  rk   s    rP   cpp_stream_type!DeviceOpOverrides.cpp_stream_typeZ  r"  rR   c                    [         erj   r  rk   s    rP   aoti_get_stream!DeviceOpOverrides.aoti_get_stream]  r"  rR   c                    [         erj   r  rk   s    rP   cpp_kernel_type!DeviceOpOverrides.cpp_kernel_type`  r"  rR   c                    [         erj   r  rk   s    rP   cpp_device_ptr DeviceOpOverrides.cpp_device_ptrc  r"  rR   c                    [         erj   r  rk   s    rP   tma_descriptor_helpers(DeviceOpOverrides.tma_descriptor_helpersf  r"  rR   c                    [         erj   r  )re   idx	workspaces      rP   cpp_global_scratch$DeviceOpOverrides.cpp_global_scratchi  s
     "!rR   r   Nrb   rz   rw   rz   )r%  r   rw   rz   ry   )rS  r   rT  r   rw   zOptional[tuple[list[str], str]])r|   r}   r~   r   r   r&  r)  r,  r/  r2  r5  r8  r;  r>  rA  rD  rG  rJ  rM  rP  rU  r   r   rR   rP   r  r  8  so    """"""""""""""""""#9"	("rR   r  zdict[str, DeviceOpOverrides]device_op_overrides_dictz*dict[str, Optional[CustomGraphModulePass]]custom_backend_passesc                <    [        XU5      [        U '   U[        U '   g rj   )r  r  rY  )r   device_schedulingdevice_wrapper_codegendevice_cpp_wrapper_codegendevice_custom_passs        rP   register_backend_for_devicer_    s&     ,3MOF %7&!rR   c                      \ rS rSr\" 5       r\" 5       r\" 5       r\" 5       r\" 5       r	\" 5       r
\" 5       r\" 5       r\" 5       r\" 5       rSrg)BackendFeaturei  r   N)r|   r}   r~   r   r   FOREACH	BUCKETIZEINPLACE_BUFFERSMASKED_SCATTER_WITH_INDEXSCANSORTTUPLE_REDUCTIONPREFER_STORE_LOOP_ORDERTRITON_TEMPLATESREDUCE_TO_SINGLE_ELEMENTr   r   rR   rP   ra  ra    sL    fGIfO $6D6DfO"fv#vrR   ra  c                \   U c
  [        5       $ [        5         [        U [        R                  5      (       a  U R
                  nO=[        U [        5      (       d   [        U 5      5       eU n[        R                  " U5      n [        U5      nU(       d   eU" S 5      nUR                  U 5      $ rj   )	r   init_backend_registration
isinstancer   r   typerz   get_scheduling_for_deviceget_backend_features)r   device_typescheduling_ctorr  s       rP   rq  rq    s     ~|&%,,''kk&#&&4V4&k*/<O? &J**622rR   c                L    [        U[        5      (       d   eU[        U 5      ;   $ )zSee also V.graph.has_feature)rn  ra  rq  )r   features     rP   has_backend_featurerv    s(     g~....*6222rR   c                @    U [         ;   a  [         U    R                  $ S $ rj   )r  r  r   s    rP   rp  rp    s     17?1J?6"--TPTTrR   c                h    U [         ;   a(  [         U    nU(       a  UR                  $ UR                  $ g rj   )r  r  r  )r   cpp_wrapperwrapper_codegen_objs      rP   get_wrapper_codegen_for_devicer{    sA      -<V-D   33	
 %44	

 rR   c                ,    U [         ;   a	  [         U    $ S $ rj   )rY  r   s    rP   "get_custom_backend_pass_for_devicer}    s    ,26K,K (UQUUrR   c                   ^^ SSK Jn   SSKJn  SSKJn  SSKJn  SSKJ	n  SSK
Jn  SSKJn  SS	KJn  SS
KJn  SSKJn	  [)        S5      c9  U UUS.m[+        SU4S jU	[,        R.                  R0                  (       a  UOU5        [)        S5      c  UUS.m[+        SU4S jU	U5        [)        S5      c  [+        SUU	U5        [)        S5      c  [+        SUU	U5        [2        R4                  R7                  5       n
U
S:w  aS  [)        U
5      cF  SSKJn   U" S5      nU" S5      nU" S5      nU(       a  U(       a  U(       a  [+        U
UUU5        g g g g g g ! [<         a     g f = f)NrD   )CppScheduling)CppWrapperCpu)CppWrapperCpuArrayRef)CppWrapperGpu)CppWrapperMps)CUDACombinedScheduling)HalideScheduling)MetalScheduling)TritonSchedulingrE   cpu)cpphalidetritonc                6   > T[         R                     " U 5      $ rj   )r   cpu_backend)r  cpu_backendss    rP   <lambda>+init_backend_registration.<locals>.<lambda>  s    |F,>,>?
KrR   cuda)r  r  c                6   > T[         R                     " U 5      $ rj   )r   cuda_backend)r  cuda_backendss    rP   r  r    s    }V-@-@A*MrR   xpumpsprivateuseoner   )_get_custom_mod_func
SchedulingrF   CppWrapperCodegen)r  r  cpp_wrapper_cpur  cpp_wrapper_cpu_array_refr  cpp_wrapper_gpur  cpp_wrapper_mpsr  cuda_combined_schedulingr  r  r  r  r  r  r  wrapperrF   rp  r_  r   aot_inductorallow_stack_allocationr   _C_get_privateuse1_backend_name torch.utils.backend_registrationr  RuntimeError)r  r  r  r  r  r  r  r  r  rF   private_backendr  r[  r  r  r  r  s                  @@rP   rm  rm    sp   ".@..@($(- '/ &&

 	$K ""99 "	
 !(0 -&
 	$M 		
 !'/# 		
 !'/# 		
 hh<<>O?*%o6>I	 4\ B23IJO"67J"K _9L+#%#'	 :M_  ? 	+   		s   ;E 
E E c                J    SSK Jn  / U Q[        XR                  U5      5      P$ )Nr   )FlexibleLayout)r   r  r+   contiguous_strides)index
index_varssizesr  s       rP   index_prevent_reorderingr  !  s*    
 $ UUTIj*K*KE*RSTTrR   c                    U[         U '   g rj   )rX  )r   device_op_overridess     rP   register_device_op_overridesr  ,  s     (;V$rR   c                    [        U [        5      (       d   [        U 5      5       e[        (       d  SSKJnJn  SSKJn  SSK	Jn  [        U    $ )NrD   )cpu_device_op_overridesmps_device_op_overrides)r  )
rn  rz   ro  rX   r  r  r  r  r  )r   r  r  r  xpu_op_overridess        rP   get_device_op_overridesr  2  s8    fc""0DL0"##F-@#F++rR   zdict[torch.dtype, torch.dtype]DTYPE_TO_COMPUTATION_DTYPEc                   U [        5       ;   a  [        R                  $ U S;   a  SU;   a  US   $ US   $ U S;   a  [        R                  $ U S;   a  [        R                  $ U S:X  a  SU;   a  US   $ US   $ U S:X  a  SU;   a  US   $ US   $ U S	;   a$  US   n[
        R                  R                  U5      $ U S
:X  a  SU;   a  US   $ US   $ g)zC
Given op name and a list of input dtypes, deduce the output dtype
)to_dtype
index_exprr   )randrandn)	get_index	randint64	load_seed	reductionrD   constant)loadstorestore_reductionto_dtype_bitcastN)r$   r   r   floatint64r5   r   r   )op_namerp   kwargsbuf_names       rP   deduce_output_dtype_by_namer  S  s
    +-zz	  
 #*V"3vgAbA	  
 {{	  

 {{	K	")V"3vg@a@	J	")V"3vgAbA	  

 7ww  **	&	&")V"3vgAbArR   CSEVariableTypec                *   [        5       n[        R                  R                  (       a(  US:X  a"  U R	                  SU S[        U5       S35        g [        R                  R                  (       a  US:X  a  SSKJnJ	n  [        X5      (       d   [        U5      5       eU[        R                  :X  a"  UR                  (       a  SU S	3nO2S
U SU S3nO(SU S3nUR                  (       a  SU S3nSU SXR    S3nU R	                  SU S35        g g g )Nr  ztl.static_assert(z
.dtype == r   r  rD   )CppCSEVariableDTYPE_TO_CPPzIsVecMaskType<decltype(z	)>::valuezstd::is_same_v<decltype(z$), bool> || std::is_same_v<decltype(z), int>z	decltype(z	typename z::value_typezstd::is_same_v<r   >zstatic_assert(z);)r'   r   test_configsruntime_triton_dtype_assert	writeliner.   static_cpp_dtype_assert	cpp_utilsr  r  rn  ro  r   r   is_vec)r   varr   backendr  r  
is_same_dt
c_var_types           rP   check_dtyper  }  s    "#G667h;N,SEK<N;OqQR				4	4E9I;#..9S	9.EJJzz6se9E
  8u<`ad`eelm
$SE+Jzz(LA
*:,b9L8MQOJ>*R89! :J	4rR   c                  x    \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	\
SS j5       r\
SS	 j5       rS
rg)DataTypePropagationi  c                    Xl         SUR                  R                  0U l        UR                  R                  5        H  u  p#UR                  U R                  U'   M      g Nroot)body
root_blockr   graphs	subblocksitems)re   r  kvs       rP   r   DataTypePropagation.__init__  sL    	DOO))B
 NN((*DAWWDKKN +rR   c                   UR                   nU Vs/ s HB  n[        U[        R                  R                  5      (       d  M.  UR
                  S:w  d  M@  UPMD     nn[        U5      S:X  a  g [        S U 5       5      nU(       d  g [        R                  " [        R                  U Vs/ s H)  o3R                  [        R                     R                  PM+     sn5      $ s  snf s  snf )Nplaceholderr   c              3     #    U  HR  n[         R                  UR                  ;   =(       a)    UR                  [         R                     R                  S Lv   MT     g 7frj   )OptimizationContextkeymetar   ).0ns     rP   	<genexpr>BDataTypePropagation.deduce_node_dtype_by_inputs.<locals>.<genexpr>  sR      )
 !  ##qvv- B*../55TAB s   AA)all_input_nodesrn  r   fxNodeoplenall	functoolsreducepromote_typesr  r  r  r   )re   nodeinputsr  input_nodesall_input_nodes_propagateds         rP   deduce_node_dtype_by_inputs/DataTypePropagation.deduce_node_dtype_by_inputs  s    %%
!Auxx}}!=A!$$-BWAv 	 
 {q %( )
 !)
 &
"
 *<GHKqVV'++,22KH
 	

  Is   -C C C )0C%
c                l    U R                   UR                     nU R                  U5      nU(       d   eU$ rj   )r  targetpropagate_graph)re   r  	sub_graphr   s       rP   deduce_node_dtype_by_subgraph1DataTypePropagation.deduce_node_dtype_by_subgraph  s0    KK,	$$Y/urR   c                   UR                   S:X  a  g UR                  S:X  a  [        UR                  5      S:w  a  g UR                  [        R
                  :X  aY  UR                  S   n[        U[        R                  R                  5      (       d   [        U5      5       eU R                  U5      $ [        UR                  [        5      (       d   [        UR                  5      5       eUR                  R                  S5      (       a  U R                  U5      $ [        UR                  /UR                  Q70 UR                   D6=n b  U$ U R#                  U5      $ )Nr  outputrD   r   masked_subblock)r  r  r  rp   operatorgetitemrn  r   r  r  ro  deduce_node_dtyperz   
startswithr	  r  r  r  )re   r  node_argoutput_dtypes       rP   r  %DataTypePropagation.deduce_node_dtype  s$   77m#;;("s499~':;;(***yy|Hh66FXF6))(33$++s++>T$++->>+;;!!"34455d;; 8 ++ L
   //55rR   c                   UR                   (       d   eS nUR                    H  n[        R                  UR                  ;   a  UR                  [        R                     nO
[        5       nU R	                  U5      Ul        XCR                  [        R                  '   UR                  S:X  d  M  UR
                  nM     U$ )Nr  )nodesr  r  r  r  r   r  )re   r   graph_dtyper  opt_ctxs        rP   r  #DataTypePropagation.propagate_graph  s    {{{-1 KKD"&&$))3))$7$;$;<-/ 2248GM18II)--.{{h&%mm   rR   c                >    U R                  U R                  S   5      $ r  )r  r  rk   s    rP   	propagateDataTypePropagation.propagate  s    ##DKK$788rR   c                .    U " U5      R                  5       $ rj   )r  )clsr  s     rP   propagate_loopbody&DataTypePropagation.propagate_loopbody  s    4y""$$rR   c                   SSK Jn  SSKJn  [	        X5      (       d   [        U5      5       e[	        UR                  U5      (       d   [        UR                  5      5       e[        R                  UR                  5      $ )Nr   r?   )rC   )		loop_bodyr@   	schedulerrC   rn  ro  _bodyr  r  )r  r  r@   rC   s       rP   propagate_scheduler_node,DataTypePropagation.propagate_scheduler_node  s\    (-$..:T
:.$**h//Adjj1AA/"55djjAArR   )r  r  N)r  r@   rw   rx   )r  torch.fx.Noderw   r  )r  r'  rw   r   )r   ztorch.fx.Graphrw   r  )rw   r  )r  r@   rw   r  )r  rC   rw   r  )r|   r}   r~   r   r   r  r	  r  r  r  classmethodr  r%  r   r   rR   rP   r  r    sJ    %
*6:$9 % % B BrR   r  c                  D   ^  \ rS rSrSSS.       SU 4S jjjrSrU =r$ )r   i  T)simplifypc                  > U(       ag  [        U[        R                  5      (       aH  [        [        R
                  S5      (       a)  [        R
                  R                  R                  U5      n[        TU ]%  U5      $ )Nsizevars)
rn  r   Exprhasattrr5   r   r-  r*  superdoprint)re   r  r*  r+  	__class__s       rP   r1  PythonPrinter.doprint  sQ     
444*9U9U77##,,T2Dwt$$rR   r   )r  r   r*  r   r+  r   rw   rz   )r|   r}   r~   r   r1  r   __classcell__r2  s   @rP   r   r     s2    48D%%-1%=A%	% %rR   r   c                     \ rS rSrSr\SS j5       r\SS j5       r\SS j5       r\SS j5       r	\SS j5       r
\SS j5       r\SS	 j5       r\SS
 j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       r\SS j5       rSrg)OpDecompositionsi  z
Decomposes inductor ops
c                    U $ rj   r   )rd   s    rP   identityOpDecompositions.identity	  s	     rR   c                v    [         R                  " [         R                  " S[        R                  5      U 5      $ NrD   )r0   truedivr  r   int32xs    rP   
reciprocalOpDecompositions.reciprocal  s"    {{3<<5;;7;;rR   c                .    [         R                  " X 5      $ rj   )r0   mulr?  s    rP   squareOpDecompositions.square  s    wwq}rR   c                    [         R                  " [         R                  " S[        R                  5      [         R
                  " U 5      5      $ r<  )r0   subr  r   float32erfr?  s    rP   erfcOpDecompositions.erfc  s*    wws||Au}}5swwqzBBrR   c                    [         R                  " [         R                  " [         R                  " U 5      5      [         R                  " U 5      5      $ rj   )r0   rD  exprE  rK  r?  s    rP   erfcxOpDecompositions.erfcx  s,    wwswwszz!}-sxx{;;rR   c                    [         R                  " [         R                  " U 5      [         R                  " S[        R
                  5      5      $ r<  )r0   rH  rN  r  r   rI  r?  s    rP   expm1OpDecompositions.expm1  s*    wwswwqz3<<5==#ABBrR   c           	         [         R                  " [         R                  " U 5      [         R                  " S[        R                  " S5      -  [
        R                  5      5      $ )NrD   
   r0   rD  logr  mathr   rI  r?  s    rP   log10OpDecompositions.log10"  s7    wwswwqz3<<DHHRL0@%--#PQQrR   c           	         [         R                  " [         R                  " U 5      [         R                  " S[        R                  " S5      -  [
        R                  5      5      $ )NrD   r   rV  r?  s    rP   log2OpDecompositions.log2&  s6    wwswwqz3<<DHHQK#OPPrR   c           
         [         R                  " [         R                  " U [         R                  " [        R
                  " S5      [        R                  5      5      5      $ )Nr   )r0   rN  rD  r  rX  rW  r   rI  r?  s    rP   exp2OpDecompositions.exp2*  s3    wwswwq#,,txx{EMM"JKLLrR   c           	         [         R                  " [         R                  " U [         R                  " S[        R
                  5      5      5      $ r<  )r0   rW  addr  r   r>  r?  s    rP   log1pOpDecompositions.log1p.  s+    wwswwq#,,q%++">?@@rR   c                    [         R                  " S[        R                  5      n[         R                  " U[         R
                  " U[         R                  " [         R                  " U 5      5      5      5      $ r<  )r0   r  r   r>  r=  rb  rN  neg)r@  ones     rP   sigmoidOpDecompositions.sigmoid2  sC    ll1ekk*{{3SWWSWWQZ-@ ABBrR   c                v    [         R                  " U [         R                  " S[        R                  5      5      $ Nr   )r0   r   r  r   r>  r?  s    rP   reluOpDecompositions.relu7  s"    {{1cll1ekk:;;rR   c                X    [         R                  " [         R                  " X5      U5      $ rj   )r0   rb  rD  r@  yzs      rP   fmaOpDecompositions.fma;  s     wwswwq}a((rR   c                X    [         R                  " [         R                  " U 5      U5      $ rj   )r0   r  floorr   r   s     rP   floor_to_intOpDecompositions.floor_to_int@      ||CIIaL%00rR   c                X    [         R                  " [         R                  " U 5      U5      $ rj   )r0   r  ceilrv  s     rP   ceil_to_intOpDecompositions.ceil_to_intD  s    ||CHHQK//rR   c                X    [         R                  " [         R                  " U 5      U5      $ rj   )r0   r  truncrv  s     rP   trunc_to_intOpDecompositions.trunc_to_intH  ry  rR   c           	        [         R                  " X5      n[         R                  " [         R                  " U[         R                  " S[
        R                  5      5      [         R                  " [         R                  " U5      [         R                  " U5      5      5      n[         R                  " U[         R                  " X!5      U5      $ rk  )
r0   modand_ner  r   r>  signbitwhererb  )r   r   rconds       rP   	remainderOpDecompositions.remainderL  su    GGAMxxFF1cll1ekk23FF3;;q>3;;q>2
 yyswwq}a00rR   c                X    [         R                  " [         R                  " U 5      U5      $ rj   )r0   r  roundrv  s     rP   round_to_intOpDecompositions.round_to_intU  ry  rR   r   N)rd   OpVarTrw   r  r@  r  rw   r  )r@  r  rp  r  rq  r  rw   r  )r   r  r   r   rw   r  r   r  r   r  rw   r  )r|   r}   r~   r   r   r   r9  rA  rE  rK  rO  rR  rY  r\  r_  rc  rh  rl  rr  rw  r|  r  r  r  r   r   rR   rP   r7  r7    s}      < <   C C < < C C R R Q Q M M A A C C < < ) ) 1 1 0 0 1 1 1 1 1 1rR   r7  z[a-z0-9_.]+|\([^)]*\)|)flagsc                    U S   S:w  d  [        U 5      S:  a  gSn[        U SS  5       H8  u  p#US:X  a  US-  nOUS:X  a  US-  nUS:X  d  M$  U[        U 5      S-
  :w  d  M8    g   US:X  d   eg)Nr   (r   FrD   r   T)r  	enumerate)stringr   ichars       rP   _all_in_parensr  ]  s    ayC3v;?EVABZ(3;QJES[QJEA:!s6{Q. ) A::rR   c                     \ rS rSr\S"S j5       r\S#S j5       r\S$S j5       r\S%S j5       r\S&S j5       r	\S&S j5       r
\S&S j5       r\S&S	 j5       r\S&S
 j5       r\S'S j5       r\S(S j5       r  S)         S*S jjr          S+S jrS,S jr S-         S.S jjrS/S jr          S0S jr        S1S jr          S2S jr  S3               S4S jjrS5S jrS\R6                  SSS.             S6S jjrS7S jrS8S jr\S9S j5       r\ S:S j5       r!\ S;S  j5       r"S!r#g)<OpOverridesil  c                    [        U [        5      (       d*  [        R                  U 5      (       d  [	        U 5      (       a  U $ SU  S3$ Nr  r   )rn  CSEVariable_RE_PAREN_NOT_NEEDED	fullmatchr  )r  s    rP   parenOpOverrides.parenm  sB     v{++#--f55f%% M6(!}rR   c                    [        U 5      $ rj   )repr)rd   r   s     rP   r  OpOverrides.constantx  s    E{rR   c                2    S[         R                  U 5       3$ )N~r  r  r?  s    rP   bitwise_notOpOverrides.bitwise_not|  s    ;$$Q'())rR   c                2    [         R                  U 5       S3$ )Nz == 0r  )r   s    rP   logical_notOpOverrides.logical_not  s    ##A&'u--rR   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz & r  r@  rp  s     rP   bitwise_andOpOverrides.bitwise_and  +    ##A&'s;+<+<Q+?*@AArR   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz | r  r  s     rP   
bitwise_orOpOverrides.bitwise_or  r  rR   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz ^ r  r  s     rP   bitwise_xorOpOverrides.bitwise_xor  r  rR   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz << r  r  s     rP   bitwise_left_shiftOpOverrides.bitwise_left_shift  +    ##A&'tK,=,=a,@+ABBrR   c                \    [         R                  U 5       S[         R                  U5       3$ )Nz >> r  r  s     rP   bitwise_right_shiftOpOverrides.bitwise_right_shift  r  rR   c                .    [         R                  " X5      $ rj   )r0   r=  r   s     rP   int_truedivOpOverrides.int_truediv  s    
 {{1  rR   c                X    [         R                  " U [        R                  " U5      5      $ rj   )r0   r  r   Integer)rb   r  s     rP   r  OpOverrides.load_seed  s    xxemmF344rR   Tc                *    [        [        U5      5      $ rj   )r,   rz   )re   r  r   checkwrap_negs        rP   indirect_indexingOpOverrides.indirect_indexing  s     "#c(++rR   c                D    [        [        U 5      R                   S35      e)Nz,: check_bounds should be handled by CSEProxyr   ro  r|   re   r  r   loweruppers        rP   check_boundsOpOverrides.check_bounds  s'     "Dz""##OP
 	
rR   c                D    [        [        U 5      R                   S35      e)Nz$: load should be handled by CSEProxyr  re   rb   r  s      rP   r  OpOverrides.load  s%    !Dz""##GH
 	
rR   Nc                D    [        [        U 5      R                   S35      e)Nz%: store should be handled by CSEProxyr  re   rb   r  rd   rY   s        rP   r  OpOverrides.store  s'     "Dz""##HI
 	
rR   c                D    [        [        U 5      R                   S35      e)Nz/: store_reduction should be handled by CSEProxyr  re   rb   r  rd   s       rP   r  OpOverrides.store_reduction  s%    !Dz""##RS
 	
rR   c                D    [        [        U 5      R                   S35      e)Nz): reduction should be handled by CSEProxyr  re   r   	src_dtypereduction_typerd   s        rP   r  OpOverrides.reduction  s'     "Dz""##LM
 	
rR   c                D    [        [        U 5      R                   S35      e)Nz$: scan should be handled by CSEProxyr  re   dtypes
combine_fnvaluess       rP   scanOpOverrides.scan  s'     "Dz""##GH
 	
rR   c                D    [        [        U 5      R                   S35      e)Nz$: sort should be handled by CSEProxyr  re   r  r  stable
descendings        rP   sortOpOverrides.sort  s'     "Dz""##GH
 	
rR   c                D    [        [        U 5      R                   S35      e)Nz): bucketize should be handled by CSEProxyr  re   r  
boundariesboundary_indicesindexing_dtyperightsortersorter_indicess           rP   	bucketizeOpOverrides.bucketize  s'     "Dz""##LM
 	
rR   c                D    [        [        U 5      R                   S35      e)Nz2: halide_clamp only implemented for Halide backendr  )re   rd   r   r  s       rP   halide_clampOpOverrides.halide_clamp  s%    !Dz""##UV
 	
rR   rD   )constraintsr   is_purepackc               D    [        [        U 5      R                   S35      e)Nz<: inline_asm_elementwise only implemented for Triton backendr  )re   asmr  r   r  r  r   s          rP   inline_asm_elementwise"OpOverrides.inline_asm_elementwise  s'     "Dz""##_`
 	
rR   c                D    [        [        U 5      R                   S35      e)Nz.: ops.output should not appear at codegen timeAssertionErrorro  r|   ro   s     rP   r  OpOverrides.output  s%    Dz""##QR
 	
rR   c                D    [        [        U 5      R                   S35      e)Nz3: ops.placeholder should not appear at codegen timer  re   r  s     rP   r  OpOverrides.placeholder  s%    Dz""##VW
 	
rR   c                4   ^  SU 4S jjnT Ul         SUl        U$ )Nc                J   > [        [        U 5      R                   ST 35      e)Nz does not implement ops.r  )re   rp   r  rb   s      rP   unimplemented1OpOverrides._unimplemented.<locals>.unimplemented  s*    %:&&''?vF rR   T)re   r  rp   r	   r  r	   rw   r  )r|   is_unimplemented)rb   r  s   ` rP   _unimplementedOpOverrides._unimplemented  s     	
 "&)-&rR   c                    [        XS 5      n[        [        US 5      nU(       + =(       d    X#:H  =(       d    [        USS5      $ )Nr  F)getattrr1   )r  rb   fn
default_fns       rP   _is_unimplementedOpOverrides._is_unimplemented  s;    S%Zt4
vS)SWR9KU-SSrR   c                `   US;   d   U5       e[         R                  5        H  u  p#[        X15      nUc5  U R                  U5      (       a  [	        XU R                  U5      5        MF  MH  X R                  ;  d   SU SU R                   35       eX$l        [	        X[        U5      5        M     g )N)r  r  cppvecr  r  zmultiple definitions of z on )	pointwise_overrides_datar  r  r  setattrr  __dict__r|   r   )r  r  funcnamedataimpls        rP   _initialize_pointwise_overrides+OpOverrides._initialize_pointwise_overrides"  s    EEMvME6<<>NH4(D|((22C3+=+=h+GH 3  ||3 .xjS\\NK3 !)|D'9: ?rR   r   )r  r  rw   r  )rd   zUnion[bool, float, int]r   r   rw   r  r  )r   r  rw   r  )r@  r  rp  r  rw   r  r  )rb   rz   r  r  rw   r  TT)
r  r  r   Union[sympy.Expr, int]r  r   r  r   rw   sympy.Symbol
r  r   r   r   r  r   r  r   rw   rx   )rb   rz   r  r   rw   r  rj   )
rb   rz   r  r   rd   r  rY   r4   rw   rx   )rb   rz   r  r   rd   r  rw   rx   )
r   r   r  r   r  r3   rd   !Union[OpVarT, tuple[OpVarT, ...]]rw   r$  )r  tuple[torch.dtype, ...]r  zFCallable[[tuple[OpVarT, ...], tuple[OpVarT, ...]], tuple[OpVarT, ...]]r  tuple[OpVarT, ...]rw   r&  )
r  r%  r  r&  r  r   r  r   rw   r&  NN)r  r  r  .tuple[str, sympy.Expr, sympy.Expr, sympy.Expr]r  r  r  r   r  r   r   Optional[tuple[str, sympy.Expr]]r  zOptional[OpVarT]rw   r  )rd   r  r   r   r  r   rw   r  )r   r  r  rz   r  r  r   r   r  r   r  r   rw   r  )rp   r  rw   rx   )r  r   rw   r  )rb   rz   rw   zCallable[..., OpVarT]rb   rz   rw   r   )r  rz   rw   rx   )$r|   r}   r~   r   r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   rI  r   r  r  r  r(  r  r  r   r   rR   rP   r  r  l  s'       * * . . B B B B B B C C C C ! ! 5 5 ,, %, 	,
 , 
,

&0
9=
FJ
	

 NR

 *
39
AJ
	


	
	
 	
 &		

 1	
 
+	

'


 #
 

	
'	
 #	
 		

 	
 
	
$ 48+/

 C
 !	

 $
 
 1
 )
 


 &*"]]

 
 #	

 
 
 
 





   T T
 ; ;rR   r  c                      \ rS rSr% S\S'   S\S'   SrS\S'   SrS\S	'   \R                  r	S
\S'   Sr
S\S'   SrS\S'   Srg)OverridesDatai3  rz   rb   r   r  NzOptional[Callable[..., str]]r  r  r   type_promotion_kindr  r  r   )r|   r}   r~   r   r   r  r  r   DEFAULTr-  r  r  r   r   rR   rP   r,  r,  3  sQ    
I	+/F(/+/F(/'// 8  ,0F(/(,C	%,rR   r,  airy_aic                    SU  S3$ )Nzairy_ai_forward(r   r   r?  s    rP   r  r  G  s    (1-rR   special_airy_ai)r-  r  rb   	bessel_j0c                    SU  S3$ )Nzbessel_j0_forward(r   r   r?  s    rP   r  r  L      *1#Q/rR   c                    SU  S3$ )Nzlibdevice.j0(r   r   r?  s    rP   r  r  M      =1-rR   special_bessel_j0)r-  r  r  rb   	bessel_j1c                    SU  S3$ )Nzbessel_j1_forward(r   r   r?  s    rP   r  r  R  r4  rR   c                    SU  S3$ )Nzlibdevice.j1(r   r   r?  s    rP   r  r  S  r6  rR   special_bessel_j1	bessel_y0c                    SU  S3$ )Nzbessel_y0_forward(r   r   r?  s    rP   r  r  X  r4  rR   c                    SU  S3$ )Nzlibdevice.y0(r   r   r?  s    rP   r  r  Y  r6  rR   special_bessel_y0	bessel_y1c                    SU  S3$ )Nzbessel_y1_forward(r   r   r?  s    rP   r  r  ^  r4  rR   c                    SU  S3$ )Nzlibdevice.y1(r   r   r?  s    rP   r  r  _  r6  rR   special_bessel_y1digammac                    SU  S3$ )Nzcalc_digamma(r   r   r?  s    rP   r  r  d  s    aS*rR   c                    U  S3$ )Nz
.digamma()r   r?  s    rP   r  r  e  s    A3j)rR   )r-  r  r  rb   rO  c                    SU  S3$ )Nzcalc_erfcx(r   r   r?  s    rP   r  r  l      A3a(rR   c                    SU  S3$ )Nzlibdevice.erfcx(r   r   r?  s    rP   r  r  m  s    +A3a0rR   special_erfcxrr  c                    SU  SU SU S3$ )Nz	std::fma(r   r   r   ro  s      rP   r  r  r  s    is"QCr!A6rR   c                    SU  SU SU S3$ )Nzfmadd(r   r   r   ro  s      rP   r  r  s  s    s"QCr!A6rR   c                    SU  SU SU S3$ )Nzlibdevice.fma(r   r   r   ro  s      rP   r  r  t  s    s"QCr!A>rR   )r-  r  r  r  rb   igammac                    SU  SU S3$ Nzcalc_igamma(r   r   r   r  s     rP   r  r  z      <s"QCq1rR   igammacc                    SU  SU S3$ Nzcalc_igammac(r   r   r   r  s     rP   r  r        =2aS2rR   gammaincc                    SU  SU S3$ rP  r   r  s     rP   r  r    rQ  rR   special_gammainc	gammainccc                    SU  SU S3$ rT  r   r  s     rP   r  r    rU  rR   special_gammaincci0c                    SU  S3$ )Nzcalc_i0(r   r   r?  s    rP   r  r        1orR   c                    SU  S3$ Nzlibdevice.cyl_bessel_i0(r   r   r?  s    rP   r  r        3A3a8rR   c                    U  S3$ )Nz.i0()r   r?  s    rP   r  r    s
    A3erR   )r-  r  r  r  rb   i0ec                    SU  S3$ )Nz	calc_i0e(r   r   r?  s    rP   r  r        	!A&rR   c                    U  S3$ )Nz.i0e()r   r?  s    rP   r  r    s
    A3frR   special_i0ei1c                    SU  S3$ )Nzcalc_i1(r   r   r?  s    rP   r  r    r^  rR   c                    SU  S3$ Nzlibdevice.cyl_bessel_i1(r   r   r?  s    rP   r  r    ra  rR   
special_i1i1ec                    SU  S3$ )Nz	calc_i1e(r   r   r?  s    rP   r  r    re  rR   special_i1elog_ndtrc                    SU  S3$ )Nzcalc_log_ndtr(r   r   r?  s    rP   r  r    s    qc+rR   special_log_ndtrmodified_bessel_i0c                    SU  S3$ )Nzmodified_bessel_i0_forward(r   r   r?  s    rP   r  r        3A3a8rR   c                    SU  S3$ r`  r   r?  s    rP   r  r    ra  rR   special_modified_bessel_i0modified_bessel_i1c                    SU  S3$ )Nzmodified_bessel_i1_forward(r   r   r?  s    rP   r  r    ru  rR   c                    SU  S3$ rk  r   r?  s    rP   r  r    ra  rR   special_modified_bessel_i1modified_bessel_k0c                    SU  S3$ )Nzmodified_bessel_k0_forward(r   r   r?  s    rP   r  r    ru  rR   special_modified_bessel_k0modified_bessel_k1c                    SU  S3$ )Nzmodified_bessel_k1_forward(r   r   r?  s    rP   r  r    ru  rR   special_modified_bessel_k1ndtrc                    SU  S3$ )Nz
calc_ndtr(r   r   r?  s    rP   r  r    s    
1#Q'rR   special_ndtrndtric                    SU  S3$ )Nzcalc_ndtri(r   r   r?  s    rP   r  r    rH  rR   special_ndtri	polygammac                *    U  SU SU  SU SU SU  S3$ )Nz == 0 ? calc_digamma(z) : (z == 1 ? trigamma(z) : calc_polygamma(r   z))r   r  s     rP   r  r    s3    S%aSaS0A!DWXYWZZ\]^\__abrR   scaled_modified_bessel_k0c                    SU  S3$ )Nz"scaled_modified_bessel_k0_forward(r   r   r?  s    rP   r  r        :1#Q?rR   !special_scaled_modified_bessel_k0scaled_modified_bessel_k1c                    SU  S3$ )Nz"scaled_modified_bessel_k1_forward(r   r   r?  s    rP   r  r    r  rR   !special_scaled_modified_bessel_k1spherical_bessel_j0c                    SU  S3$ )Nzspherical_bessel_j0_forward(r   r   r?  s    rP   r  r    s    4QCq9rR   special_spherical_bessel_j0zetac                    SU  SU S3$ )Nzzeta(r   r   r   r  s     rP   r  r    s    52aS*rR   special_zetachebyshev_polynomial_tc                    SU  SU S3$ )Nzchebyshev_polynomial_t_forward(r   r   r   r  s     rP   r  r        :1#Rs!DrR   special_chebyshev_polynomial_tchebyshev_polynomial_uc                    SU  SU S3$ )Nzchebyshev_polynomial_u_forward(r   r   r   r  s     rP   r  r    r  rR   special_chebyshev_polynomial_uchebyshev_polynomial_vc                    SU  SU S3$ )Nzchebyshev_polynomial_v_forward(r   r   r   r  s     rP   r  r    r  rR   special_chebyshev_polynomial_vchebyshev_polynomial_wc                    SU  SU S3$ )Nzchebyshev_polynomial_w_forward(r   r   r   r  s     rP   r  r    r  rR   special_chebyshev_polynomial_wlegendre_polynomial_pc                    SU  SU S3$ )Nzlegendre_polynomial_p_forward(r   r   r   r  s     rP   r  r        9!BqcCrR   special_legendre_polynomial_pshifted_chebyshev_polynomial_tc                    SU  SU S3$ )Nz'shifted_chebyshev_polynomial_t_forward(r   r   r   r  s     rP   r  r        B1#Rs!LrR   &special_shifted_chebyshev_polynomial_tshifted_chebyshev_polynomial_uc                    SU  SU S3$ )Nz'shifted_chebyshev_polynomial_u_forward(r   r   r   r  s     rP   r  r    r  rR   &special_shifted_chebyshev_polynomial_ushifted_chebyshev_polynomial_vc                    SU  SU S3$ )Nz'shifted_chebyshev_polynomial_v_forward(r   r   r   r  s     rP   r  r    r  rR   &special_shifted_chebyshev_polynomial_vshifted_chebyshev_polynomial_wc                    SU  SU S3$ )Nz'shifted_chebyshev_polynomial_w_forward(r   r   r   r  s     rP   r  r    r  rR   &special_shifted_chebyshev_polynomial_whermite_polynomial_hc                    SU  SU S3$ )Nzhermite_polynomial_h_forward(r   r   r   r  s     rP   r  r    s    82aSBrR   special_hermite_polynomial_hhermite_polynomial_hec                    SU  SU S3$ )Nzhermite_polynomial_he_forward(r   r   r   r  s     rP   r  r    r  rR   special_hermite_polynomial_helaguerre_polynomial_lc                    SU  SU S3$ )Nzlaguerre_polynomial_l_forward(r   r   r   r  s     rP   r  r  !  r  rR   special_laguerre_polynomial_lzdict[str, OverridesData]r  c                   ^  [        U 4S j[        R                  R                  [        R                  R                  [        R                  R
                  [        R                  R
                  4 5       5      $ )Nc              3  .   >#    U  H
  nTU;   v   M     g 7frj   r   )r  r@  rb   s     rP   r  $is_buffer_removed.<locals>.<genexpr>(  s       
A 		
s   )anyr5   r   removed_bufferskernelinplaced_to_removerb   s   `rP   is_buffer_removedr  '  sU      GG##HH$$GG&&HH''	
  rR   c                  D   ^  \ rS rSrSrSU 4S jjrSS jrS	S jrSrU =r	$ )
DeferredLinei3  zHA line that can be 'unwritten' by adding name to V.graph.removed_buffersc                ^   > [         TU ]  U5        Xl        [        U[        5      (       a   eg rj   )r0  r   rb   rn  r%   )re   rb   liner2  s      rP   r   DeferredLine.__init__6  s,    	d$455555rR   c                P    [        U R                  5      (       d  U R                  $ g rj   )r  rb   r  rk   s    rP   __call__DeferredLine.__call__;  s     ++99rR   c                .    [        U R                  U5      $ rj   )r  rb   )re   r  s     rP   	_new_lineDeferredLine._new_line@  s    DIIt,,rR   r  )rb   rz   r  rz   r	  )r  rz   rw   r  )
r|   r}   r~   r   r   r   r  r  r   r4  r5  s   @rP   r  r  3  s    R6

- -rR   r  c                  "    \ rS rSrSSS jjrSrg)BracesBufferiD  c                L   ^ ^ [         R                  SUU 4S jj5       nU" 5       $ )Nc               3    >#    [        T5       H)  n TR                  S5        T=R                  S-  sl        M+     [        T* 5       H)  n T=R                  S-  sl        TR                  S5        M+     S v   [        T* 5       H)  n TR                  S5        T=R                  S-  sl        M+     [        T5       H)  n T=R                  S-  sl        TR                  S5        M+     g 7f)N{rD   })ranger  _indent)_r  re   s    rP   ctx BracesBuffer.indent.<locals>.ctxF  s     6]s#! # F7^!s# $ F7^s#! $ 6]!s# #s   C(C+)rw   Iterator[None])
contextlibcontextmanager)re   r  r  s   `` rP   indentBracesBuffer.indentE  s$    		"	"	$ 
#	$ urR   r   N)rD   )r  r   rw   z'contextlib.AbstractContextManager[None])r|   r}   r~   r   r  r   r   rR   rP   r  r  D  s     rR   r  c                  *    \ rS rSr% S\S'   S\S'   Srg)InplacedBufferiY  rz   r   r   other_namesr   Nr  r   rR   rP   r  r  Y  s    OrR   r  c                  8    \ rS rSr% S\S'   SrS\S'   S
S jrSrg	)ArgNamei^  rz   rb   Fr   is_constexprc                L    U R                    U R                  (       a  S 3$ S 3$ )Nz : tl.constexprr  )rb   r  rk   s    rP   	full_nameArgName.full_named  s*    ))$2C2C.LMMLMMrR   r   Nry   )r|   r}   r~   r   r   r  r  r   r   rR   rP   r  r  ^  s    
IL$NrR   r  c                      \ rS rSrSS jrSrg)
RemovedArgih  c                    g)NREMOVEDr   rk   s    rP   __str__RemovedArg.__str__i  s    rR   r   Nry   )r|   r}   r~   r   r  r   r   rR   rP   r  r  h  s    rR   r  c                     \ rS rSr\        SS j5       rSS jrSS jr\SS j5       rSS jr	SS jr
SS jrSS	 jrSS
 jrS S jrS!S jrS"S jrS#S jrS$S jrS%S jr S&   S'S jjr  S(S jrS)S jrS*S jrS+S jrSrg),
KernelArgsip  c                    UR                  U[        5      n[        U[        5      (       a  U  [	        U5       3=X'   nU$ U$ rj   )getr  rn  r  r  )r   odictrb   result
new_results        rP   _lookupKernelArgs._lookupq  sE     */4)Afj))*0#e*'>>EK*rR   c                J    0 U l         0 U l        0 U l        0 U l        / U l        g rj   )input_buffersoutput_buffersinplace_buffersr-  workspace_argsrk   s    rP   r   KernelArgs.__init__}  s)    -/ACMO/124rR   c                    SR                  SR                  [        [        U R                  U R
                  U R                  U R                  /5      5      5      $ )NzKernelArgs({})r   )formatr   mapr  r  r  r  r-  rk   s    rP   __repr__KernelArgs.__repr__  sS    &&II**++,,	

 	
rR   c                "    [        U [        5      $ rj   )rn  r  r  s    rP   _buffer_is_marked_removed$KernelArgs._buffer_is_marked_removed  s     $
++rR   c                T   [         R                  R                  (       a3  [         R                  R                  R                  R	                  X5      nU[         R                  R
                  ;  d   U5       eXR                  ;   a  [        [        U R                  U   5      $ XR                  ;   a'  [        [        U R                  U   5      R                  $ UR                  S5      (       a  U R                  SU R                  U5      $ U R                  SU R                  U5      $ )Nseedin_ptr)r5   r   r#  mutation_real_namer  r  r  r   rz   r  r  r   r  r  r  r  s     rP   inputKernelArgs.input  s    7777$$77;;DGD1772228D82&&&T00677'''(<(<T(BCNNN??6""<<(:(:DAA||Hd&8&8$??rR   c                   [         R                  R                  (       a3  [         R                  R                  R                  R	                  X5      nU[         R                  R
                  ;  d   U5       eXR                  ;   a'  [        [        U R                  U   5      R                  $ U R                  SU R                  U5      $ )Nout_ptr)r5   r   r#  r  r  r  r  r   r  r   r  r  r  s     rP   r  KernelArgs.output  s    7777$$77;;DGD1772228D82'''(<(<T(BCNNN||It':':DAArR   c                ,   U[         R                  R                  ;   a)  [         R                  R                  R                  U5        X R                  ;  d   U5       eXR                  ;   aP  U R                  U   n[        U[        5      (       a   eUR                  R                  U5        X0R                  U'   g U R                  R                  5        Vs/ s H  n[        U[        5      (       a  M  UPM     nnU R                  R                  5        Vs/ s H  n[        U[        5      (       d  M  UPM     nn[        [        U5      5      [        U5      -   n[        SU 3X/5      nX0R                  U'   X0R                  U'   g s  snf s  snf )N
in_out_ptr)r5   r   unaligned_buffersrb  r  rn  r  r  appendr  r  r/   r  )re   
input_nameoutput_namebufvalalive_buffersr  inplace_buffer_idxs           rP   make_inplaceKernelArgs.make_inplace  sh   222GG%%))+6"6"66CC6---&&z2C!#z2222OO"";/03  -  //6688C!#z2 8    //6688Cc:. 8  
 "%VM%:!;c/>R!R /01)C 03  ,03  -!
s   F6FF7Fc                P   [        U[        R                  U5      [        R                  R                  5       [         R                  5       S9n[        U R                  5       H  u  pE[         R                  XS5      (       a>  UR                  n[         R                  XS5      U R                  U'   UR                  U4s  $ UR                  UR                  :w  a  UR                  UR                  :w  a  M   U5       e   U R                  R                  U5        UR                  S4$ )a  
Allocate or extend a workspace buffer of nbytes bytes.

This function manages the allocation of a workspace buffer. It either creates
a new WorkspaceArg or extends an existing one.

Note:
- Calling this function will in-place mutate the args by adding or updating
a WorkspaceArg.
- The codegen for generating the Python argdefs and call_defs will check
this field and allocate the buffer accordingly.
- A new argument "ws_ptr" will be present in the generated code.

Args:
    nbytes (sympy.Expr): The number of bytes to allocate.
    zero_fill (bool): Whether to initialize the buffer to zero.

Returns:
    Tuple[str, int]: A tuple containing:
        - "ws_ptr": A string identifier for the workspace pointer.
        - offset: An integer representing the byte offset in the workspace.
)r   r   r   r   r   )r   r   r   r5   r   get_current_device_or_throwr   r  r  r   r   r   r   r   r  )re   nbytesr   argr  existing_argr  s          rP   rT  KernelArgs.workspace  s    . '11)<77668#//1	
  ))<)<=OA$$\77%++)5):):<)M##A&#..66''3>>9 ++s~~= >  > 	""3'~~q  rR   c           
        [         R                  R                  5       n[        U[        R
                  [        R                  SSUR                   SUR                   3US9nU R                   H,  nUR                  UR                  :X  d  M  X4:X  a  M&   X445       e   U R                  R                  U5        UR                  $ )a  
Lazily allocate a graph-wide semaphores buffer with at least min_size.  This is a single buffer shared by
all kernels and zero initialized once at graph start.  Each kernel must leave the buffer zeroed on exit.

Warning: multiple calls to this function will return the same buffer.

Args:
    min_size: the number of int32 semaphores required

Returns:
    name of the semaphores buffer
sem_ptrsemaphores_r  )r   r   r   r   r   r   )r5   r   r  r   r   r   r   uint32ro  r  r  r   r  )re   min_sizecurrent_devicer   r!  s        rP   
semaphoresKernelArgs.semaphores  s     <<>'66,, $^%8%8$9>;O;O:PQ!
 !//L&&#..8*?S,??* 0 	""3'~~rR   c                z  ^ [        U[        5      (       d   [        U5      U45       e[        R                  " U5      nX R
                  ;   a  U R
                  U   $ TU R
                  R                  5       ;   a1  T [        U4S jU R
                  R                  5        5       5       3mTU R
                  U'   T$ )Nc              3  V   >#    U  H  oR                  T5      (       d  M  S v   M      g7f)rD   N)r  )r  r  rb   s     rP   r  )KernelArgs.seed_offset.<locals>.<genexpr>  s     U(>1,,tBTQQ(>s   )	))rn  r   ro  r   r  r-  r  sum)re   rb   rd   s    ` rP   seed_offsetKernelArgs.seed_offset  s    %%%;UU';;%e$MM!==''4==''))&U(<(<(>UUVW   $erR   c                    [        U[        R                  5      (       d   [        U5      U45       eUR                  S:X  a  SU R
                  U'   gU R                  SU R
                  U5      $ )Nr	  ks)rn  r   Symbolro  rb   r-  r  r  s     rP   r   KernelArgs.size  s[    $--AT
D/AA-99"(DMM$||D$--66rR   c                    [        U R                  R                  5       U R                  R                  5       U R                  R                  5       5      $ rj   )r   r  keysr  r-  rk   s    rP   
call_namesKernelArgs.call_names!  sA    ##%t':':'?'?'A4==CUCUCW
 	
rR   c                &   U R                   R                  US5      nUb!  [        U[        5      (       d  UR                  $ U R
                  R                  US5      nUb  [        U[        5      (       d  U$ U R                  R                  US5      $ )z+
Returns inner name of a given outer name.
N)r  r  rn  r  r   r  r  )re   rb   inplacedr  s       rP   arg_nameKernelArgs.arg_name&  s     ''++D$7
8Z(H(H&&&))--dD9":k:+N+N!!%%dD11rR   c                    U$ rj   r   )re   r  r   s      rP   wrap_ptr_argKernelArgs.wrap_ptr_arg2  s    
rR   c                    [        U5      $ rj   )rz   )re   r   s     rP   wrap_size_argKernelArgs.wrap_size_arg5  s    4yrR   Nc                   SSK Jn  Uc  SSK Jn  Un/ n/ n/ n[        U R                  R                  5       5       H  n[        U[        5      (       a  M  UR                  S   nUR                  n	[        R                  R                  U5      n
X   nUR                  U SU	 35        UR                  U R                  X5      5        UR                  U S35        M     U R                  R!                  5        H  u  pXR                  ;   a  M  [        R                  R                  U5      n
X   nUR                  SU SU	 35        UR                  U R                  X5      5        UR                  SU S35        M     U R"                  R!                  5        H  u  pXR                  ;   d  [        U[        5      (       a  M+  [        R                  R                  U5      n
X   nUR                  U SU 35        UR                  U R                  X5      5        UR                  U S35        M     U R$                  R!                  5        H  u  pUR                  SU SU	 35        UR                  U R'                  U5      5        UR                  SU 35        [        R                  R(                  (       d  Mq  [        R                  R(                  R+                  U5        M     U R,                  (       a   S	5       eXTU4$ )
NrD   )
INDEX_TYPE)r  r  z* *zconst  zWorkspace not supported on CPU )r  rD  r  r/   r  r  rn  r  r  r   r5   r   r   r  r>  r  r  r  r-  rA  wrapper_codeensure_size_computedr  )re   dtype_to_cpp_typerD  r  	call_argsarg_defs	arg_typesr:  outerinnerr   	cpp_dtypemaybe_inners                rP   cpp_argdefsKernelArgs.cpp_argdefs8  s    	*$/ ,		t33::<=H(J//((,E''EGG%%e,E)0IOOykE734T..u<=	{!_- > !..446LE,,,GG%%e,E)0IOOfYKr%9:T..u<=vi[23 7 #'"5"5";";"=E,,,
;
0S0SGG%%e,E)0IOOykK=9:T..u<=	{!_- #> !MM//1LEOOfZL%9:T//67vj\23ww###$$99%@ 2 &&I(II&I--rR   c                   / n/ n/ n/ n[        U R                  R                  5       5       H  n[        U[        5      (       a  M  UR                  [        UR                  5      5        UR                  UR                  S   5        UR                  [        R                  R                  UR                  S   5      5        UR                  [        UR                  UR                  S   [        R                  R                  UR                  S   5      S95        M     [        U R                  R                  5       U R                   R                  5       5       H  u  pgX`R                  ;   d  [        U[        5      (       a  M+  UR                  [        U5      5        UR                  U5        UR                  [        R                  R                  U5      5        UR                  [        UU[        R                  R                  U5      S95        M     U R"                  R                  5        H  u  pgUR                  [        U5      5        UR                  U5        UR                  [%        U5      5        UR                  ['        Xv5      5        [        R                  R(                  (       d  M  [        R                  R(                  R+                  U5        M     U R,                   Hn  nUR                  [        UR                  5      5        UR                  UR.                  5        UR                  U5        UR                  UR0                  5        Mp     XXC4$ )Nr  )rb   r   r   )r/   r  r  rn  r  r  r  r   r  r5   r   r   r   r   r  r  r  r-  ro  r  rG  rH  r  r   r   )	re   rK  rJ  rL  precompile_argsr:  rM  rN  r   s	            rP   python_argdefsKernelArgs.python_argdefsh  s    #%!	!	/1t33::<=H(J//OOGH$7$789X11"56QWW..x/C/CB/GHI""!,,#//3''++H,@,@,DE > "$$&(;(;(A(A(C
LE ,,,
5*0M0MOOGEN+U#QWW..u56"" ''++E2
 !MM//1LEOOGEN+U#T%[)""75#89ww###$$99%@ 2 &&COOGCNN34S^^,""3'SYY'	 '
 O>>rR   c              #     #    [        U R                  R                  5       5       H  n[        U[        5      (       a  M  UR
                   H  nU[        R                  R                  ;   d  U[        R                  R                  ;   a  MA  X R                  ;   a  U R                  U   UR                  4v   X R                  ;   d  M~  [        [        U R                  U   5      UR                  4v   M     M     g 7frj   )r/   r  r  rn  r  r  r5   r   r  r  r  r   r  r   rz   )re   r:  others      rP   aliasesKernelArgs.aliases  s     t33::<=H(J//!--QWW777 ; ;;...,,U3X5H5HHH///sD$7$7$>?ATATTT . >s   CC>
4C>c                    [        U R                  R                  U[        5      [        5      =(       a.    [        U R
                  R                  U[        5      [        5      $ rj   )rn  r  r  r  r  r  r  s     rP   
is_removedKernelArgs.is_removed  sK    ##D'2J
 N--11$@*M	NrR   c                   [        5       n[        U R                  R                  5       5       H8  n[	        U[
        5      (       a  M  UR                  UR                  S   5        M:     U R                  R                  5        H<  u  p4X0R                  ;   d  [	        U[
        5      (       a  M+  UR                  U5        M>     U$ )Nr  )
r   r/   r  r  rn  r  rb  r  r  r  )re   	live_outsr:  rM  rN  s        rP   live_output_buffersKernelArgs.live_output_buffers  s    %/\	t33::<=H(J//MM(..r23 > !//557LE,,,
5*0M0MMM%  8 rR   )r  r  r  r-  r  )r   rz   r  z6Union[dict[_T, Union[str, RemovedArg]], dict[_T, str]]rb   rG   rw   rz   rv   ry   )rb   r	   rw   r   rW  )r  rz   r  rz   rw   rx   )r  r   r   r   rw   ztuple[str, int])r'  r   rw   rz   )rb   rz   rd   r   rw   rz   )rb   r"  rw   rz   )rw   zIterator[str])rb   rz   rw   r  )r  rz   r   r   rw   rz   )r   
SymbolLikerw   rz   rj   )rI  z Optional[dict[torch.dtype, str]]rw   z&tuple[list[str], list[str], list[str]])rw   z?tuple[list[ArgName], list[str], list[KernelArgType], list[Any]])rw   zIterator[tuple[str, str]]r*  )rw   zOrderedSet[str])r|   r}   r~   r   r   r  r   r  r  r  r  r  rT  r)  r/  r   r7  r;  r>  rA  rQ  rU  rY  r\  r`  r   r   rR   rP   r  r  p  s    		E	 	 
		 	5
 , ,
@B48'!R87


2 EI..!A..	/..`/?	H/?bUN
rR   r  c                  r   ^  \ rS rSrSr S
     SU 4S jjjrSS jrSS jrSS jrSS jr	SS jr
S	rU =r$ )r  i  a4  A CSEVariable is just a name for an expression but it is useful to be able to annotate them on a backend dependent basis.
To do so, the backends can simply overload `Kernel.create_cse_var`
The "CSEVariable.update_on_args" method gives you a hook for annotations
See example of TritonCSEVariable in triton.py
c                   > [         TU ]  5         [        U[        5      (       d   [	        U5      5       eXl        X l        SU l        X0l        g r<  )	r0  r   rn  r   ro  rb   bounds	use_countr   )re   rb   re  r   r2  s       rP   r   CSEVariable.__init__  sB     	&+..<V<.	
rR   c                    U R                   $ rj   r  rk   s    rP   r  CSEVariable.__str__  s    yyrR   c                ,    [        U R                  5      $ rj   )hashrb   rk   s    rP   __hash__CSEVariable.__hash__  s    DIIrR   c                b    [        U[        5      =(       a    UR                  U R                  :H  $ rj   )rn  r  rb   )re   rX  s     rP   __eq__CSEVariable.__eq__  s!    %-I%**		2IIrR   c                    g rj   r   )re   rb   rp   r  s       rP   update_on_argsCSEVariable.update_on_args  s    rR   c                P    U R                   R                   SU R                  < S3$ r  )r2  r|   rb   rk   s    rP   r  CSEVariable.__repr__  s$    ..))*!DII=::rR   )re  r   rb   rf  rj   )rb   rz   re  ValueRanges[Any]r   r  ry   )rw   r   )rX  objectrw   r   )rb   rz   rp   r	   r  r	   rw   rx   )r|   r}   r~   r   r   r   r  rl  ro  rr  r  r   r4  r5  s   @rP   r  r    sR     (,	 ! %	 J; ;rR   r  AugmentedKeyT)default)boundry  .c                  f   \ rS rSrSr       S             SS jjrSS jrSS jrSS jrSS jr	SS	 jr
SS
 jrSS jrSS jr\R                  " 5       SSSS.             SS jjr\R                  " 5       S4     SS jjr\R                  " 5       S4       SS jjrSrg)CSEi  z Common subexpression eliminationNc                    Xl         X l        0 U l        X0l        U=(       d    0 U l        U=(       d    0 U l        U=(       d    [        R                  " 5       U l        [        5       U l
        U=(       d    0 U l        g rj   )r   rZ   _cachename_prefixstore_cachereduction_cache	itertoolsr   iter_buffer_idsr   invalidated_storesvarname_map)re   r   rZ   r  iter_buffersr  r  r  s           rP   r   CSE.__init__  sg     FH&ALARPR!r 	 6B5VY__EV3=<7B7HbrR   c                L   / U R                   R                  5       Q H4  u  p#X1;  d  M  U R                   U	 U R                  R                  U5        M6     U(       a<  U R                  R                  5        VVs0 s H  u  pEXQ;   d  M  XE_M     snnU l        g 0 U l        g s  snnf rj   )r  r  r  rb  r~  )re   	keep_varsrb   tmpr  r  s         rP   
invalidateCSE.invalidate  s    44++1134ID#$$T*''++D1 5 ,0KK,=,=,?R,?DA1>414,?RDKDK Ss   9B B c           
         [        U 5      " U R                  U R                  U R                  U R                  U R
                  U R                  U R                  S9$ )N)r   rZ   r  r  r  r  r  )ro  r   rZ   r  r  r  r  r  rk   s    rP   clone	CSE.clone  sN    Dz;;;;((--(((( 00
 	
rR   c                    U R                  5       n[        U R                  5      Ul        [        U R                  5      Ul        [        U R                  5      Ul        U$ )zNReturn a copy of using ScopedDict so changes to *_cache aren't visible in self)r  r*   r~  r  r  )re   new_cses     rP   scoped_copyCSE.scoped_copy  sH    **,#DKK0",T-A-A"B()9)9:rR   c                "    [        [        U5      $ )z@Override this method to augment cache key with backend specifics)r   rx  re   	cache_keys     rP   augment_keyCSE.augment_key$  s    M9--rR   c                >    X R                   U R                  U5      '   g rj   r~  r  )re   r  r  s      rP   putCSE.put(  s    36D$$Y/0rR   c                >    U R                  U5      U R                  ;   $ rj   )r  r~  r  s     rP   containsCSE.contains+  s    	*dkk99rR   c                X    U R                   R                  U R                  U5      S 5      $ rj   )r~  r  r  r  s     rP   try_getCSE.try_get.  s"    {{t//	:DAArR   c                >    U R                   U R                  U5         $ rj   r  r  s     rP   r  CSE.get1  s    {{4++I677rR   T)re  rc   
assignmentr   c          	     V   [        U[        5      (       a  UR                  nU(       d	  U(       d   e[        U[        5      (       aE  UR                  R                  U5      Ul        U=R                  S-  sl        [        [        U5      $ [        U[        5      (       a  UR                  5       nO;[        U[        5      (       a  UR                  nO[        U[        5      (       d   eUnU R                  U5      nU(       Gd  U R                  X65      nU R!                  Xx5        U(       Ga  ["        R$                  R&                  (       a(  ["        R$                  R&                  R)                  USS9  [        U[        5      (       aU  U(       a   UR+                  U R,                   U S35        UR/                  U5        UR+                  U R0                  5        U$ [        U[        5      (       aR  U(       d   eUR+                  UR3                  U R,                   U SUR                   U R0                   35      5        U$ U(       a   U R,                   U SU U R0                   3n	OU U R0                   3n	UR+                  U	5        U(       a[  [4        R6                  R8                  (       d  [4        R6                  R:                  (       a  Ub  [=        5       S:w  a  [?        XU5        U$ UR                  R                  U5      Ul        U=R                  S-  sl        U$ )NrD   T)	only_oncez =z = r  ) rn  r2   rd   r  re  tightenrf  r   r  r(   getvaluer%   r  rz   r  newvarr  r5   r  current_nodecodegen_originating_infor  r   splicerZ   r  r   r  r  r  r'   r  )
re   r   r  re  rc   r  r   r  r  r  s
             rP   generateCSE.generate4  sk    dH%%::D
""dK(( ++--f5DKNNaN..n--I.//		IdC((((Ill9%++f,CHHY$88((HH))BB$ C  dN33!((DKK=R)@AMM$'$$T[[1: 
9  &677%%:$$$++se3tyyk$++'WX4 
- ""&++se3tfT[[MJ"&}5$$T* #"//KK%22JJ!-/1U:#F7 
 ++F3CJMMQM
rR   c                    U R                    [        U R                  5       3n[        R                  R                  X1U5      nX@R                  U'   U$ rj   )r  r   r  r5   r  create_cse_varr  )re   re  r   var_namer  s        rP   r  
CSE.newvar|  sN    
 &&'T-A-A(B'CDhh%%h>%("
rR   c                   ^ [         R                  " TU R                  ;  U4S j5        [        R                  R                  TX#5      nX@R                  T'   U$ )Nc                    > ST  3$ )Nzduplicate name: r   r  s   rP   r  CSE.namedvar.<locals>.<lambda>  s    4DTF2KrR   )r   _check_valuer  r5   r  r  )re   rb   re  r   r  s    `   rP   namedvarCSE.namedvar  sO     	(((*K	
 hh%%dF:!$
rR   )	r~  r  r  r  r   r  r  rZ   r  )r  r  r  NNNN)r   rz   rZ   rz   r  rz   r  zOptional[itertools.count[int]]r  z.Optional[MutableMapping[str, CSEVariableType]]r  z<Optional[MutableMapping[ReductionCacheKey, CSEVariableType]]r  z$Optional[dict[str, CSEVariableType]])r  zOrderedSet[CSEVariable]rw   rx   rw   r   )r  rz   rw   rx  )r  rz   r  r  rw   rx   )r  rz   rw   r   )r  rz   rw   zOptional[CSEVariableType])r  rz   rw   r  )r   r(   r  zCUnion[str, CSEVariable, OpsValue, IndentedBuffer, DeferredLineBase]re  rv  rc   r   r  r   r   r  rw   r  )re  rv  r   r  rw   r  )rb   rz   re  rv  r   r  rw   r  )r|   r}   r~   r   r   r   r  r  r  r  r  r  r  r  r   unknownr  r  r  r   r   rR   rP   r|  r|    sv   *  7;FJ <@II I 	I
 5I DI
I :I0	
.7:B8 $/#6#6#8'+FF RF
 !F F F %F 
FT $/#6#6#8'+  % 
	 $/#6#6#8'+	 ! %	
 
 rR   r|  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	CodeGeni  c                V   > [         TU ]  5         [        R                  " 5       U l        g rj   )r0  r   r  	ExitStack
exit_stackre   r2  s    rP   r   CodeGen.__init__  s    $..0rR   c                :    U R                   R                  5         U $ rj   )r  	__enter__rk   s    rP   r  CodeGen.__enter__  s    !!#rR   c                <    U R                   R                  XU5        g rj   )r  __exit__)re   exc_typeexc_valexc_tbs       rP   r  CodeGen.__exit__  s      F;rR   )r  rv   r  r  r	   r  r	   r  r	   rw   rx   )	r|   r}   r~   r   r   r  r  r   r4  r5  s   @rP   r  r    s    1< <rR   r  c                  `  ^  \ rS rSr% SrS\S'   SrS\S'   SrS\S'    S"     S#U 4S	 jjjr\	R                  S$S
 j5       r\	R                    S%       S&S jj5       rS'S jrS'S jrS(S jr S)         S*S jjr          S+S jr        S,S jr          S-S jrS.S jr  S%               S/S jjr\S0S j5       r S)         S1S jjr          S2S jrS3S jrS4U 4S jjrS5U 4S jjrS6S jrS7S jrS7S jr    S8S jr S9S jr!S:S  jr"S!r#U =r$$ );Kerneli  r  rz   newvar_prefixrZ   Nz'Optional[Callable[[], OpsHandler[Any]]]	overridesc                ,  > [         TU ]  5         U(       a  [        =R                  S-  sl        U=(       d
    [	        5       U l        [        5       U l        [        5       U l        [        5       U l	        SU l
        SU l        [        U R                  U R                  5      U l        [!        5       U l        [!        5       U l        S U l        S U l        S U l        S U l        [!        5       U l        [!        5       U l        0 U l        SU l        S U l        g )NrD   r   )r0  r   r    generated_kernel_countr  rp   r(   loadscomputestoresnum_loadnum_reductionr|  r  rZ   cser   must_keep_buffersstore_buffer_names
_load_mask_load_otherr  node_to_boundsr  r  inplace_update_buffersmin_elem_per_threadkernel_name)re   rp   increase_kernel_countr2  s      rP   r   Kernel.__init__  s     	 **a/*(JL	#%
%'$&.1$2D2Ddkk.R2<,3=<)-4859OS0:3=<
 79##$ *.rR   c              #     #    U R                   nXl         UR                  R                  5       R                  5       U l         S v   X l         g ! X l         f = f7frj   )r  r$  re  
get_boundsr  )re   r  priors      rP   set_current_nodeKernel.set_current_node  sH     !! "jj//1<<>	& %s   A AA AAAc              #    #    Uc  UnUS L =n(       a
  [        5       nU R                  nU R                  nU R                  nU R                  nXl        X l        X0l        UR                  5       U l         S v   XPl        X`l        Xpl        Xl        U(       a  U(       a   S5       eg g ! XPl        X`l        Xpl        Xl        U(       a  U(       a   S5       ef f = f7f)Nz$unexpected store inside swap_buffers)r(   r  r  r  r  r  )	re   lbcbsbdisallow_storesr  r  r  r  s	            rP   swap_buffersKernel.swap_buffers  s      :B Dj(?(!B

,,hh
??$	FJ"L KHEEEv2 	 J"L KHEEEv2 s   A2C5B( 9/C(0CCc                    [         erj   r  r  s      rP   r  Kernel.load  r"  rR   c                    U R                   n U R                  U l         U R                  X5      X0l         $ ! X0l         f = f)z+A load the depends on an index we have read)r  r  r  )re   rb   r  r  s       rP   indirect_loadKernel.indirect_load  s2    

	DJ99T)JJs   !6 >c                    [         erj   r  r  s       rP   r  Kernel.store_reduction  r"  rR   c                    [         erj   r  r  s        rP   r  Kernel.store  
     "!rR   c                    [         erj   r  r  s        rP   r  Kernel.reduction  
     "!rR   c                    [         erj   r  r  s       rP   r  Kernel.scan  s
     "!rR   c                    [         erj   r  r  s        rP   r  Kernel.sort  r  rR   c                    [         erj   r  rk   s    rP   
var_rangesKernel.var_ranges   r"  rR   c                    [         e)z#
See [Note: Inductor bucketize op]
r  r  s           rP   r  Kernel.bucketize#  s
     "!rR   c                    [         erj   r  rk   s    rP   assert_functionKernel.assert_function2  s    !!rR   c           	        [        U[        5      (       a  [        U5      n[        U[        5      (       d   [        U5      5       eUb  [        U[        5      (       d   eUb  [        U[        5      (       d   eU(       a!  U(       a  SU SU SU SU S3	nU SU SU 3nO#U(       a
  U SU 3nUnOU(       d   eU SU 3nUnU(       a	  SU SU S3nU R                   SU SU S3$ )	Nr  z <= z) & (z < r   z) | ~(z, "index out of bounds: z"))rn  r  rz   ro  r  )re   r  r  r  maskr  
cond_prints          rP   indirect_assertKernel.indirect_assert6  s    c;''c(C#s##.T#Y.#}
5# 6 666}
5# 6 666U ugT#eC5E7!<D!7$se3ug6JWD&DJL5U#eW%DJtfF4&*D&&'q.FzlRTUUrR   c                    [         erj   r  r  s        rP   r  Kernel.check_boundsT  r  rR   c                    [         erj   r  r  s     rP   index_to_strKernel.index_to_strY  r"  rR   c           	     4  > [         TU ]  5         U R                  (       d   eU R                  R	                  [
        R                  " [        X R                  5       5      5      5        U R                  R	                  [
        R                  " U 5      5        U $ rj   )	r0  r  r  r  enter_contextr5   set_ops_handlerCSEProxyset_kernel_handlerr  s    rP   r  Kernel.__enter__\  sj    ~~~%%ht^^-=>?	
 	%%a&:&:4&@ArR   c                F   > U R                  5         [        TU ]	  XU5        g rj   )remove_kernel_local_buffersr0  r  )re   r  r  r  r2  s       rP   r  Kernel.__exit__e  s    ((*F3rR   c                  ^^ [         R                  R                  mT(       d  g[        U4S jU R                   5       5      n[        5       mU R                   HX  nX R
                  ;  d  M  X R                  R                  ;  d  M/  TR                  X!5      (       d  MG  TR                  U5        MZ     T H  nX R                  R                  ;   a  U R                  R                  U   n[        U[        5      (       a  ML  [        U4S jUR                   5       5      nU(       a  U R                  U5        U R                   R                  U5        M  U R#                  U5        M     g)z
Any buffers that are both created and have a last use in the
same kernel can be removed.

Note that V.graph.scheduler can be None when codegening triton template
kernels.
Nc              3     >#    U  H4  nUTR                   ;   d  M  TR                   U   R                  5       v   M6     g 7frj   )name_to_bufdefining_op_name)r  r  r#  s     rP   r  5Kernel.remove_kernel_local_buffers.<locals>.<genexpr>t  s?      &
.i+++ :I!!#&7799.s   ?$?c              3  ,   >#    U  H	  oT;   v   M     g 7frj   r   )r  r  names_to_removes     rP   r  r    s     K?a/1?s   )r5   r   r#  r   r  r  rp   r  $can_buffer_be_removed_through_fusionrb  r  rn  r  r  r  remove_inplace_bufferr  remove_buffer)re   fused_node_namesrb   r  ra   r!  r#  s        @@rP   r  "Kernel.remove_kernel_local_buffersi  s    GG%%	% &
..&
 

 ,6<++D222		 7 77BB   ##D) , $Dyy000ii//5c:..K3??KK..t4''++D1""4( $rR   c                    [         R                  SU5        [        U R                  R                  U'   U R
                  R                  U5        g )Nzremove_buffer(%r))rW  rN   r  rp   r  r  rb  r  s     rP   r$  Kernel.remove_buffer  s;     			%t,)0		  &  &rR   c                    [         R                  SU5        [        U R                  R                  U'   U R
                  R                  U5        g )Nzremoving_inplace_buffer(%r))rW  rN   r  rp   r  r  rb  r  s     rP   r#  Kernel.remove_inplace_buffer  s9    		/6*1		!!$'  &rR   c           	        [        U[        [        45      (       a!  U Vs/ s H  o R                  U5      PM     sn$ [        R
                  R                  R                  U5      n[        UR                  S S9nU Vs0 s H^  n[        U[        R                  [        R                  [        R                  45      (       d  MC  X R                  R!                  U5      _M`     nn[#        X5      $ s  snf s  snf )Nc                    U R                   $ rj   r  )ss    rP   r  (Kernel.rename_indexing.<locals>.<lambda>  s    !&&rR   )r  )rn  listtuplerename_indexingr5   r   r-  r*  sortedfree_symbolsr   r   UNBACKED_INTSIZEPRECOMPUTED_SIZErp   r   r-   )re   r  r@  sorted_symbolsreplacementss        rP   r1  Kernel.rename_indexing  s    
 edE]++5:;U((+U;;  ))%0 2 28HI $
#%%II)) !Ayy~~a  # 	 
 %.. <
s   C1?AC6C6c                    [        U0 UD6$ rj   )r  )re   rp   r  s      rP   r  Kernel.create_cse_var  s    D+F++rR   c                \    Uc  gU R                   R                  UR                  5       5      $ )z3
Returns arg name of a given input or output node.
N)rp   r;  r   )re   r  s     rP   r;  Kernel.arg_name  s'     <yy!!$--/22rR   )r  r  rp   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )NT)rp   zOptional[KernelArgs]r  r   rw   rx   )r  rC   rw   r  r'  )r  r(   r  Optional[IndentedBuffer]r  r>  rw   r  rb   rz   r  r   rw   r  rb   rz   r  r   rd   r  rw   rx   rj   
rb   rz   r  r   rd   r  rY   r4   rw   rx   
r   r   r  r   r  r3   rd   +Union[CSEVariable, tuple[CSEVariable, ...]]rw   rC  r  r%  r  zUCallable[[tuple[CSEVariable, ...], tuple[CSEVariable, ...]], tuple[CSEVariable, ...]]r  tuple[CSEVariable, ...]rw   rE  
r  r%  r  rE  r  r   r  r   rw   rE  )rw   zdict[sympy.Symbol, sympy.Expr]r  r  r  r(  r  r  r  r   r  r   r  r)  r  zOptional[CSEVariable]rw   r  ry   )
r  zUnion[CSEVariable, str]r  r  r  r  r	  z!Optional[Union[CSEVariable, str]]rw   rz   r#  )r  r   rw   rz   r  r  rv   rb   rz   rw   rx   )r  z;Union[list[sympy.Expr], tuple[sympy.Expr, ...], sympy.Expr]rw   r   )rp   r	   r  r	   rw   r  )r  r>   rw   r  )%r|   r}   r~   r   r  r   rZ   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r$  r#  r1  r  r;  r   r4  r5  s   @rP   r  r    s   M3FC9=I6= PT /( /HL /	 /  /D & &  (,'+	FF %F %	F
 
F F8"" SW"" *"3>"FO"	"
"" " &	"
 ;" 
5""'"
" (" 
!""'" (" 	"
 " 
!"" 4804"" C" &	"
 $" " 1" ." 
" " " 37V$V V 	V
 0V 
V<""&0"9="FJ"	"
"4%)N''
/P/	/.,3 3rR   r  c                  @    \ rS rSr% SrS\S'   SrS\S'   SrS	\S
'   Srg)r  i  r  zClassVar[str]r  Nr  r   r  rz   ops_namer   )	r|   r}   r~   r   r  r   r   rJ  r   r   rR   rP   r  r    s!    "C"#'E 'HcrR   r  c                 ^     SS K n U R                  U R                  S9$ ! [         a     g f = f)Nr   )	undefined)jinja2EnvironmentStrictUndefinedImportError)rM  s    rP   
jinja2_envrQ    s?    !!,, " 
 	
  s    
,,c                      \ rS rSrSr\ S       SS jj5       r\SS j5       r\    SS j5       rSS jr	      SS jr
SS jrS	rg
)KernelTemplatei  z[
Base class for defining kernel templates.

Children classes: TritonTemplate, CUDATemplate
c                    U R                  S5      n[        U5      S:  a"  USS   Vs/ s H  nSU-  U-  U-   PM     snUSS & SR                  U5      $ s  snf )NTrD   rF  r  )
splitlinesr  r   )sourcenum_indentsindents_spacinglinesr  s        rP   indent_except_first"KernelTemplate.indent_except_first  sh     !!$'u:>INqrIR&4<E!"I wwu~s   Ac                    [        5       nUc  g [        R                  UR                  S'   SSKJn   UR                  U 5      $ ! U a  n " S SU5      nU" U5      UeS nAff = f)NrZ  r   )TemplateSyntaxErrorc                  6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )IKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxErrori  c                   > [         TU ]  UR                  UR                  UR                  UR
                  5        Xl        g rj   )r0  r   messagelinenorb   filenameoriginal_error)re   rd  r2  s     rP   r   RKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__init__  s<    G$&..&--&++&//	 +9'rR   c                X   SU R                    S3nUSU R                   S3-  n[        U R                  S5      (       a  U R                  R                  R                  S5      nUS-  n[        SU R                   S-
  5      n[        [        U5      U R                   S-   5      n[        X45       Hw  nXPR                   S-
  :X  aS  XS-    S	X%    S3-  n[        U R                  S
5      (       a'  USSU R                  R                  S-
  -  -   S-   -  nMf  Mh  XS-    SX%    S3-  nMy     U$ )NzError in template at line 
zError message: rV  z	Context:
r   r   rD   z: --> columnz     rF  z^
z:     )rb  ra  r/  rd  rV  splitmaxminr  r  rh  )re   
error_inforY  startendr  s         rP   r  QKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__str__  s:   #=dkk]"!MJODLL>"DDJt22H== $ 3 3 : : @ @ F"l2
 #At{{Q 7!#e*dkkAo>!&u!2A KK!O3 *QvehZr.J J
#*4+>+>#I#I$.(/*-1D1D1K1Ka1O*P)Q*/)0%&J $J !+QvehZr.J J
 "3 &%rR   )rd  )rd  r]  rw   rx   ry   )r|   r}   r~   r   r   r  r   r4  r5  s   @rP   DetailedTemplateSyntaxErrorr_    s    9& &rR   rp  )rQ  rS  rZ  filtersrM  r]  from_string)rV  envr]  erp  s        rP   _template_from_string$KernelTemplate._template_from_string  sj    l;-;-O-O)*.#	8??6**" !	8&.A &> .a0a7C!	8s   A A#
AA#c                0  ^^ [         R                  R                  m[        U [        [
        45      (       a0  U  Vs0 s H!  oR                  5       UR                  5       _M#     snmO U R                  5       U R                  5       0mSUU4S jjnU$ s  snf )Nc                @   > TR                  U 5      nUb  U$ T" U 5      $ rj   )r  )rb   r  _get_dtype_reallookups     rP   r   1KernelTemplate._fake_get_dtype.<locals>.get_dtype	  s'    ZZ%F!"4((rR   )rb   rz   rw   r   )r5   r   r   rn  r/  r0  r   )	fake_outsr  r   ry  rz  s      @@rP   _fake_get_dtypeKernelTemplate._fake_get_dtype	  s|     ''++i$//AJK#llncmmo5KF((*I,?,?,ABF	) 	)  Ls   (Bc                    Xl         g rj   r  r  s     rP   r   KernelTemplate.__init__%	  s    	rR   c                     UR                  U R                  " S0 UD65        g! [         aN  n[        R	                  SU[        U 5      [        R                  5       [        R                  :  S9  Us SnA$ SnAff = f)z
Maybe generates a new ChoiceCaller and appends it into existing choices.
Returns None if success, otherwise returns the error.

choices: A list of ChoiceCallers.
kwargs: Additional kwargs to be passed to self.generate() to generate a new ChoiceCaller.
Nz3Cannot Append Choice: %s. KernelTemplate type is %s)
stack_infor   )	r  r  r   rW  inforo  getEffectiveLevelrL   INFO)re   choicesr  rt  s       rP   maybe_append_choice"KernelTemplate.maybe_append_choice(	  sl    
	NN4==2623" 	HHET
002W\\A	   H	s   !$ 
A<AA71A<7A<c                    [         e)z=
Generates a ChoiceCaller instance from the given arguments.
r  )re   r  s     rP   r  KernelTemplate.generate?	  s
    
 "!rR   r  N)   )rV  rz   rW  r   rX  r   rw   rz   )rV  rz   rw   r	   )r|  zUnion[list[Buffer], Buffer]rw   zCallable[[str], torch.dtype]rH  )r  r{   r  r	   rw   zOptional[NotImplementedError])r  r	   rw   r<   )r|   r}   r~   r   r   r   rZ  ru  r}  r   r  r  r   r   rR   rP   rS  rS    s     >?"%8;	  *8 *8X .	% " ,/	&."rR   rS  c                  L  ^  \ rS rSrS rSU 4S jjrSS jrSS jr  S         SS jjr          SS jr	SS jr
SS jr S         SS	 jjrSS
 jr          SS jr        SS jr          SS jr  S               SS jjrSrU =r$ ) r  iG	  c                ^   > [         TU ]  5         SSKJn  U" 5       U l        Xl        X l        g )Nr   ValueRangeAnalysis)r0  r   re  r  vr_analysisr  parent_handler)re   r  r  r  r2  s       rP   r   CSEProxy.__init__J	  s'    /-/,rR   c                  ^^^^^	^
^ U R                   " T/TQ70 TD6m	[        U R                  T5      " T0 TD6n[        5       n[	        5       mS m
TS:X  a  TS:X  a  UR
                  m
OsTS:X  aS  TS:X  aM  [        R                  R                  R                  R                  [        R                  S 5      R
                  m
OTS;   a  [        UT5      nU" T0 TD6m
TS;   a  T
c   eSmSUUU	UUU
U4S jjn[        R                  " Xt5      $ )	Nmaskedr  r  )r  r  r  )r  r  r   c                *  > [        T[        [        45      (       a  TT	   OTnT	S-  m	TS:X  a(  [        U [        5      (       a  U R                  c  Xl        [
        R                  R                  R                  [
        R                  R                  U TTS9nUR                  TTT5        [        R                  R                  (       d  [        R                  R                  (       a)  Uc   e[        [
        R                  R                  X!5        U$ )NrD   r  )re  r   )rn  r/  r0  r  r   r5   r  r  r  r  rr  r   r  r  r  r  )
r  	var_dtypecsevarrp   r  re  r  rb   r  
output_idxs
      rP   do_cse!CSEProxy._default.<locals>.do_csek	  s    
 lT5M:: Z(! 
 !OJ %Jq+$>$>177?#XX\\**  "	 + F !!$f5 ##??&&>> ,,,AHH,,f@MrR   )r  rz   rw   r  )_bound_variabler  r  r!   r'   r   r5   interpreterr  r  r  r  r  pytreetree_map)re   rb   rp   r  rd   dtype_handlerdtype_opr  r  re  r  r  s    ```    @@@@rP   _defaultCSEProxy._defaultR	  s   %%d<T<V<++T2DCFC24%'88 3 ;;LX'U"2==55::>>#''e  00}d3H#T4V4L''+++
	 	> v--rR   c                  ^	 SSK Jn  SSKJn  SSKJn  [        [        R                  U5      (       a  [        R                  " 5       $ [        [        R                  U5      (       a  [        R                  " 5       $ [        R                  R                  m	T	R                  U:X  a  U R                  R                  b  [        U R                  R                  [        5      (       d$   [!        U R                  R                  5      5       eU R                  R                  R#                  T	[        R                  " 5       5      $ [$        R&                  (       az  [)        XA5      (       aj  [+        U	4S jS 5       5      (       a  [        R                  " 5       $ U(       a   eS	S jn[-        [/        Xr5      5      n[1        U R2                  U5      " U6 $ [        R                  " 5       $ )
z
If the variable comes from an FX node, we forward the bound we have already computed
Else, if the variable when codegen'ing another op, we try to compute its bounds
r   r  )TritonTemplateKernelrD   )CUDATemplateKernelc              3  @   >#    U  H  oTR                   ;   v   M     g 7frj   )r  )r  r-  fx_nodes     rP   r  +CSEProxy._bound_variable.<locals>.<genexpr>	  s     V0U1&0Us   )set_indirectr  r  c                    [        U [        5      (       a  U R                  $ [        U [        R                  5      (       a  [        U 5      $ U $ rj   )rn  r  re  r   r.  r   r?  s    rP   arg_to_bound.CSEProxy._bound_variable.<locals>.arg_to_bound	  s8    a--88O5::..&q>)HrR   )r@  r	   rw   r	   )re  r  select_algorithmr  cuda.cuda_kernelr  rn  r5   r  r   r  r  r  r  r  dictro  r  r   compute_all_boundsr/  r  r/  r  r  r  )
re   rb   rp   r  r  r  r  r  
arg_boundsr  s
            @rP   r  CSEProxy._bound_variable	  sY   
 	0;8ahh 455&&((ahh 233&&((--,,>>T!dkk&@&@&Ldkk88$?? **B ? ;;--11';;N;N;PQQ&&73E+L+L V0UVVV"**,, : c,56J4++T2J??""$$rR   c                l   [        U[        5      (       a  [        R                  " U5      n[        U[        R                  5      (       d   [        U5      U45       eUR                  R                  S:  Ga  U(       a  [        R                  " U[        R                  " U[        R                  5      5      nUR                  R                  S:  a.  [        R                  " US5      n[        R                  " XeU5      nOUn[         R"                  " 5       nUR                  [         R"                  " 5       :w  a  [        U[        R$                  5      (       a  UR                  [!        [&        * S5      -  n[!        UR                  U-   UR                  U-   5      nUR                  R                  S:  a!  UR                  [!        S[&        5      -  n	Xy-  nU R(                  R*                  R-                  U R(                  R.                  XWS9nU R0                  R3                  XU5      n
[5        U5      (       av  UR                  R                  S:  + n[        U[        R$                  5      (       + =(       d    UR                  R                  U:  + nU R(                  R7                  XX5        U
$ )Nr   r  )re  )rn  r   r   r  r.  ro  re  r  r0   rb  r  r   longr  ltr  r   r  Numberr   r  r  r  r  r  r  r&   r  )re   r  r   r  r  stmr  
new_bounds
neg_boundspos	sympy_varassert_lowerassert_uppers                rP   r  CSEProxy.indirect_indexing	  s    dC  ==&D$

++?d4j$-??+ ::aggc3>>$

#CD::##q(QB))BS1C %,,.Jzz[0022z$7U7U !ZZ+vgr*BB
($$t+Z-=-=-D
 ::##q(**{1f'==C!+!1J++//**4;;+>+>*WC''99#UK	5!! #

 0 0A6L)$== 

  4BL KK$$YlQrR   c                :    U R                   R                  XX45      $ rj   )r  r  r  s        rP   r  CSEProxy.check_bounds	  s     {{''EAArR   c                   XR                   R                  R                  ;   a)  [        R                   R                  R                  U5        [        U[        R                  5      (       a  U R                   R                  X5      $ U R                   R                  R                  nX;   a  X1   $ U R                   R                  X5      nUR                  S:X  a  U R                   =R                  S-  sl        U$ r<  )r  r  r  r5   r  rb  r   r   TMPr  r  r  rf  r  )re   rb   r  r  outs        rP   r  CSEProxy.load	  s    ;;??555 HH&&**40udhh//;;,,T99kkoo11$$kkt+ ==AKK  A% 
rR   c                z   X R                   R                  R                  U'   U R                   R                  (       a~  U[        R
                  R                  ;   a_  U R                   R                  R                  U5      nUR                  5        H%  nX R                   R                  R                  U'   M'     g g g rj   )	r  r  r  r  r5   r   name_to_buffer
get_outputget_mutations)re   rb   rd   r  
other_names        rP   _update_store_cacheCSEProxy._update_store_cache 
  s    ,1##D);;##0F0F(F++**55d;C!//1
:?++J7 2 )G#rR   c                    U R                   R                  R                  U5        Uc  U R                  X5        U[        R
                  R                  ;  a  U R                   R                  XX4S9  g g )N)rY   )r  r  rb  r  r5   r   r  r  r  s        rP   r  CSEProxy.store
  s]     	&&**40<$$T1qww...KKd5< /rR   c                    U R                   R                  R                  U5        U R                  X5        U[        R
                  R                  ;  a  U R                   R                  XU5      $ g rj   )r  r  rb  r  r5   r   r  r  r  s       rP   r  CSEProxy.store_reduction
  sV    &&**40  -qww...;;..tEBB /rR   c                x    U R                   =R                  S-  sl        U R                   R                  XX45      $ r<  )r  r  r  r  s        rP   r  CSEProxy.reduction
  s0     	!!Q&!{{$$U~MMrR   c                :    U R                   R                  XU5      $ rj   )r  r  r  s       rP   r  CSEProxy.scan!
  s     {{F;;rR   c                :    U R                   R                  XX45      $ rj   )r  r  r  s        rP   r  CSEProxy.sort,
  s     {{CCrR   c           	     D    U R                   R                  UUUUUUU5      $ )a
  
[Note: Inductor bucketize op]

Inputs:
-------
values: the values to be bucketized.
boundaries: a tuple containing
  (a) the name of the boundaries tensor (which must be sorted, unless
  the sorting tensor is present),
  (b) the length of the tensor in the last dimension (i.e. the length of
  one set of boundaries),
  (c) the number of elements in the underlying storage (i.e. the length
  of the flattened tensor, ignoring striding), and
  (d) the stride of the tensor in the last dimension.
boundary_indices: indices into a flattened version of the boundaries
tensor, of the same size and shape as "values".  Each index points to
the first element in the set of boundaries to be used for the
corresponding value.
indexing_dtype: the dtype to use when indexing into the boundaries
tensor.  This must be int64 or int32.  This additionally specifies the
dtype of the return value.
right: see "Details" below.
sorter: an optional tuple containing
  (a) the name of an optional sorting tensor, used to access unsorted
  boundaries without reordering the boundaries tensor, and
  (b) the stride of the tensor in the last dimension.
The values in the sorting tensor are used as indices into the *last*
dimension of the boundaries tensor, with all other indices matching.
The size of the sorting and boundaries tensors must be equivalent.
sorter_indices: must be present if the sorting array is present; see
"boundary_indices" for the equivalent definition for the boundaries
tensor.

Output:
-------
The buckets each value belongs in, within a given set of boundaries.  0
indicates a position before the first boundary, and len(boundaries_set)
represents a position after the last boundary.

Details:
--------
Given a value and a set of boundaries, calculate the bucket that each
value belongs to.  This works differently in 1-D and N-D cases.

for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [0, 4, 4, 8], right=True
return =   [[ 0, 1, 1, 1], [1, 3, 3, 4]].

for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [[0, 4], [4, 8]], right=True
return =   [[ 0, 1, 1, 1], [0, 1, 1, 2]]

Note that in the N-D boundaries case, the shape of "values" and
"boundaries" must match in every dimension _except_ the last.

When right == False, bucket i refers to range (boundaries[i], boundaries[i+1]].
When right == True,  bucket i refers to range [boundaries[i], boundaries[i+1]).

Boundaries must be non-decreasing, or a sorter must be provided which
would re-index offsets in a non-decreasing order (e.g. the second output
of torch.sort(offsets)).  Otherwise, the result is undefined.
)r  r  r  s           rP   r  CSEProxy.bucketize5
  s1    L {{$$
 	
rR   )r  r  r  )r  zKernel[Any]r  zOpsHandler[Any])rb   rz   rp   ztuple[Any, ...]r  zdict[str, Any]rw   r	   )rb   rz   rp   r	   r  r	   rw   rv  r   )
r  r  r   r!  r  r   r  r   rw   r"  r#  r?  )rb   rz   rd   r  rw   rx   rj   rA  r@  rB  rD  rF  r'  rG  )r|   r}   r~   r   rb   r   r  r  r  r  r  r  r  r  r  r  r  r  r   r4  r5  s   @rP   r  r  G	  s   D-8.t+%b // %/ 	/
 / 
/bBB&0B9=BFJB	B
"@ SW== *=3>=FO=	=CNN N &	N
 ;N 
5N	<'	<
	< (	< 
!	<D'D (D 	D
 D 
!D  4804N
N
 CN
 &	N

 $N
 N
 1N
 .N
 
N
 N
rR   r  )rO   rz   rw   rx   r'  )r   rz   r[  r  r\  r  r]  r  r^  Optional[CustomGraphModulePass]rw   rx   )r   Union[torch.device, str, None]rw   zOrderedSet[BackendFeature])r   r  ru  ra  rw   r   )r   rz   rw   zOptional[SchedulingConstructor])F)r   rz   ry  r   rw   r  )r   rz   rw   r  rv   )r  Sequence[sympy.Expr]r  r  r  r  rw   r   )r   rz   r  r  rw   rx   )r   rz   rw   r  )r  rz   rp   r	   r  r	   rw   r  )r   r(   r  r  r   r   rw   rx   )r  rz   rw   r   r   r*  )rw   r	   )
__future__r   r^   r  dataclassesenumr  r  rL   rX  r  r`   rer\   abcr   r   r   r   r   typingr	   r
   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   torch.fxtorch._prims_commonr   torch.utilsr   r  torch.utils._ordered_setr   torch.utils._sympy.numbersr   torch.utils._sympy.printersr   _PythonPrintertorch.utils._sympy.symbolr   r   r   torch.utils._sympy.value_rangesr   r   r  r   r    dtype_propagationr!   ops_handlerr"   r#   utilsr$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   virtualizedr0   r1   r2   r3   r4   r5   collections.abcr6   r7   r8   r9   custom_graph_passr:   r   r;   r<   r=   r>   r"  r@   r#  rA   rB   rC   r  rF   rG   r  ro  r  rz   r3  rb  r  _logginggetArtifactLoggerr|   rJ   	getLoggerrW  rQ   	dataclassrT   r   r   r   r   r   r  r  r  r  KernelArgTyper  r   r  rX  rY  r_  ra  rq  rv  rp  r{  r}  cacherm  r  r  r  bfloat16r  float16r   rI  float64int8int16r>  r  r   uint16r&  uint64r  r  r  r  r7  compile
IGNORECASEr  r  r  r,  r  INT_TO_FLOATr  r  r  r  r  r  r  r  r  r  rx  r  r0  r   ReductionCacheKeyr|  r  r  r  rQ  rS  r  r   s   0rP   <module>r     s   "          	 	  #  
 
 
 ,    ? ) / - G O O D  : ;    Q P BB$9>>$DD-	B$hy&9%:N%JK23sELL()J F~~//*E!=
   >/		 /(C  Ta= a aH* * # # #       ! ! ! = = = lIw8H,VW,.) .5" 5"p :< 6 ;DF A F8 @D:>
7
7,
7 /
7 !=	
7
 8
7 

7
&T 
&3*33$3*35C3	3U
 &+

"
!
V K K\UU$U  U 	U;;&7;	;, 
NNEKK	MM5;;> JJMMMMJJKKKKKKKKLLLLLL

E 	
> : ,''' ' 	'T::!0:9D:	:2aB aBH%N %S1 S1l zz";2==Q D;#%5z# D;N - - -  6: `6;HH-`6 ;HH/- 	`6 ;HH/- 	`6$ ;HH/- 	%`60 ;HH/- 	1`6< ;HH*)	=`6L ;HH(0	M`6X 	;HH66>	Y`6h ;HH1i`6r ;HH2s`6| ;HH1}`6F ;HH2 G`6P ;HH%8$Q`6^ 	;HH&%		_`6j ;HH%8	k`6v 	;HH&	w`6@ ;HH+A`6L %;HH88)	M`6X %;HH88)	Y`6d %;HH8)e`6n %;HH8)o`6z 
;HH'
{`6D ;HH(E`6N ;HHc	O`6^ ,;HH?0_`6h ,;HH?0i`6t &;HH9*u`6~ 
;HH*
`6H );HHD-I`6R );HHD-S`6\ );HHD-]`6f );HHD-g`6p (;HHC,q`6z $1;HHL5${`6D $1;HHL5$E`6N $1;HHL5$O`6X $1;HHL5$Y`6b ';HHB+c`6l (;HHC,m`6v (;HHC,w`6 2 `F	-# -"> *Z 
 N N N 
 ,J JZ
!; !;H 5+;Tk5c!1223	5e'/=0
1 eP
< 
<\3Wgo. \3~     p" p"f|
~ |
O8s   c