
    hM                         S SK JrJrJr  S SKJrJrJrJr  S SK	J
r
  S SKJr  S SKJrJrJr  S SKJr  S SKrS SKrS SKrS SKrS SKrS SKJr  S	\4S
 jrS rS r\" SS9 " S S5      5       r " S S\5      rg)    )BaseBackend	GPUTargetLanguage)irpassesllvmamd)knobs)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     S $ )Nc                     g)N   r   r    )lhs_typerhs_types     V/var/www/fran/franai/venv/lib/python3.13/site-packages/triton/backends/amd/compiler.py<lambda>"get_min_dot_size.<locals>.<lambda>   s    i    r   r   s    r   get_min_dot_sizer      s
     0/r   c                 v    [         R                  R                  c  U S:H  $ [         R                  R                  $ Ngfx942)r
   r	   use_block_pingpongarchs    r   is_pingpong_schedule_enabledr%      s+    !&!=!=!EDHg599KgKggr   c                 v    [         R                  R                  c  U S:H  $ [         R                  R                  $ r    )r
   r	   use_in_thread_transposer#   s    r   is_in_thread_transpose_enabledr(      s.    !&!B!B!JDHqPUPYPYPqPqqr   T)frozenc                   Z   \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S'   S	r
\\S
'   Sr\\S'   Sr\\S'   Sr\\S'   S	r\\S'   Sr\\   \S'   Sr\\   \S'   Sr\\S'   Sr\\   \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S '   S!r\\S"'   S#r\\S$'   S% r S& r!Sr"g	)'
HIPOptions      	num_warpsr   waves_per_eu   
num_stagesnum_ctasNextern_libsr   cluster_dimsFdebugTsanitize_overflowr$   )fp8e5supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypesieeedefault_dot_input_precision)r:   allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridr   matrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namenoneschedule_hintc                 D   [        U R                  SS 5      nUS:  a  SOSn[        R                  U SU5        U R                  S:  a   U R                  U R                  S-
  -  S:X  d   S	5       eU R                  S
:X  a  U R
                  S:X  d   S5       e[        [        5      R                  S-  nU R                  c  0 O[        U R                  5      nS H  n[        X5 S3-  5      XE'   M     [        R                  U S[        UR                  5       5      5        g )N   
       @   	warp_sizer   r   znum_warps must be a power of 2gfx950zgfx950 only accepts kpack == 1lib)ocmlocklz.bcr3   )intr$   object__setattr__r.   r@   r   __file__parentr3   dictstrtupleitems)self	gfx_majorrM   default_libdirr3   rO   s         r   __post_init__HIPOptions.__post_init__E   s    		!B(	#r/Br	4i8~~!t~~!9K'LQR&R 	0/	0R 99 ::?D$DD?h..6 ,,4b$t?O?O:P#C">e3K#?@K $4k6G6G6I0JKr   c           	          SR                  U R                  R                  5        VVs/ s H  u  pU SU 3PM     snn5      n[        R                  " UR                  S5      5      R                  5       $ s  snnf )N_-utf-8)join__dict__rZ   hashlibsha256encode	hexdigest)r[   namevalkeys       r   hashHIPOptions.hashU   sa    hh9L9L9NO9NID4&#9NOP~~cjj12<<>> Ps   A7
)#__name__
__module____qualname____firstlineno__r.   rR   __annotations__r/   r1   r2   r3   rW   r4   rY   r5   boolr6   r$   rX   r8   r   r9   r;   r<   r=   r>   r?   r@   rA   rB   rD   rF   r^   rm   __static_attributes__r   r   r   r+   r+      s    IsL#JHcK#L%#E4"t"D#'2%*246%uSz6'--/9 %*9!d!$)T) !#!E3N$$)*!3*L#"  M3L ?r   r+   c                   f  ^  \ rS rSr\S\4S j5       rS\SS4U 4S jjrS\4S jr	S\
4S jrS	 rS
 rS\\\4   4S jrS r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       rS r\R<                  " 5       S 5       rSr U =r!$ )
HIPBackendZ   r   c                      U R                   S:H  $ )NrC   )backendr   s    r   supports_targetHIPBackend.supports_target\   s    ~~&&r   returnNc                 t   > [         TU ]  U5        [        UR                  [        5      (       d   eSU l        g )Nhsaco)super__init__
isinstancer$   rX   
binary_ext)r[   r   	__class__s     r   r   HIPBackend.__init__`   s.     &++s++++!r   c                      SUR                    3$ )Nhip:r#   r[   optionss     r   get_target_nameHIPBackend.get_target_namee   s    gll^$$r   c                    S[         R                  R                  =(       d    U R                  R                  0nU R                  R                  S:X  aB  [        [        R                  5      nUR                  S15        [        [        U5      5      US'   SU;  a  [        [        R                  5      nU R                  R                  S:X  a  UR                  1 Sk5        O[U R                  R                  S:X  a  UR                  SS	15        O-S
U R                  R                  ;   a  UR                  SS	15        [        [        U5      5      US'   SU;  a  [         R                  R                  US'   UR                  [        R                  R                  5        Vs0 s H  nXQ;   d  M
  X   c  M  XQU   _M     sn5        [        S0 UD6$ s  snf )Nr$   r!   tf32r<   r8   >   fp8e4b8fp8e4nvfp8e5b16rN   r   r7   gfx12r=   r   )r
   runtimeoverride_archr   r$   setr+   r<   updaterY   sortedr8   languagedefault_fp_fusion__dataclass_fields__keys)r[   optsargsr<   r8   ks         r   parse_optionsHIPBackend.parse_optionsh   s   33Gt{{7G7GH ;;x'+.z/V/V+W((//938@\9]3^D/0!-#&z'F'F#G {{8+$++,NO!!X-$++Y,@ADKK,,,$++Y,@A+08L1M+ND'(T)',~~'G'GD#$)H)H)M)M)O ;)OA	  &*g  QQZ)O ; 	<!D!!;s   #	G0G7	Gc                     UR                   UR                  UR                  UR                  S   UR                  S   UR                  S   4$ )Nr   r   r0   )r.   r2   sharedr4   )r[   metadatas     r   pack_metadataHIPBackend.pack_metadata   sO    OO!!!$!!!$!!!$
 	
r   c                 0    S[        U R                  5      0$ )Nmin_dot_size)r   r   r   s     r   get_codegen_implementation%HIPBackend.get_codegen_implementation   s     0 =>>r   c                     SSK Jn  SU0$ )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )r[   r   s     r   get_module_mapHIPBackend.get_module_map   s    719==r   c                 0    [         R                  " U5        g N)r	   load_dialects)r[   ctxs     r   r   HIPBackend.load_dialects   s    #r   c                     SS K nSn[        U S5      (       a  U R                  5       U:*  $ [        XR                  5      (       a2  [        U S5      (       a!  U R                  5       R                  5       U:*  $ g)Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   r   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbHIPBackend.is_within_2gb   sd    
3$$==?j00c<<((WS:K-L-L&&(--/:==r   c                 N    [         R                  " U 5      nSU ;   a  USS//-  nU$ )NSztt.pointer_rangerK   )r   
parse_attr)descrets     r   r   HIPBackend.parse_attr   s1    $$T*$;',--C
r   c                     [         R                  " X40 UD6n[        R                  R                  (       a%  US:X  a  [
        R                  U 5      (       a  US-  nU$ )Ntensorr   )r   get_arg_specializationr
   r	   use_buffer_opsrw   r   )r   tykwargsr   s       r   r   !HIPBackend.get_arg_specialization   sK    00CFC 99##h:;S;STW;X;X3JC
r   c                     [         R                  R                  n U b"  [        U 5      nUR	                  5       (       a  U$ [        [
        5      R                  S-  nUR	                  5       (       a  U$ [        S5      nUR	                  5       (       a  U$ [        S5      nUR	                  5       (       a  U$ [        S5      e)Nzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r
   r	   lld_pathr   is_filerU   rV   	Exception)lld_env_pathllds     r   path_to_rocm_lldHIPBackend.path_to_rocm_lld   s     yy))#|$C{{}}
8n##&77;;==J./;;==J$%;;==Jqrrr   c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R!                  U5        UR#                  U 5        U $ r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_triton_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   	make_ttirHIPBackend.make_ttir   s    __S[[)
!!"%..r2<<R@''+#))"-b!##B'$$R(##B'
s
r   c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  USUR                   3UR                  UR                  UR                  5        UR                  U 5        [         R                  " U R                  5      nUR                  5         [        R                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [         R                  R                  R#                  X2R                  UR$                  UR&                  5        [        R                  R                  U5        [         R                  R                  R)                  U5        [        R                  R+                  US5        [         R                  R                  R-                  U5        [        R                  R/                  U5        [        R0                  R3                  U5        [        R
                  R5                  U5        [        R0                  R3                  U5        [6        R                   R8                  n[6        R                   R:                  n[6        R                   R<                  nUR>                  S:X  a  S=pE[         R                  R                  RA                  X2RB                  XEU5        U(       a3  [         R                  R                  RE                  X2R                  5        [        R0                  R3                  U5        UR>                  RG                  5       S:w  a3  [         R                  R                  RI                  X2R>                  5        [        R                  R+                  US5        [        R                  R                  U5        [        R                  RK                  U5        [M        UR                  5      (       aH  [         R                  R                  RO                  U5        [        R                  R                  U5        [         R                  R                  RQ                  U5        [S        UR                  5      nU(       aC  URB                  S:X  a3  [         R                  R                  RU                  X2RB                  5        [6        R                   RV                  (       a{  [         R                  R                  RY                  U5        [        R0                  R3                  U5        [         R                  R                  R[                  X2R                  5        [         R                  R                  R]                  U5        [        R0                  R3                  U5        [        R0                  R_                  U5        [        R0                  Ra                  U5        U(       a3  [         R                  R                  Rc                  X2R                  5        UR                  U 5        U $ )Nr   Tzlocal-prefetchr   rE   r0   )2r   r   r   r   r   r   add_convert_to_ttgpuirr$   r.   rM   r2   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr	   add_accelerate_matmulr?   r@   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsadd_fuse_nested_loopsr   r   r   r
   global_prefetchlocal_prefetchuse_async_copyrF   add_stream_pipeliner1   add_coalesce_async_copylowerinsert_instruction_sched_hintsadd_reduce_data_duplicationr(   add_in_thread_transposeadd_reorder_instructionsr%   add_block_pingpongr   add_canonicalize_pointersadd_convert_to_buffer_opsadd_fold_true_cmpir   r   add_update_async_wait_count)r   r   r   r   r   r   r   r"   s           r   
make_ttgirHIPBackend.make_ttgir   s   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s__S[[)
##B'44R833B7

00\\7C_C_ahanano44R8

00400T:

77;,,R0''+##B'''+))331111   $44/00O

..r3E3EhvwJJ66r<<H''+  &&(F2JJ==bBWBWX00T:44R82226)',,77JJ66r:NN88<

33B79',,G'"4"4"9JJ11"6H6HI99##JJ88<MM++B/JJ88\\J

--b1''+b!$$R(JJ::2||L
s
r   c                    U n[         R                  " UR                  5      nUR                  5         [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R
                  R                  U5        UR                  U5        U$ r   )r   r   r   r   r   r   r   r   add_sccpr   add_loop_aware_cser    add_combine_tensor_select_and_ifr   )srcr   r   r   r   s        r   	ttgir_optHIPBackend.ttgir_opt  s    __S[[)
""2&r"&&r*((,77;
s
r   c                    U n[         R                  " UR                  5      nUR                  5         Sn[        R
                  R                  R                  XBR                  U5        [
        R                  R                  U5        [
        R                  R                  U5        [
        R                  R                  U5        Sn[        R
                  R                  R                  XBR                  U5        [
        R                  R                  U5        [
        R                  R!                  U5        [
        R                  R#                  U5        [
        R                  R%                  U5        [
        R                  R                  U5        [
        R                  R!                  U5        [
        R                  R'                  U5        UR(                  R+                  5       S:w  a>  [        R
                  R                  R-                  XBR                  UR.                  5        [0        R2                  R4                  (       d  [
        R6                  R9                  U5        [        R
                  R                  R;                  XF5        UR=                  U5        [>        R@                  " 5         [>        R                  " 5       n[>        RB                  " X75      n[        RD                  " U5        Sn	[0        R2                  RF                  (       a  Sn	[>        RH                  " U[        RJ                  UR                  U	5        [        RL                  " XR                  5        [        RN                  " US5        [        RP                  " USS5        [        RP                  " US	S5        [        RP                  " US
S5        [        RP                  " USURR                  S:H  5        URU                  5        V
s/ s H  oRW                  5       (       a  M  U
PM     nn
US   RY                  [        RZ                  5        US   R]                  SSUR^                  URR                  -   35        US   R]                  SUR`                   5        URb                  (       a  SOSnUS   R]                  SU5        [0        R2                  RF                  (       a'  US   Re                  S5        US   Rg                  5         [        Rh                  " US   5        [0        R2                  RF                  (       a\  [k        [l        5      Rn                  S-  n[q        US-  5      [q        US-  5      [q        US-  5      /n[>        Rr                  " X5        OeURt                  (       aT  URt                   VVs/ s H%  u  nn[        Rv                  " X5      (       d  M#  UPM'     nnn[>        Rr                  " X5        [>        Rx                  " U[>        Rz                  UR                  S/ UR|                  5        [0        R                  R~                  (       a  [        R                  " US   5        U R                  S5      US'   [        R                  " U5        [        R                  " U5        [q        U5      $ s  sn
f s  snnf )Nr   TrE    +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rL   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr:   zdenormal-fp-math-f32rO   z
asanrtl.bczocml.bczockl.bcz
ttg.sharedr   )Dr   r   r   r   r	   r   r   add_optimize_lds_usager$   convertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rF   r   lower_instruction_sched_hintsr1   r
   compilationdisable_line_infollvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleattach_target_tripleenable_asanattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrM   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr.   r/   rA   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   rU   rV   rX   link_extern_libsr3   need_extern_liboptimize_moduleOPTIMIZE_O3r=   scalarize_packed_fops#add_scalarize_packed_fops_llvm_passget_int_attrcleanup_bitcode_metadatadisable_print_inline)r  r   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   llvm_modtarget_featuresfnfnsdenormal_moder]   pathsrj   paths                    r   	make_llirHIPBackend.make_llir#  s   __S[[)
 

11"llOT$$R(**2.11"5 	

((\\9E''+b!''+**2.''+b!$$R(  &&(F2JJ<<RwOaOab  22MM&&r*

55bD
s 	,,.>>#/  *((&Ox):):GLL/Z 	Hll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224P4b<M<M<Or4PA > >?A8Bw?P?PQXQbQb?b>c:de 	A0W5I5I4JL+2+E+E6A1=A((F((2F##%
 	  Q(((!(^22U:NN\12NY./NY./E
 !!(2  .5.A.Ai.AltTSEXEXYaEhT.AEi!!(2Xt'7'7r2wOgOgh99**33CF; !--l;$$X. 	  *8}_ Q@ js   	[%["[([c           	         [         R                  " SU 5      n[        U5      S:X  d   eUS   US'   / nUR                  S:X  a  UR	                  S5        [
        R                  " U [        R                  UR                  SXBR                  S5      n[        R                  R                  (       a  [        S	5        [        U5        U$ )
Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rj   	attentionzsink-insts-to-avoid-spillsr  Fz!// -----// AMDGCN Dump //----- //)refindalllenrF   appendr   translate_to_asmr	   r%  r$   r=   r
   dump_amdgcnprint)r  r   r   namesflagsamdgcns         r   make_amdgcnHIPBackend.make_amdgcn  s    
 

QSVW5zQ 8
   K/LL56&&sC,=,=w||RQVXpXprwx99  56&Mr   c                    Sn[         R                  R                  (       a  Sn[        R                  " XR
                  U5      n[        R                  5       n[        R                  " 5        n[        R                  " 5        n[        UR                  S5       nUR                  U5        S S S 5        [        R                  " USSSUR                  SUR                  /5        S S S 5        [        UR                  S5       n	U	R                  5       n
S S S 5        S S S 5        W
$ ! , (       d  f       N= f! , (       d  f       NX= f! , (       d  f       N:= f! , (       d  f       W
$ = f)	Nr  r  wbz-flavorgnuz-sharedz-orb)r
   r  r#  r	   assemble_amdgcnr$   rw   r   tempfileNamedTemporaryFileopenrj   write
subprocess
check_callread)r  r   r   r=  r   	rocm_pathtmp_outtmp_infd_infd_outr   s              r   
make_hsacoHIPBackend.make_hsaco  s   ((&O##CG//1	((*g,,.&&++t,KK& -%%y)UIv{{\`bibnbn&op / gllD)Vkkm * + 
 -, /. *) +* 
sT   .ED8D'-9D8&EE	E'
D51D88
E	E	
E	E
E)c                    ^ ^ U[         R                  :X  a  UU 4S jUS'   UU 4S jUS'   OU[         R                  :X  a
  UU 4S jUS'   UU 4S jUS'   UU 4S jUS	'   UU 4S
 jUS'   g )Nc                 (   > TR                  XT5      $ r   )r   r  r   r   r[   s     r   r   'HIPBackend.add_stages.<locals>.<lambda>  s    4>>#QX3Yr   r   c                 (   > TR                  XT5      $ r   )r  rh  s     r   r   ri    s    DOOCSZ4[r   ttgirc                 (   > TR                  XT5      $ r   )r	  rh  s     r   r   ri    s    DNN3RY4Zr   c                 (   > TR                  XT5      $ r   )rC  rh  s     r   r   ri    s    t~~cW/Ur   llirc                 (   > TR                  XT5      $ r   )rQ  rh  s     r   r   ri    s    1A1A#QX1Yr   rP  c                 (   > TR                  XT5      $ r   )rd  rh  s     r   r   ri    s    w0Wr   r   )r   TRITONGLUON)r[   stagesr   r   s   ` ` r   
add_stagesHIPBackend.add_stages  sR    x&YF6N[F7O'ZF7OUvYxWwr   c                 v    [         R                  " [        R                  5       S/SS9nU SU R                   3$ )Nz	--versionrc   )encodingrb   )r\  check_outputrw   r   r   )r[   versions     r   rm   HIPBackend.hash  s8    )):+F+F+H+*Vahi!DKK=))r   )r   )"ro   rp   rq   rr   staticmethodr   r{   r   rX   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r	  rC  rQ  rd  rt  	functools	lru_cacherm   ru   __classcell__)r   s   @r   rw   rw   Z   s\   '	 ' '"y "T "
%# %"S "2
?>S*_ 5 >
       s s&    ; ;z   k kZ  *   X * *r   rw   )triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   dataclassesr   typingr   r   r   typesr   rf   rX  rG  r\  r|  pathlibr   r   r%   r(   r+   rw   r   r   r   <module>r     sw    E E 5 5  ! # #    	   0Y 0hr $9? 9? 9?xk* k*r   