
    hp\                        S SK r S SKrS SKrS SKrS SKJr  S SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKJr  \R                   R#                  \R                   R%                  \5      5      r\R                   R)                  \S5      /rS	 r\ R.                  " 5       S
 5       r " S S\5      rS rSSSSSS.rSSSSSS.rSrS rS r  " S S\5      r! " S S\5      r"g)    N)Path)knobs)	GPUTarget)	GPUDriver)compile_module_from_src)TensorDescriptorincludec                   ^ ^^^^ SS K nUR                  5       S:w  a  g SS KmSSKJnJnJnJmJmJn   " UU4S jSTR                  5      nTR                  X5" U5      U" U5      U" U5      5      n TR                  S5      R                  nUT/Ul        X8l        SmTR!                  TS-   5      n	UU U4S	 jn
U" U" U
5      U	5      (       a%  ["        R$                  " TR'                  U	5      5      $ g ! [         a     g f = f)
Nr   Linux)c_charc_intc_size_tc_void_pc_char_pPOINTERc                   *   > \ rS rSrS Y4S Y 4/rSrg)8_find_already_mmapped_dylib_on_linux.<locals>.DlPhdrInfo   	dlpi_addr	dlpi_name N)__name__
__module____qualname____firstlineno___fields___static_attributes__)r   r   s   T/var/www/fran/franai/venv/lib/python3.13/site-packages/triton/backends/amd/driver.py
DlPhdrInfor      s    (#(#
    r   z	libc.so.6i      c           
         > U R                   R                  n[        [        R                  " U5      5      nTUR
                  ;   a&  TR                  X#[        T[        U5      5      5        gg)Nr!   r   )	contentsr   r   osfsdecodenamememmoveminlen)infosizedatar   pctypeslib_namemax_path_lengths        r   callback6_find_already_mmapped_dylib_on_linux.<locals>.callback2   sP    MM++	Y'(qvvNN4CY,PQr    )platformsystemr.   r   r   r   r   r   r   	Structure	CFUNCTYPECDLLdl_iterate_phdr	Exceptionargtypesrestypecreate_string_bufferr$   r%   	string_at)r/   r3   r   r   r   r   r   
callback_tr8   pathr1   r   r   r.   r0   s   `          @@@@r   $_find_already_mmapped_dylib_on_linuxr@      s    G#
 KK
 
V%% 
 !!%)<gh>OQXY_Q`aJ ++k2BB !+H5O#O&&':;D z(+T22{{6++D122+  s   2C4 4
D Dc                     Sn [         R                  R                  =n(       aM  UR                  U 5      (       a&  [        R
                  R                  U5      (       a  U$ [        SU SU  35      e[        U 5      nU(       a7  [        R
                  R                  U5      (       a  U$ [        SU SU  35      e/ n[        R
                  R                  [        R
                  R                  [        5      SU 5      n[        R
                  R                  U5      (       a  U$ UR                  U5        SS KnUR                  5       nUR                  5       nUR                   (       a  U/U-   nU H^  n[        R
                  R                  USSU 5      n[        R
                  R                  U5      (       a  Us  $ UR                  U5        M`     [        R"                  " S	5      n	U	(       ap  U	R%                  S
5       H[  n
[        R
                  R                  X5      n[        R
                  R                  U5      (       a  Us  $ UR                  U5        M]     [&        R(                  " SS/5      R+                  5       nUR-                  5        Vs/ s H;  oR/                  5       R                  U 5      (       d  M(  UR%                  5       S   PM=     nnU H<  n[        R
                  R                  U5      (       a  Us  $ UR                  U5        M>     [        R
                  R                  SU 5      n[        R
                  R                  U5      (       a  U$ UR                  U5        [        SU  SU 35      es  snf )Nzlibamdhip64.sozTRITON_LIBHIP_PATH 'z' does not point to a valid zmemory mapped 'z'' in process does not point to a valid libr   torchLD_LIBRARY_PATH:z/sbin/ldconfigz-pz/opt/rocm/lib/zcannot locate z after attempted paths )r   amdlibhip_pathendswithr$   r?   existsRuntimeErrorr@   joindirname__file__appendsitegetsitepackagesgetusersitepackagesENABLE_USER_SITEgetenvsplit
subprocesscheck_outputdecode
splitlinesstrip)r/   env_libhip_pathmmapped_pathpaths	local_librP   site_packages	user_siter?   env_ld_library_pathdflibslinelocsloccommon_install_paths                    r   _get_path_to_hip_runtime_dylibri   @   s   H  ))////##H--"''..2Q2Q""1/1BB^_g^hijj 8AL77>>,''_\N:abjaklmmE RWW__X6xHI	ww~~i  	LL ((*M((*I"m3ww||D'5(;77>>$KT	  ))$56$**3/AQ)Aww~~a  LLO	 0 ""$4d#;<CCED *.):^):jjl>S>ST\>]DJJL):D^77>>#JS  '',,'7B	ww~~)**""	LL$%
z1HP
QQ _s   'NNc                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HIPUtils   c                 n   > [        U S5      (       d  [        [        U ]  U 5      U l        U R                  $ )Ninstance)hasattrsuperrk   __new__rn   )cls	__class__s    r   rq   HIPUtils.__new__   s-    sJ'' 37<CL||r    c                    [        5       n[        [        R                  R	                  [
        S5      5      R                  5       nUR                  SUS5      n[        US[        S9nUR                  U l
        UR                  U l        g )Nzdriver.cz/*py_libhip_search_path*/r!   	hip_utilssrcr&   include_dirs)ri   r   r$   r?   rL   rM   	read_textreplacer   ry   load_binaryget_device_properties)selfrH   rx   mods       r   __init__HIPUtils.__init__   si    46277<<45??A kk5{AF%#Kl[??%(%>%>"r    )r}   r|   )r   r   r   r   rq   r   r   __classcell__rs   s   @r   rk   rk      s    
	? 	?r    rk   c                 @    U S   S:X  a  gSSSSSSS	S
SSSSSSSS.U    $ )Nr   *hipDeviceptr_tint32_tint8_tint16_tint64_tuint32_tuint8_tuint16_tuint64_tdouble)i1i8i16i32i64u1u8u16u32u64fp16bf16fp32f32fp64r   )tys    r   	ty_to_cppr      sQ    	!u|  	!
 
r    r   r   r   )r   r   r   r   r   	pack_fp16	pack_bf16	pack_fp32	pack_fp64
piiiKKOOOOc           "        ^^^ S nU4S jmU4S jmU4S jm[        U" UR                  5       5      5       VVs0 s H  u  pEXE_M	     nnnSR                  UR                  5        Vs/ s H  nT" U5      PM     sn5      n[        U-   nSR                  [	        TUR                  5       5      5      n[        [        [        UR                  S5      5      5      n[        U5       V	Vs0 s H  u  pX_M	     nn	n[        U5      S:  a)  SSR                  S	 UR                  5        5       5      -   OSn
/ nUR                  5        HU  u  pUS
:X  a  M  U[        ;   a  UR                  [        U    SU	 35        M6  UR                  [        U5       SU	 35        MW     SR                  U5      n/ nUR                  5        Hb  u  pUS   S:X  a  UR                  SU	 S35        M%  U[        ;   a  UR                  SU	 S35        MF  US
:w  d  MN  UR                  SU	 35        Md     UR                  5        V	Vs/ s H-  u  pU[        ;   d  M  [        U    SU	 S[        U    SU	 S3PM/     nn	n[        5       n[        [!        [        U5      5      5      nUR                  5        V	Vs/ s H  u  pUS
:w  d  M  SU	 3PM     nn	nUR                  S5        SU S[        U5      S:  a  SU-   OS SSR                  U5       SU SU SSR                  UR                  5        V	Vs/ s H  u  pT" U5       SU	 S3PM     snn	5       SU S U
 S!SR                  U5       S"SR                  UR                  5        V	Vs/ s H  u  pUS   S:X  a  S#U	 S$U	 SU	 S%U	 S&3	OSPM!     snn	5       S'[        U5      S:  a  SSR                  U5      -   OS S(3nU$ s  snnf s  snf s  snn	f s  snn	f s  snn	f s  snn	f s  snn	f ))Nc                 
   / nU  H  n[        U[        5      (       a  UR                  S5      (       a  UR                  S5      S-   n[        R
                  " SU5      R                  5       nUR                  SU-   5        [        SU-  5       H  nUR                  S5        M     [        U5       H  nUR                  S5        M     [        U5       H  nUR                  S5        M     M  UR                  U5        M     U$ )	N
tensordesc,r!   ztensordesc<([^[>]*)r      r   r   )	
isinstancestr
startswithcountrematchgrouprO   range)	signatureoutputsigndimdtype_s         r   _expand_signature(make_launcher.<locals>._expand_signature   s     C#s##|(D(Dyy~)!6<BBDcEk*q4xAMM%( ) tAMM%( %tAMM%( % c"% ( r    c                 h   > [        U [        5      (       a  SR                  [        TU 5      5      $ U $ )Nr   )r   tuplerL   map)r   _serialize_signatures    r   r   +make_launcher.<locals>._serialize_signature   s,    c5!!88C 4c:;;
r    c                    > [        U [        5      (       a!  SR                  [        TU 5      5      nSU S3$ U S   S:X  a  gU S:X  a  g[	        U 5      $ )Nr   []r   r   z	PyObject*	constexprr   r   rL   r   r   )r   val_extracted_types     r   r   &make_launcher.<locals>._extracted_type   sV    b%  ((334Cse1:a5C<}r    c                    > [        U [        5      (       a!  SR                  [        TU 5      5      nSU S3$ U S   S:X  a  gU S:X  a  gSS	S
SSSSSSSS.
[	        U 5         $ )N ()r   r   Or   rb   lbhiLBHIK)
r   longr   r   r   r   r   r   r   r   r   )r   r   	format_ofs     r   r    make_launcher.<locals>.format_of   s    b%  ''#i,-Cse1:a5C<
 B- 	r    r   r   r   z, c              3   0   #    U  H  u  pS U 3v   M     g7f)z&_argNr   ).0r   r   s      r   	<genexpr> make_launcher.<locals>.<genexpr>  s      L:K5:Ks   r   z argr   ptr_infoz.dev_ptr_arg_storagez _argz_storage = z(_argz);z&argz&global_scratcha;  
#define __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <Python.h>
#include <dlfcn.h>
#include <stdbool.h>
#include <dlfcn.h>

// The list of paths to search for the HIP runtime library. The caller Python
// code should substitute the search path placeholder.
static const char *hipLibSearchPaths[] = {"aj  "};

// The list of HIP dynamic library symbols and their signature we are interested
// in this file.
#define HIP_SYMBOL_LIST(FOR_EACH_ERR_FN, FOR_EACH_STR_FN)                     \
  FOR_EACH_STR_FN(hipGetErrorString, hipError_t hipError)                     \
  FOR_EACH_ERR_FN(hipModuleLaunchKernel, hipFunction_t f,                     \
                  unsigned int gridDimX, unsigned int gridDimY,               \
                  unsigned int gridDimZ, unsigned int blockDimX,              \
                  unsigned int blockDimY, unsigned int blockDimZ,             \
                  unsigned int sharedMemBytes, hipStream_t stream,            \
                  void **kernelParams, void **extra)                          \
  FOR_EACH_ERR_FN(hipModuleLaunchCooperativeKernel, hipFunction_t f,          \
                  unsigned int gridDimX, unsigned int gridDimY,               \
                  unsigned int gridDimZ, unsigned int blockDimX,              \
                  unsigned int blockDimY, unsigned int blockDimZ,             \
                  unsigned int sharedMemBytes, hipStream_t stream,            \
                  void **kernelParams, void **extra)                          \
  FOR_EACH_ERR_FN(hipPointerGetAttribute, void *data,                         \
                  hipPointer_attribute attribute, hipDeviceptr_t ptr)

// The HIP symbol table for holding resolved dynamic library symbols.
struct HIPSymbolTable {
#define DEFINE_EACH_ERR_FIELD(hipSymbolName, ...)                             \
  hipError_t (*hipSymbolName)(__VA_ARGS__);
#define DEFINE_EACH_STR_FIELD(hipSymbolName, ...)                             \
  const char *(*hipSymbolName)(__VA_ARGS__);

  HIP_SYMBOL_LIST(DEFINE_EACH_ERR_FIELD, DEFINE_EACH_STR_FIELD)
};

static struct HIPSymbolTable hipSymbolTable;

bool initSymbolTable() {
  // Use the HIP runtime library loaded into the existing process if it exits.
  void *lib = dlopen("libamdhip64.so", RTLD_NOLOAD);

  // Otherwise, go through the list of search paths to dlopen the first HIP
  // driver library.
  if (!lib) {
    int n = sizeof(hipLibSearchPaths) / sizeof(hipLibSearchPaths[0]);
    for (int i = 0; i < n; ++i) {
      void *handle = dlopen(hipLibSearchPaths[i], RTLD_LAZY | RTLD_LOCAL);
      if (handle) {
        lib = handle;
      }
    }
  }
  if (!lib) {
    PyErr_SetString(PyExc_RuntimeError, "cannot open libamdhip64.so");
    return false;
  }

  // Resolve all symbols we are interested in.
  dlerror(); // Clear existing errors
  const char *error = NULL;
#define QUERY_EACH_FN(hipSymbolName, ...)                                     \
  *(void **)&hipSymbolTable.hipSymbolName = dlsym(lib, #hipSymbolName);       \
  error = dlerror();                                                          \
  if (error) {                                                               \
    PyErr_SetString(PyExc_RuntimeError,                                       \
                    "cannot query " #hipSymbolName " from libamdhip64.so");   \
    dlclose(lib);                                                             \
    return false;                                                             \
  }

  HIP_SYMBOL_LIST(QUERY_EACH_FN, QUERY_EACH_FN)

  return true;
}

static inline void gpuAssert(hipError_t code, const char *file, int line)
{
   if (code != HIP_SUCCESS)
   {
      const char* prefix = "Triton Error [HIP]: ";
       const char* str = hipSymbolTable.hipGetErrorString(code);
      char err[1024] = {0};
      snprintf(err, 1024, "%s Code: %d, Messsage: %s", prefix, code, str );
      PyErr_SetString(PyExc_RuntimeError, err);
   }
}

#define HIP_CHECK(ans) { gpuAssert((ans), __FILE__, __LINE__); }

static void _launch(int gridX, int gridY, int gridZ, int num_warps, int num_ctas, int launch_cooperative_grid, int clusterDimX, int clusterDimY, int clusterDimZ, int shared_memory, hipStream_t stream, hipFunction_t functionz>) {
  hipDeviceptr_t global_scratch = 0;
  void *params[] = { z };
  if (gridX*gridY*gridZ > 0 && launch_cooperative_grid) {
    HIP_CHECK(hipSymbolTable.hipModuleLaunchCooperativeKernel(function, gridX, gridY, gridZ, z*num_warps, 1, 1, shared_memory, stream, params, 0));
    return;
  }
  if (gridX*gridY*gridZ > 0) {
    HIP_CHECK(hipSymbolTable.hipModuleLaunchKernel(function, gridX, gridY, gridZ, a	  *num_warps, 1, 1, shared_memory, stream, params, 0));
  }
}

typedef struct _DevicePtrInfo {
    hipDeviceptr_t dev_ptr;
    bool valid;
} DevicePtrInfo;

static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {
  DevicePtrInfo ptr_info;
  ptr_info.dev_ptr = 0;
  ptr_info.valid = true;
  if (PyLong_Check(obj)) {
    ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(obj);
    return ptr_info;
  }
  if (obj == Py_None) {
    // valid nullptr
    return ptr_info;
  }
  PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
  if(ptr){
    PyObject *empty_tuple = PyTuple_New(0);
    PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
    Py_DECREF(empty_tuple);
    Py_DECREF(ptr);
    if (!PyLong_Check(ret)) {
      PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
      ptr_info.valid = false;
      return ptr_info;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
    if(!ptr_info.dev_ptr)
      return ptr_info;
    uint64_t dev_ptr;
    hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
    if (status == hipErrorInvalidValue) {
        PyErr_Format(PyExc_ValueError,
                     "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
        ptr_info.valid = false;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)dev_ptr;
    Py_DECREF(ret);
    return ptr_info;
  }
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
  return ptr_info;
}

static uint16_t pack_fp16(double f) {
    uint16_t result;
    // from https://github.com/python/pythoncapi-compat
#if 0x030600B1 <= PY_VERSION_HEX && PY_VERSION_HEX <= 0x030B00A1 && !defined(PYPY_VERSION)
    _PyFloat_Pack2(f, (unsigned char*)&result, 1);
#else
    PyFloat_Pack2(f, (unsigned char*)&result, 1);
#endif
    return result;
}

static uint16_t pack_bf16(double f) {
    float f32 = (float)f;
    uint32_t u32 = *(uint32_t*)&f32;
    return (uint16_t)(u32 >> 16);
}

static uint32_t pack_fp32(double f) {
    float f32 = (float)f;
    return *(uint32_t*)&f32;
}

static uint64_t pack_fp64(double f) {
    return *(uint64_t*)&f;
}

static PyObject* launch(PyObject* self, PyObject* args) {
  int gridX, gridY, gridZ;
  uint64_t _stream;
  uint64_t _function;
  int launch_cooperative_grid;
  PyObject *launch_enter_hook = NULL;
  PyObject *launch_exit_hook = NULL;
  PyObject *kernel_metadata = NULL;
  PyObject *launch_metadata = NULL;
   z; z
  if(!PyArg_ParseTuple(args, "a  ", &launch_cooperative_grid,
                                           &gridX, &gridY, &gridZ, &_stream, &_function,
                                           &kernel_metadata, &launch_metadata,
                                           &launch_enter_hook, &launch_exit_hook z)) {
    return NULL;
  }

  a8  

  // extract kernel metadata
  int num_warps, num_ctas, shared_memory, clusterDimX, clusterDimY, clusterDimZ;
  if (!PyArg_ParseTuple(kernel_metadata, "iiiiii", &num_warps, &num_ctas, &shared_memory, &clusterDimX, &clusterDimY, &clusterDimZ)) {
    return NULL;
  }
  // extract launch metadata
  if (launch_enter_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_enter_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;
    Py_DECREF(ret);
  }


  // raise exception asap
  zDevicePtrInfo ptr_infoz = getPointer(_argz); if (!ptr_infoz.valid) return NULL;z;
  _launch(gridX, gridY, gridZ, num_warps, num_ctas, launch_cooperative_grid, clusterDimX, clusterDimY, clusterDimZ, shared_memory, (hipStream_t)_stream, (hipFunction_t)_functiona  );

  if(launch_exit_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_exit_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;
    Py_DECREF(ret);
  }

  if(PyErr_Occurred()) {
    return NULL;
  }
  // return None
  Py_INCREF(Py_None);
  return Py_None;
}

static PyMethodDef ModuleMethods[] = {
  {"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"},
  {NULL, NULL, 0, NULL} // sentinel
};

static struct PyModuleDef ModuleDef = {
  PyModuleDef_HEAD_INIT,
  "__triton_launcher",
  NULL, //documentation
  -1, //size
  ModuleMethods
};

PyMODINIT_FUNC PyInit___triton_launcher(void) {
  if (!initSymbolTable()) {
    return NULL;
  }
  PyObject *m = PyModule_Create(&ModuleDef);
  if(m == NULL) {
    return NULL;
  }
  PyModule_AddFunctions(m, ModuleMethods);
  return m;
}
)	enumeratevaluesrL   _BASE_ARGS_FORMATr   listfilterboolrU   r)   itemsFLOAT_STORAGE_TYPErO   r   FLOAT_PACK_FUNCTIONri   r   )	constantsr   	warp_sizer   idxsr   args_formatformatr   	args_listarg_decl_list	arg_declsinternal_args_listfloat_storage_declsrH   paramsrx   r   r   r   s                     @@@r   make_launcherr      s   4
* '00A)BRBRBT0U&VW&VFC&VIW''93C3C3EF3ER9R=3EFGK,F193C3C3EFGIVD)//#"678I"+I"67"6$!"6I7PST]P^abPbtyy L)//:K LLLhjI M"##  $6r$:#;4s!CD  IbM?$qc!:; # 		-(I"a5C<%%8&<=%%%%QCx&89;%%QCj1 # __&&EA## 	Zb!
"%s+6I"6M5NeTUSVVXY&   12K %I'(F&/oo&7M&7UQ2;LjQCj&7FM
MM#$
- .9M U:`j ux  yB  uC  FG  uG  ae  hq  aq  MO  `P Pyy() *^^g]h iS T]R] U^j 88Y__=NO=NEA#$E!B/=NOPQ R  &x (R S\Q\ ] 88 ! "& 99  R[  Ra  Ra  Rc  d  Rc  IN  IJoqrsotx{o{&qc);A3bCSTUSVVjk  BD  D  Rc  d  e  f fr [^  _q  [r  uv  [v  sw  z~  zC  zC  DV  zW  sW  |~  r +qCCH JY	 XF 8, N| P6 ds5   O)O#*O(<O. O.-O4=	O4+O: &P 0c                    ^  U 4S jnU$ )zF
Replace all tensor descriptors with the base ptr, shape, and strides
c                  ^  > U S [        [        5       nU [        [        5      S  n/ nU Hw  n[        U[        5      (       aN  UR	                  UR
                  /UR                  QUR                  QUR                  QUR                  Q5        Mf  UR                  U5        My     T" / UQUQ76 $ N)	r)   r   r   r   extendbaseshapestridesrO   )args	meta_argsraw_kernel_args
final_argsarglaunchers        r   inner,wrap_handle_tensor_descriptor.<locals>.inner4  s    0#/01	s#4567
"C#/00 !!388"`cii"`#++"`		"`TWT_T_"`a!!#& # 00Z00r    r   )r  r  s   ` r   wrap_handle_tensor_descriptorr  /  s    
1" Lr    c                        \ rS rSrS rS rSrg)HIPLauncheriH  c                 0  ^ [        TS5      (       a  TR                  O	[        5       nU4S jnUR                  5        VVs0 s H  u  pVU" U5      U_M     nnnTR                  R                  5        VVs0 s H  u  pVXV_M	     nnn[        X7UR                  5      m[        TS[        S9n[        S UR                  5        5       5      n	U	(       a  [        UR                  5      OUR                  U l        UR                  U l        g s  snnf s  snnf )Nr   c                 ~   > [        U [        5      (       a&  TR                  R                  R	                  U 5      4$ U $ r   )r   r   fn	arg_namesindex)xrx   s    r   <lambda>&HIPLauncher.__init__.<locals>.<lambda>L  s2    Z3=O=OSVV--33A69VUVVr    __triton_launcherrw   c              3   r   #    U  H-  n[        U[        5      =(       a    UR                  S 5      v   M/     g7f)r   N)r   r   r   )r   r   s     r   r   'HIPLauncher.__init__.<locals>.<genexpr>Q  s+     !vcu\_*S#"6"W3>>,;W"Wcus   57)ro   r   dictr   r   r   r   r   ry   anyr   r  launchlaunch_cooperative_grid)
r~   rx   metadatar   arg_idxr   valuer   r   has_tensor_desc_args
    `        r   r   HIPLauncher.__init__J  s    %,S+%>%>CMMDF	V;D??;LM;LZSWS\5(;L	M25--2E2E2GH2GJCSZ2G	HI(2D2DE%#4GVbc!!vclcscscu!vvCV3CJJ?\_\f\f'/'G'G$ NHs   D7Dc                 >    U R                   " U R                  /UQ76   g r   r  r  )r~   r   s     r   __call__HIPLauncher.__call__V  s    D00848r    r  N)r   r   r   r   r   r  r   r   r    r   r  r  H  s    
H9r    r  c                   \   ^  \ rS rSrU 4S jrS r\S 5       rS rS r	S r
S rS	 rS
rU =r$ )	HIPDriveriZ  c                 V   > [         TU ]  5         [        5       U l        [        U l        g r   )rp   r   rk   utilsr  launcher_cls)r~   rs   s    r   r   HIPDriver.__init__\  s    Z
'r    c                 "    SS K nUR                  $ )Nr   )rC   cudar~   rC   s     r   get_device_interfaceHIPDriver.get_device_interfacea  s    zzr    c                       SS K n U R                  R                  5       =(       a    U R                  R                  S L$ ! [
         a     gf = f)Nr   F)rC   r(  is_availableversionhipImportError)rC   s    r   	is_activeHIPDriver.is_activee  sC    	::**,P%--2C2C42OP 		s   <? 
AAc                     U R                  5       nU R                  R                  U5      n[        R                  R
                  =(       d    US   nUS   n[        SUR                  S5      S   U5      $ )NarchwarpSizer/  rE   r   )get_current_devicer$  r}   r   runtimeoverride_archr   rU   )r~   devicedevice_propertiesr4  r   s        r   get_current_targetHIPDriver.get_current_targetm  se    ((* JJ<<VD}}**G.?.G%j1	

3 2I>>r    c                 J    SS K nUR                  SU R                  5       5      $ )Nr   r(  )rC   r9  r6  r)  s     r   get_active_torch_device!HIPDriver.get_active_torch_devicet  s    ||FD$;$;$=>>r    c                     SSK Jn  U$ )Nr   )do_bench)triton.testingrA  )r~   rA  s     r   get_benchmarkerHIPDriver.get_benchmarkery  s
    +r    c                 \    SS K nSnUR                  [        US-  5      UR                  SS9$ )Nr   i      r(  )r   r9  )rC   emptyint)r~   rC   
cache_sizes      r   get_empty_cache_for_benchmark'HIPDriver.get_empty_cache_for_benchmark}  s.     '
{{3zQ/uyy{PPr    c                 $    UR                  5         g r   )zero_)r~   caches     r   clear_cacheHIPDriver.clear_cache  s    r    )r%  r$  )r   r   r   r   r   r*  staticmethodr1  r;  r>  rC  rJ  rO  r   r   r   s   @r   r"  r"  Z  s@    (
  ??
Q r    r"  )#	functoolsr$   rV   r   pathlibr   tritonr   triton.backends.compilerr   triton.backends.driverr   triton.runtime.buildr   triton.tools.tensor_descriptorr   r?   rM   realpathrN   rL   ry   r@   	lru_cacheri   objectrk   r   r   r   r   r   r  r  r"  r   r    r   <module>r\     s     	  	   . , 8 ;
''//"''**84
5Wi01-` @R @RF?v ?(
.     ! l^29& 9$+	 +r    