U
    hW                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlZd dlZd dlZd dlZd dlmZ d dlZeeZG d	d
 d
eZG dd deZ G dd dZ!dej"iZ#dddddi fddZ$d8ddZ%d9ddZ&dd Z'dd Z(dd Z)dd  Z*d:d!d"Z+ej,d fd#d$Z-d%d& Z.d;d(d)Z/eeee0ef   d*d+d,Z1G d-d. d.eZ2G d/d0 d0e2Z3G d1d2 d2e2Z4d<d4d5Z5d6d7 Z6dS )=    N)ABCabstractmethod)ThreadPoolExecutor)datetime)Enum)sleep)AnyDictListOptional)versionc                   @   s$   e Zd ZdZdZdZdZdd ZdS )	PrecisionZfp32Zfp16Zint8Zint4c                 C   s   | j S Nvalueself r   M/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/benchmark_helper.py__str__&   s    zPrecision.__str__N)__name__
__module____qualname__ZFLOAT32ZFLOAT16ZINT8ZINT4r   r   r   r   r   r       s
   r   c                   @   s    e Zd ZdZdZdZdd ZdS )OptimizerInfoZno_optZby_ortZ	by_scriptc                 C   s   | j S r   r   r   r   r   r   r   1   s    zOptimizerInfo.__str__N)r   r   r   ZNOOPTZBYORTZBYSCRIPTr   r   r   r   r   r   *   s   r   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )ConfigModifierc                 C   s
   || _ d S r   
num_layers)r   r   r   r   r   __init__6   s    zConfigModifier.__init__c                 C   s~   | j d krd S t|dr2| j |_td| j   t|drV| j |_td| j   t|drz| j |_td| j   d S )Nnum_hidden_layersz6Modifying pytorch model's number of hidden layers to: encoder_layersz7Modifying pytorch model's number of encoder layers to: zdecoder_layers z7Modifying pytorch model's number of decoder layers to: )r   hasattrr   loggerinfor   Zdecoder_layers)r   configr   r   r   modify9   s    



zConfigModifier.modifyc                 C   s   | j S r   r   r   r   r   r   get_layer_numF   s    zConfigModifier.get_layer_numN)r   r   r   r   r$   r%   r   r   r   r   r   5   s   r   float32TFc                    s:  d }z
t  }	|r t jj|	_n
t jj|	_|r4d|	_|dkrT||	_t	d|	j  |r`d|	_
nd|	_
t	d|   |r|dkrddg}
q|d	krd
dg}
q|dkrdd
dg}
q|dkrddg}
q|dkrdddg}
qddg}
ndg}
 r fdd|
D }
t j| |	|
d}W n$ tk
r4   tjddd Y nX |S )NTr   z%Session option: intra_op_num_threads=   zCreate session for onnx model: dmlDmlExecutionProviderZCPUExecutionProviderrocmROCMExecutionProviderZmigraphxMIGraphXExecutionProvidercudaCUDAExecutionProviderZtensorrtZTensorrtExecutionProviderc                    s$   g | ]}| kr| | fn|qS r   r   ).0nameprovider_optionsr   r   
<listcomp>   s     z.create_onnxruntime_session.<locals>.<listcomp>)	providers	Exception)exc_info)onnxruntimeZSessionOptionsZGraphOptimizationLevelZORT_ENABLE_ALLZgraph_optimization_levelZORT_ENABLE_BASICenable_profilingZintra_op_num_threadsr!   debugZlog_severity_levelZInferenceSessionr6   error)Zonnx_model_pathuse_gpuproviderZenable_all_optimizationnum_threadsr9   verboser3   sessionZsess_optionsr5   r   r2   r   create_onnxruntime_sessionP   sP    





rA   c                 C   s6   | rt jddd nt jdd tdtj d S )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(message)s)rD   transformers)coloredlogsinstalllogging	getLoggersetLevelWARNING)r?   r   r   r   setup_logger   s    rL   c                 C   s   | rt j| st |  |r4t j|s4t | |rv|dkrVdt ksvtdn tt dddgrvtdt	
dtj  t	
d	tj  t	
d
tj  ttjtdkstttjtdkstttjtdkstd S )Nr)   r*   zBPlease install onnxruntime-directml package to test GPU inference.r/   r,   r-   zWPlease install onnxruntime-gpu package, or install ROCm support, to test GPU inference.zPyTorch Version:zTransformers Version:zOnnxRuntime Version:z1.10.0z4.12.0)ospathexistsmakedirsr8   Zget_available_providersAssertionErrorset
isdisjointr!   r"   torch__version__rE   r   parse)	cache_dir
output_dirr<   r=   r   r   r   prepare_environment   s*    


rY   c                 C   s   t | tt|  d }tj| tjdd }|d|  }t| |dt| dd dt| dd dt| dd d|d|ddS )Ng     @@)dtypez.2fZ   _   c   )
test_timeslatency_variancelatency_90_percentilelatency_95_percentilelatency_99_percentileaverage_latency_msQPS)sumfloatlennumpyvarZfloat64Z
percentile)latency_list
batch_sizeZ
latency_msr_   Z
throughputr   r   r   get_latency_result   s    rl   c                 C   s   t |dddd^}ddddd	d
dddddddddddddddg}tj||d}|  | D ]}|| qZW 5 Q R X td|  d S )Na asciimodenewlineencodingenginer   r5   device	precision	optimizer
io_binding
model_nameinputsthreadsrk   sequence_lengthcustom_layer_numr   r^   rd   rc   r_   r`   ra   rb   
fieldnamesz&Detail results are saved to csv file: )opencsv
DictWriterwriteheaderwriterowr!   r"   )resultscsv_filenamecsv_filecolumn_names
csv_writerresultr   r   r   output_details   s8    r   c                    s  t |dddd}ddddd	d
dddddg g }|jD ]D}|jdgkrZ|d|  q8|jD ]}|d| d|  q`q8tj| | d}|  |jD ]0}dD ]$}	|jD ]}
dD ]
}|j	D  ]}i }| D ]}|d |kr|d |	kr|d |
kr|d |kr|d |kr؇ fdd|
 D }|sT|| |dd |D  n" D ]}|| || ksXtqX|d }|d }|r|d |d| d| < q|d |d| < q|r|| qqqqqW 5 Q R X td|  d S )Nrm   rn   ro   rp   ry   rz   r}   rt   r   r5   ru   rv   rw   rx   r{   bZ_sr~   )         )TFrn   c                    s   i | ]\}}| kr||qS r   r   )r0   kvheader_namesr   r   
<dictcomp>  s       z"output_summary.<locals>.<dictcomp>c                 S   s   i | ]
}|d qS )rn   r   )r0   r   r   r   r   r     s      rk   r|   rc   z'Summary results are saved to csv file: )r   Zbatch_sizesZsequence_lengthsappendr   r   r   modelsZenginesr>   itemsupdaterQ   r   r!   r"   )r   r   argsr   Z
data_namesrk   r|   r   ry   Zinput_countZengine_namerx   r{   rowr   headersr   r   sr   r   r   output_summary   sh    









r   c              	   C   s   t |dddd}dttt|   }tj||d	}|  | D ]N}t	t
 | | d< tj| | d< tj| | d< || | d< || |  qHW 5 Q R X td
|  d S )Nrm   rn   ro   rp   model_filenamer   rE   rT   r~   z(Fusion statistics is saved to csv file: )r   r   rE   rT   )r   listnextitervalueskeysr   r   r   strr   nowrE   rU   rT   r   r!   r"   )Zmodel_fusion_statisticsr   r   r   r   keyr   r   r   output_fusion_statistics%  s"        r   c                    sd   i }t j fddd|d t j fddd|d}|| |ddi |t|| |S )Nc                      s    d  S r   runr   
ort_inputsort_sessionr   r   <lambda>;      zinference_ort.<locals>.<lambda>r   numberrepeatc                      s    d  S r   r   r   r   r   r   r   <  r   rx   F)timeitr   r   rl   )r   r   result_templaterepeat_timesrk   warm_up_repeatr   rj   r   r   r   inference_ort9  s    
r   c              
      s&  i }   |D ]^}t|| |	}t|| jtkrLtt|| j n|
} ||jj	d||j
|  qt|dkrt|||	 t|D ]4\}} ||| jj	dtj|| j
||   qtj fddd|d tj fddd|d}|| |ddi |t|| |S )	Nr   c                      s
     S r   Zrun_with_iobindingr   rx   r   r   r   r   t  r   z/inference_ort_with_io_binding.<locals>.<lambda>r   r   c                      s
     S r   r   r   r   r   r   r   z  r   rx   T)rx   rT   Z
from_numpytor   rZ   IO_BINDING_DATA_TYPE_MAPZ
bind_inputru   typeshapeZdata_ptrrg   allocateOutputBuffers	enumerateZbind_outputrh   r&   r   r   r   rl   )r   r   r   r   Zort_output_namesZort_outputsoutput_buffersoutput_buffer_max_sizesrk   ru   Z	data_typer   r   r1   Znp_inputZ
input_typeiZort_output_namerj   r   r   r   inference_ort_with_io_bindingC  sR    	

	
r   c                 C   s&   |D ]}|  tj|tj|d qd S )N)rZ   ru   )r   rT   emptyr&   )r   r   ru   r   r   r   r   r     s    r   {   c                 C   s<   t |  tj |  t|  tj|  tj|  dS )z5Set random seed manually to get deterministic resultsN)randomseedrh   rT   Zmanual_seedr.   Zmanual_seed_all)r   r   r   r   set_random_seed  s
    

r   returnc               
   C   s   ddl m} m}m}m}m}m}m} z||  g }| }t|t	sFW d S t
|D ]F}	|||	}
t|
trp W d S ||	|||	|
j|
j|
jd qN|  |W S  | k
r } ztd| W Y d S d }~X Y nX d S )Nr   	NVMLErrornvmlDeviceGetCountnvmlDeviceGetHandleByIndexnvmlDeviceGetMemoryInfonvmlDeviceGetNamenvmlInitnvmlShutdown)idr1   totalfreeused-Error fetching GPU information using nvml: %s)py3nvml.py3nvmlr   r   r   r   r   r   r   
isinstanceintranger   r   r   r   r   print)r   r   r   r   r   r   r   r   device_countr   r"   r;   r   r   r   get_gpu_info  s0    $



	
r   c                   @   s@   e Zd Zd
ddZdd Zeeeee	e
f   dddZd	S )MemoryMonitorTc                 C   s
   || _ d S r   )keep_measuringr   r   r   r   r   r     s    zMemoryMonitor.__init__c                 C   s@   dd l }d}t||t  jd }td | jsq<q|S )Nr      {Gzt?)	psutilmaxProcessrM   getpidZmemory_infoZrssr   r   )r   r   	max_usager   r   r   measure_cpu_usage  s    zMemoryMonitor.measure_cpu_usager   c                 C   s
   t  d S r   )NotImplementedErrorr   r   r   r   measure_gpu_usage  s    zMemoryMonitor.measure_gpu_usageN)T)r   r   r   r   r   r   r   r
   r	   r   r   r   r   r   r   r   r     s   
r   c                       s<   e Zd Zd fdd	Zeeeeef   dddZ	  Z
S )CudaMemoryMonitorTc                    s   t  | d S r   )superr   r   	__class__r   r   r     s    zCudaMemoryMonitor.__init__r   c           
   
      sD  ddl m}m}mm}mm}m} g g  z|  | }t|t	sZt
d|  W d S dd t|D fddt|D  t|D ]J}||}t|trt
d|   W d S t| |jd |< qtd	 | jsqq|   fd
dt|D W S  |k
r> }	 zt
d|	 W Y d S d }	~	X Y nX d S )Nr   r   z*nvmlDeviceGetCount result is not integer: c                 S   s   g | ]}d qS r   r   r0   r   r   r   r   r4     s     z7CudaMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>c                    s   g | ]} |qS r   r   r   )r   r   r   r   r4     s     z%nvmlDeviceGetMemoryInfo returns str: r   r   c                    s    g | ]}| | | d qS )Z	device_idr1   max_used_MBr   r   gpu_namemax_gpu_usager   r   r4     s
   r   )r   r   r   r   r   r   r   r   r   r   r!   r;   r   r   r   r   r   r   )
r   r   r   r   r   r   r   r   r"   r;   r   )r   r   r   r   r   r     s6    $


z#CudaMemoryMonitor.measure_gpu_usage)T)r   r   r   r   r   r
   r	   r   r   r   __classcell__r   r   r   r   r     s   r   c                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	RocmMemoryMonitorTc                    sn   t  | d}tj|r2|tjkr2tj| zdd l}|| _| j  W n t	k
rh   d | _Y nX d S )Nz/opt/rocm/libexec/rocm_smir   )
r   r   rM   rN   rO   sysr   rocm_smiZinitializeRsmiImportError)r   r   Zrocm_smi_pathr   r   r   r   r     s    
zRocmMemoryMonitor.__init__c                 C   s(   | j d krdS | j |dd d d S )Nr'   ZVRAMr   i   )r   Z
getMemInfo)r   devr   r   r   get_used_memory  s    
z!RocmMemoryMonitor.get_used_memoryc                    s   | j d krd S | j d k	r&t| j  nd}dd t|D dd t|D  t|D ]}t| | ||< qVtd | jsNqqN fddt|D S )Nr   c                 S   s   g | ]}d qS r   r   r   r   r   r   r4     s     z7RocmMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>c                 S   s   g | ]}d | qS )ZGPUr   r   r   r   r   r4     s     r   c                    s    g | ]}| | | d qS r   r   r   r   r   r   r4   %  s
   )	r   rg   ZlistDevicesr   r   r   timer   r   )r   r   r   r   r   r   r     s    

z#RocmMemoryMonitor.measure_gpu_usage)T)r   r   r   r   r   r   r   r   r   r   r   r     s   r   r.   c              
   C   s  d }|dkrt }nt}|d}| rF|d k	r2|}n| }|d krFd S |d krR|S t }| }||j}z||}
|
 }W 5 d|_| }	X |	d krW 5 Q R  d S td| d|	  t|dkr8t|	dkr8t|t|	kr8d}t	|D ].\}}|d }|	| d }|| }t
||}q|W  5 Q R  S W 5 Q R X d S |d k	rV|}n| }|d krl|S t j}| }||j}z||}
|
 }W 5 d|_| }	X td|d	d
|	d	d |	| W  5 Q R  S Q R X d S )Nr+   FzGPU memory usage: before=z  peak=r   r   r   zCPU memory usage: before=z.1fz
 MB, peak=z MB)r   r   r   r   Zsubmitr   r   r   rg   r   r   r   )Zis_gpufuncZmonitor_typeZstart_memoryZmemory_monitor_typeZmonitorZmemory_before_testexecutorZ
mem_threadr   Z	fn_thread_Zmax_usedr   Zmemory_beforebeforeafterr   r   r   r   measure_memory/  s`    

.



r  c                  C   sV   dddddddg} d}| D ]6}t |}|d kr2q|r>|d	7 }|| d
| 7 }q|S )NZORT_DISABLE_FUSED_ATTENTIONZ!ORT_ENABLE_FUSED_CAUSAL_ATTENTIONZ!ORT_DISABLE_FUSED_CROSS_ATTENTIONZORT_DISABLE_TRT_FLASH_ATTENTIONZ&ORT_DISABLE_MEMORY_EFFICIENT_ATTENTIONZORT_TRANSFORMER_OPTIONSZORT_CUDA_GEMM_OPTIONSrn   ,=)rM   getenv)Z	env_namesenvr1   r   r   r   r   get_ort_environment_variabless  s"    	
r  )T)N)r   )r   )r.   N)7r   rH   rM   r   r   r   r   abcr   r   concurrent.futuresr   r   enumr   r   typingr   r	   r
   r   rF   rh   rT   rE   	packagingr   r8   rI   r   r!   r   r   r   r&   r   rA   rL   rY   rl   r   r   r   r   Zlonglongr   r   r   r   r   r   r   r   r  r  r   r   r   r   <module>   sd   

 	
B

":

A
&2+
D