U
    hN                     @   s>  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlmZ d dlZd dlZd dlZd dlmZmZ e	G dd dZe	G d	d
 d
Zd'ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Z dd Z!d d! Z"d"d# Z#d$d% Z$e%d&kr:dZ&e$  dS )(    N)	dataclass)datetime)Path)Optional)generate_test_dataget_bert_inputsc                   @   sv   e Zd ZU eed< eed< eed< eed< eed< eed< eed< eed< eed	< eed
< eed< eed< eed< dS )TestSetting
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_lengthN)__name__
__module____qualname__int__annotations__boolstr r   r   K/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/bert_perf_test.pyr   "   s   
r   c                   @   sV   e Zd ZU eed< eed< eed< eed< eed< ee ed< ee ed< eed< d	S )
ModelSetting
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelinput_tuning_resultsoutput_tuning_results	mask_typeN)r   r   r   r   r   r   r   r   r   r   r   r   3   s   
r      c              	   C   sZ  dd l }|| |r*d| kr*td |r|dkr@ddg}q|dkrRddg}q|d	krfd
ddg}q|dkrxddg}q|dkrdddg}qddg}ndg}| }	||	_|jj|	_|d kr|j	j
|	_nZ|dkr|j	j|	_nF|dkr|j	j|	_n2|dkr|j	j|	_n|dkr|j	j
|	_n||	_|d k	r2||	_|j| |	|d}
|r|dkrfd|
 kstn|dkrd|
 kstn|d	krd
|
 kstd|
 kstn`|dkrd|
 kstnB|dkrd|
 kstd|
 kstnd|
 ks(tnd|
 ks(t|d k	rVt|}|
t| W 5 Q R X |
S )Nr   ZCUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.ZdmlZDmlExecutionProviderZCPUExecutionProviderZrocmZROCMExecutionProviderZmigraphxZMIGraphXExecutionProvidercudaZtensorrtZTensorrtExecutionProvider   r(   c   )Z	providers)onnxruntimeZset_default_logger_severityZget_available_providersprintZSessionOptionsZlog_severity_levelZExecutionModeZORT_SEQUENTIALZexecution_modeZGraphOptimizationLevelZORT_ENABLE_ALLgraph_optimization_levelZORT_DISABLE_ALLZORT_ENABLE_BASICZORT_ENABLE_EXTENDEDr   ZInferenceSessionZget_providersAssertionErroropenZset_tuning_resultsjsonload)r    r   r   r   r.   r   tuning_results_pathr,   Zexecution_providerssess_optionssessionfr   r   r   create_session?   sz    	















r7   c                 C   s,   t jtjt jtjt jtjt jtji}||  S )N)torchZfloat32npZfloat16Zint32Zint64Zlonglong)Z
torch_typeZtype_mapr   r   r   
numpy_type   s        r:   c                    s4    fdd|   D } fdd|  D }||fS )Nc                    s"   i | ]\}}|t | qS r   r8   Z
from_numpyto.0namearraydevicer   r   
<dictcomp>   s      z/create_input_output_tensors.<locals>.<dictcomp>c                    s"   i | ]\}}|t | qS r   r;   r=   rA   r   r   rC      s      )items)inputsoutputsrB   input_tensorsoutput_tensorsr   rA   r   create_input_output_tensors   s    rI   c              
   C   sx   |   }| D ],\}}|||jjdt|j|j|  q| D ],\}}|	||jjdt|j|j|  qF|S Nr   )

io_bindingrD   Z
bind_inputrB   typer:   ZdtypeshapeZdata_ptrZbind_output)sessrG   rH   rK   r?   Ztensorr   r   r   create_io_binding   s(    rO   c                 C   s   g }g }|j rdnd}t|D ]\}}| ||}	||	 i }
tt|D ]}|	| |
|| < qLt||
|\}}t| ||}| | t	
 }| | t	
 | }|| q||fS )Nr)   cpu)r   	enumeraterunappendrangelenrI   rO   Zrun_with_iobindingtimeitdefault_timer)r5   
all_inputsoutput_namestest_settingresultslatency_listrB   _test_case_idrE   resultrF   irG   rH   rK   
start_timelatencyr   r   r   %onnxruntime_inference_with_io_binding   s"    


rb   c           
      C   st   t |dkr| |t| g }g }t|D ]<\}}t }| ||}t | }	|| ||	 q.||fS rJ   )rU   rR   randomchoicerQ   rV   rW   rS   )
r5   rX   rY   r[   r\   r]   rE   r`   r^   ra   r   r   r   onnxruntime_inference   s    
re   c                 C   sl   |  }dtj|  d}|d|j|jdd7 }|d|j d|j	 d|j
 d	|j d
|j 
7 }|S )Nzmodel=,z4graph_optimization_level={},intra_op_num_threads={},zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=z,test_cases=z,test_times=z	,use_gpu=)Zget_session_optionsospathbasenameformatr.   r   replacer	   r
   r   r   r   )r    r5   rZ   r4   optionr   r   r   	to_string   s      0rn   c              	   C   s  t | j|j|j|| j|j| jd}dd | D }t| j||}||krXt	d| d S t	d| g }|j
rt|jD ] }	t||||\}
}|| qvn*t|jD ]}	t|||\}
}|| qt|d }t|}t|d}t|d}t|d	}t|d
}t|d}|jd|  }|||||||f||< t	dt|dt|d | jrtj| j}tj|r|}|ddd  dt   d}t	d|d|d |  }t!|d}t"#|| W 5 Q R X t	d| d S )N)r   r3   c                 S   s   g | ]
}|j qS r   )r?   )r>   outputr   r   r   
<listcomp>   s     z run_one_test.<locals>.<listcomp>zskip duplicated test:zRunning test:  2   K   Z   _   r+   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr*   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)$r7   r    r   r   r$   r   r%   get_outputsrn   r-   r   rT   r   rb   extendre   r9   r@   
statisticsZmeanZ
percentiler	   rk   r&   rh   ri   abspathexistsrsplitr   now	timestampZget_tuning_resultsr0   r1   dump)model_settingrZ   perf_resultsrX   r   r5   rY   keyZall_latency_listZ_ir[   r\   Z
latency_msZaverage_latencyZ
latency_50Z
latency_75Z
latency_90Z
latency_95Z
latency_99Z
throughputZoutput_pathZold_output_pathZtrsr6   r   r   r   run_one_test   sr    	

   

$r   c                 C   s,   t jt| ||||fd}|  |  d S )N)targetargs)multiprocessingProcessr   startjoin)r   rZ   r   rX   r   processr   r   r   launch_test7  s    
r   c           	      C   s   |j d k	r t| ||||j  d S tjdd}tjdd}t||h}tdtd|D ]}||krT|| qT|jdd |D ]}t| |||| q|d S )NF)logicalTr*      )reverse)	r   r   psutil	cpu_countlistrT   minrS   sort)	r   rZ   r   rX   r   Zlogical_coresZcandidate_threadsr_   r   r   r   r   run_perf_testsF  s$    
r   c                 C   s|   t | j| j| j| j\}}}td|j d|j d|j  t	|j|j|j|j
|j||||j|j| jd}t| ||| d S )NzGenerating z samples for batch_size=z sequence_length=)r'   )r   r    r!   r"   r#   r-   r   r	   r
   r   r   r   r   r   r'   r   )r   rZ   r   Z	input_idsZsegment_idsZ
input_maskrX   r   r   r   run_performance^  s.    
r   c               
   C   s  t  } | jddtdd | jdddtddd	 | jd
ddtdd | jddtddd | jdddtddd | jddtddddgddd | jddtddd | jddd d!d" | jdd# | jd$dtdddddd%gd&d' | jd(dd d)d" | jdd* | jd+dd d,d" | jdd- | jd.dtd d/d | jd0d1dtd d2d | jd3dtd d4d | jd5dtd d6d | jd7dtd d8d | jd9d td:d; | jd<d td=d; | jd>d?d@tdAd; | jdBdCdd dDd" | jddE | jdFdtddGd |  }|S )HNz--modelTzbert onnx model path)requiredrL   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rL   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rL   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_levelr*   r(   r+   zZonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 99 - enable all.)r   rL   choicesr   r   z--seed   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severity   z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rL   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rL   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   r   r   r   parse_arguments{  s   		
			r   c                  C   s  t  } | jdkr&tdtd| j | _| jdkr8| j| _t }|	 }t
| j}t|dkrjt|dksrtdt| j| j| j| j| j| j| j| j}|D ]R}t|| j| j| j| j| j| j| j| j| j| j| j| j}t d| t!||| qt"|# ddd	 d
}t$j%&t'| jj(d)| jr&dndd&dd t"t*|D | jt+, -d}t.|ddd}	t/j0|	ddd}
d }|D ]z\}}|1d}|d krdddddddg}|2d d |D  |
3| d!d |D }|2d"d |D  |
3| qW 5 Q R X t d#| d S )$Nr   r*   rq      z batch_size not in range [1, 128]ztest settingFc                 S   s   | d S )Nr*   r   )xr   r   r   <lambda>M      zmain.<locals>.<lambda>)r   r   zperf_results_{}_B{}_S{}_{}.txtZGPUZCPU-c                 S   s   g | ]}t |qS r   )r   r>   r   r   r   r   rp   S  s     zmain.<locals>.<listcomp>z%Y%m%d-%H%M%Szw+rg   )newline	
)	delimiterlineterminatorrf   zLatency(ms)ZLatency_P50ZLatency_P75ZLatency_P90ZLatency_P95ZLatency_P99zThroughput(QPS)c                 S   s   g | ]}| d d qS )=r   splitr   r   r   r   rp   g  s     c                 S   s   g | ]}t |d qS )rv   )rk   r   r   r   r   rp   j  s     c                 S   s   g | ]}| d d qS )r   r*   r   r   r   r   r   rp   k  s     zTest summary is saved to)4r   r   maxr   Zsamplesr   r
   r   Managerdictsetr	   r   	Exceptionr   modelr!   r"   r#   r$   r%   r&   r'   r   r   r   r   r   r   r   r   r   r-   r   sortedrD   rh   ri   r   r   parentrk   r   r   r   strftimer0   csvwriterr   rz   writerow)r   managerr   Zbatch_size_setr   r	   rZ   Zsorted_resultsZsummary_fileZtsv_fileZ
tsv_writerheadersr   Zperf_resultparamsvaluesr   r   r   main  s    




	

	
r   __main__)Nr(   N)'r   r   r1   r   rh   rc   r{   rV   Zdataclassesr   r   pathlibr   typingr   Znumpyr9   r   r8   Zbert_test_datar   r   r   r   r7   r:   rI   rO   rb   re   rn   r   r   r   r   r   r   r   __spec__r   r   r   r   <module>   sN      
Y

E #T
