U
    h5V                     @   sB  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZ	d dl
Z
d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZ d dlZe e!Z"e j#dd	d
Z$e j#dddZ%dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+dd Z,dd Z-e!dkr>e-  dS )    N)measure_memorysetup_logger)get_library_path)ORTModelForSpeechSeq2Seq)ProfilerActivityprofilerecord_function)trange)AutoModelForSpeechSeq2SeqWhisperConfigWhisperProcessorargsc                    s   j dkrtd fdd fdd j j j j j jd} j dkr| D ],\}}t	j
|gd	|krxt	jnt	jd
||< qZ jrt	j
 jgt	jd
|d<  jrt	j
 jgt	jd
|d< td j  fdd}t | j | j} jr||d< |S td  j dkr*dnd fdd}t || ||} j dkrh||d< |S |j jrztjntj jd|d<  j|d< d|d< d|d<  jr j|d< |S )N>   hf-ortorthf-pt-compilehf-pt-eagerz/Unable to auto-detect inputs for provided modelc                     s   t  j} t | } | S N)whisperZ
load_audio
audio_pathZpad_or_trimaudior    U/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/models/whisper/benchmark.pyload_via_ffmpeg   s    
z#get_inputs.<locals>.load_via_ffmpegc               	      s@   t  jd*} tjt|  tjd}t|g}W 5 Q R X |S )Nrbdtype)openr   npZasarraylistreadZuint8array)fr   r   r   r   load_via_numpy#   s    z"get_inputs.<locals>.load_via_numpy)
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyr   Zpenaltyr   decoder_input_idslogits_processorzLoad audio: c                    s   | r
 S   S r   r   )Zonnx_e2e)r   r$   r   r   <lambda><       zget_inputs.<locals>.<lambda>audio_streamzFeature extraction: r   ptc                    s    j j| g jdjS )N)Zreturn_tensorssampling_rate)	processorZfeature_extractorr1   input_featuresr   )r   return_typer   r   r-   H   s     r3   )r   deviceinputsno_repeat_ngram_sizeTZearly_stopping	use_cacheZforced_decoder_ids)benchmark_type	Exceptionr%   r&   r'   r(   r)   r*   itemsr   r"   float32Zint32has_decoder_input_idsr+   has_logits_processorr,   loggerinfor   time_fnhas_audio_streamtouse_fp16torchfloat16target_devicer7   )r   r6   kvZload_audio_fnZ
audio_dataZprocessor_fnr3   r   )r   r   r$   r4   r   
get_inputs   sV    

&

 


rJ   c                 C   s  d\}}d\}}| j dkrx| jr&| jn| j}t }tj|| jrFtjntj	dd
| j}t }| j dkrt|}nd| j dkrt }| j|_|t  | jrd|_d|_| jrtd td ntd	| j  | j d
krRt| jtkr| jd n| j}t| jtkr"| jd nd }t }tj| j| jdk|||d}t }| j dkrt !d| j"  t }tj#| j"|| jgd}t }t !d||  d |S )N)NN   r   r   T)Ztorch_dtyper8   r   >   r   r      r   Cannot recognize r   cpu)Zuse_io_bindingproviderprovider_optionsZsession_optionsr   zLoading model from )Z	providerszLoaded model in  s)$r9   Zhf_pt_model_path
model_nametimer
   from_pretrainedrD   rE   rF   r<   rC   rG   compiler   ZSessionOptionsr   Zenable_profilingZregister_custom_ops_libraryr   verboseZlog_verbosity_levelZlog_severity_leveltuneZset_default_logger_severityZset_default_logger_verbosityr:   typeexecution_providertupler   hf_ort_dir_pathr5   r?   r@   ort_model_pathZInferenceSession)r   modelZsess_options
start_timeend_timesourcerO   rP   r   r   r   	get_model`   sb    



 ra   c                 C   sH  t |tkr|d n|}t |tkr,|d n|}| jdkrDt| jnt| jtjdd}| jrn||}t	
| |D ]}|| qr| jdkrtj  t }| jdkrt| jnt| jtjdd}	|	D ]}|| q| jdkrtj  t }
| jdkrt	
d d}|
| | j }|| }t	
d	| d
 t	
d| d d S )Nr   rL   r   zWarm up)filedescrN   Z	Benchmark z	Latency: rQ   zThroughput: z qps)rX   rZ   r9   rangeZwarmup_runsr	   sysstdoutrV   r?   r@   r5   rE   cudaZsynchronizerS   Znum_runs)r   fnr6   Zwarmup_inputsZbenchmark_inputsZwarmup_rangeoutputs_r^   Zbench_ranger_   Z
batch_sizeZlatencyZ
throughputr   r   r   rA      s>    







rA   c           	      C   s   | j   d| j d| j d|jdd d| dtj d}d }| j dkrtt	j
t	jgddd"}td || W 5 Q R X W 5 Q R X |jdd	j| j| jd
}tj| j| d}t|d}|| W 5 Q R X n|| | d}|S )N-rk   z%Y-%m-%d_%H:%M:%SrK   T)Z
activitiesZrecord_shapesZprofile_memoryZmodel_inference   )Zgroup_by_stack_n)Zsort_byZ	row_limitz.logw.json)r9   lower	precisionr5   __name__replacedatetimenowr   r   ZCPUZCUDAr   Zkey_averagestableZpt_filter_byZpt_num_rowsospathjoin
log_folderr   write)	r   ri   r6   Zinputs_typeprefixfilenameZprofZ	prof_datar#   r   r   r   
profile_fn   s$    B

  

r~   c                    s   t  }t|}|jdd   td|jd d d t  t	j
  t| jdk fdd| jd tj  d S )	Ng?)intervalzCPU usage: %rN   c                      s    S r   r   r   ri   r6   r   r   r-      r.   zmeasure_fn.<locals>.<lambda>)Zis_gpufuncmonitor_type)rw   getpidpsutilProcessZcpu_percentr?   r@   gcZcollectrE   rh   Zempty_cacher   r5   r   rf   rg   flush)r   ri   r6   pidprocessr   r   r   
measure_fn   s    

 r   c           
         s  fdd fdd}|} j dkr0||  jrVt ||d} j dkrR|d td  }jj }|d	 }tj	|rt
d
| d|  t|tj j| jj }|d }tj	|rt
d
| d|  t|tj j| jj }|d }tj	|rRt
d
| d|  t|tj j| d S t
d t || ||\}}	t
dt|d  d t
d|	d   t || d S )Nc                    s    j f | }|S r   )generate)r6   predicted_idsr]   r   r   get_pred_ids  s    z&run_hf_inference.<locals>.get_pred_idsc                    s>   | }g }t  jD ]}| jj|ddd  q||fS )NTZskip_special_tokensr   )re   r(   appendr2   batch_decode)r6   r   transcriptionrk   )r   r   r   r   gen_and_dec	  s
    z%run_hf_inference.<locals>.gen_and_decr   zgen-and-decr   ro   z-encoder.json	Renaming  to z-decoder.jsonz-decoder-with-past.jsonz
Evaluating PyTorch...Generated token length: r    tokensTranscription: )r9   r   r~   lenencodersessionend_profilingrw   rx   isfiler?   warningrenamery   rz   decoderZdecoder_with_pastr@   rA   r   )
r   r6   r]   r   generate_fnnew_logname
new_prefixold_lognamer   r   r   )r   r   r]   r   run_hf_inference  s>    

r   c                    sr  d fdd	}fdd}fdd} fdd	} j d
krB|n|}||} jrt ||d}	 }
td|
 d|	  t|
tj	 j
|	 d S td |} jr||dd}||f}t || ||} j d
kr| }|d } jrtd|d d   nN||d d }tdt| d  jj|d ddd }td|  t || d S )NFc                    s   t tdd  }t |  }|| }t|rJtd|  td|r` jr`| d | d< || }t|r|D ]}t	d| d | |= qt j
d	kr }|  D ]\}}	|||	 q D ]}
|j|
j j
 jd
 q|S | S )Nc                 S   s   | j S r   nameZmodel_inputr   r   r   r-   E  r.   z?run_ort_inference.<locals>.prepare_ort_inputs.<locals>.<lambda>z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.r%   r&   zRemoving unnecessary input 'z' from user provided inputsrN   )Zdevice_type	device_id)setmaprJ   keysr   r?   errorr:   rW   r@   r5   
io_bindingr;   Zbind_cpu_inputget_outputsZbind_outputr   r   )r6   warmupZmodel_inputsZuser_inputsZmissing_inputsZunnecessary_inputsZunnecessary_inputr   rH   rI   outputr   r]   r   r   prepare_ort_inputsC  s*    

z-run_ort_inference.<locals>.prepare_ort_inputsc                    s     |  | S r   )Zrun_with_iobinding)r   r   r   r   with_io_bindinga  s    
z*run_ort_inference.<locals>.with_io_bindingc                    s     d | }|S r   )run)r6   rj   r   r   r   without_io_bindingf  s    z-run_ort_inference.<locals>.without_io_bindingc                    s6    j | kr2t|  j kd d }| d |d  S | S )Nr   rL   )eos_token_idr   where)r   Z	first_endr   r   r   handle_outputk  s    
z(run_ort_inference.<locals>.handle_outputrN   Ze2er   r   z
Evaluating ONNX Runtime...T)r   r   r   r   r   r   )F)r5   r   r~   r   r?   r   rw   r   rx   ry   rz   r@   rW   rA   Zcopy_outputs_to_cpurB   r   r2   r   r   )r   r6   r]   r   r   r   r   r   Z
ort_inputsr   r   Zort_evaluate_inputsZort_warmup_inputsZort_outputsZactual_outputr   r   r   r   run_ort_inferenceB  s:    

r   c                 C   sD   | j dkrt| || n(| j dkr0t| || ntd| j  d S )N>   r   r   r   r   rM   )r9   r   r   r:   )r   r6   r]   r   r   r   run_inference  s
    

r   c               	   C   s
  t  } | jddtdddddgd | jd	d
tddd | jddtdddddgdd | jdtddd | jdtddd | jdtddd | jddtddd | jdd ttj rd!nd"d"d!d#gd$ | jd%d&td'd( | jd)d*td+d( | jd,d-td.d( | jd/td0d( | jd1td2d3d | jd4td5d( | jd6td'd( | jd7td8d( | jd9td8d( | jd:td;d( | jd<td;d( | jd=td>d( | jd?td@dAd | jdBtd8dCd | jdDdEdFdG | jdHtdIdJd | jdKtdLdMd | jdNdEdFdG | jdOtt	j
dPdQd | jdRdEdFdSdT |  }tj|j t|j |j|_d|jkr|j  dU|_|jdVkr|jdW|jif|_n4|jdXkr|j|jd8|jrd8nd'dYf|_d!|_|jdkr|jstdZ|jdkr|jstd[t|j|_|S )\Nz-btz--benchmark-typeTr   r   r   r   )rX   requiredchoicesz-mz--model-namez;Hugging Face name of model (e.g. 'openai/whisper-large-v2'))rX   r   helpz-pz--precisionZfp32Zint8fp16zePrecision for model. For ONNX models, the model's precision should be set before running this script.)rX   r   defaultr   r   z--hf-pt-model-pathrd   zNPath to directory containing all PyTorch files (e.g. tokenizer, PyTorch model))rX   r   r   z--hf-ort-dir-pathzaPath to directory containing all ONNX files (e.g. tokenizer, encoder, decoder, decoder_with_past)z--ort-model-pathzPath to ONNX modelz-az--audio-pathz%Path to audio file for E2E evaluationz-dz--devicerh   rN   Zrocm)rX   r   r   z-idz--device-idr   )rX   r   z-wz--warmup-runsrm   z-nz
--num-runs
   z--seed   z--sampling-ratei>  zSampling rate for audio (in Hz)z--max-lengthi  z--min-lengthz--num-beamsrL   z--num-return-sequencesz--length-penaltyg      ?z--repetition-penaltyz--no-repeat-ngram-size   z--decoder-input-idsz[]zThe forced decoder ids for generation. Format is [start token, timestamp token, language token, task token]. Default is [start token]. See `decoder_input_ids` in https://github.com/microsoft/Olive/tree/main/examples/whisper for details.z--logits-processorzType of logits processor to use. See `BeamSearch` in https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/core/graph/contrib_ops/contrib_defs.cc for details.z	--profileF
store_true)r   actionz--pt-filter-byZself_cpu_time_totalz"What to filter PyTorch profiler byz--pt-num-rowsi  z.Number of rows for PyTorch profiler to displayz	--verbosez--log-folder.zFolder to cache log filesz--tunezFOnly used by ROCm EP, enable TunableOp tuning to select fastest kernel)r   r   r   ZExecutionProviderZCUDAExecutionProviderr   ZROCMExecutionProvider)r   Ztunable_op_enableZtunable_op_tuning_enablez,Please specify a path to `--hf-ort-dir-path`z+Please specify a path to `--ort-model-path`)argparseArgumentParseradd_argumentstrrE   rh   Zis_availableintfloatrw   rx   ry   
parse_argsr   randomseedZmanual_seedr5   r   r9   upperrY   r   rW   r[   AssertionErrorr\   astliteral_evalr+   )parserr   r   r   r   r     s    

   r   c                  C   s6  t  } t| j t| j dtjj_	t
| j}t| j}| jdkrTd| j n| j}| jdk}t| d| t| d| t| d| t| dd	 t| d
|j td| j  t| }| jdkrttdd | }d|k| _t| dd|k t| dd|k | jg kr|jg| _t| }t| || d S )NTrN   zcuda:r   r2   rG   rD   rB   Fr   zForced decoder prompt ids: r   c                 S   s   | j S r   r   r   r   r   r   r-   D  r.   zmain.<locals>.<lambda>r/   r=   r+   r>   r,   )r   r   rV   r?   r@   __dict__rE   backendsZcudnnZ	benchmarkr   rT   rR   r   r5   r   rq   setattrr   r+   ra   r9   r   r   rJ   rB   Zdecoder_start_token_idr   )r   configr2   rG   rD   r]   Zort_model_inputsr6   r   r   r   main-  s0    




r   __main__).r   r   rt   r   loggingrw   rf   rS   Znumpyr   r   rE   r   Zbenchmark_helperr   r   Zonnxruntime_extensionsr   Zoptimum.onnxruntimer   Ztorch.profilerr   r   r   Ztqdmr	   Ztransformersr
   r   r   Zonnxruntimer   	getLoggerrr   r?   	NamespacerJ   ra   rA   r~   r   r   r   r   r   r   r   r   r   r   <module>   s@   
FC0@W	 #
