U
    h/                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ e	e
Zdd Zdd Zdd Zd	d
 Zdd Ze
dkre  dS )    N)setup_loggerc                  C   s  t  } | jddtdd | jddtdd | jdd	td
d | jddtdd | jddddd | jddddd | jdtddd | jdtddd | jdtddd | jdtddd  | jd!tdd"d#d$d%gd&d' | jd(tdd)d*d+gd,d' | jd-td.d/d | jd0ddd1d | jd2td3d4d |  }t|d5|jd6d7 	d8d9 d:|j
 d;|j }t|d<| tj|jdd= | jd>9  _|S )?Nz-b--batch-sizesz1 2)typedefaultz-s--sequence-lengthsz8 16 32 64 128 256 512z-w--warmup-runs   z-n
--num-runs  z--hf-pt-eagerF
store_truez,Benchmark in PyTorch without `torch.compile`)r   actionhelpz--hf-pt-compilez)Benchmark in PyTorch with `torch.compile`--hf-ort-dir-path zDPath to folder containing ONNX models for Optimum + ORT benchmarking)r   r   r   z--ort-msft-model-pathzAPath to ONNX model from https://github.com/microsoft/Llama-2-Onnxz --ort-convert-to-onnx-model-pathz'Path to ONNX model from convert_to_onnx--model-nameTzModel name in Hugging Face)r   requiredr   --precisionZint4Zint8Zfp16Zfp32zPrecision to run model)r   r   choicesr   --devicecpuZcudaZrocmzDevice to benchmark modelsz--device-idr   zGPU device IDz	--verbosezPrint detailed logsz	--timeout
   z8Number of mins to attempt the benchmark before moving on
model_size/.-z./_
log_folder)exist_ok<   )argparseArgumentParseradd_argumentstrint
parse_argssetattr
model_namesplitreplacer   	precisionosmakedirsr   timeout)parserargsZlog_folder_name r0   W/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/models/llama/benchmark_all.pyget_args   s    
 r2   c              
   C   s  g }d\}}}d\}}}	}
d}d}d}d}d}d}d	}t |`}|D ]R}|d
d}||krvt|t|d  }qF||krt|t|d  }qF||krd}qF||krd}qF||krt|t||d }|d }qF||krt|t||d }	qF||krFd|kr:t||dd |d d }
n@|||t| d  dd}t||  d }t|d }
|||||||	|
g }|	| qFW 5 Q R X |S )N)NNN)NNNNzBatch Size: zSequence Length: zto get past_key_valueszwith past_key_valuesz	Latency: zThroughput: zpeak=
r   promptz	per-token r
   ZCPU=   z MB'"Zmax_used_MB)
openr)   r$   lenfloatrfindfindjsonloadsappend)	device_idlog_filebase_resultsentriesZ
batch_sizeZsequence_lengthstepZ	latency_sZ
latency_msZ
throughputZmemoryZbatch_patternZsequence_patternZprompt_step_patternZper_token_step_patternZlatency_patternZthroughput_patternZmemory_patternfZ
input_linelineZpeakusageentryr0   r0   r1   process_log_file   sV    




&"	rK   c                 C   s   dd l }|j| dddddddd	d
dg
d}|d d|d< |d d|d< |d d|d< |d	 d|d	< |d
 d|d
< |d d|d< |j|dd td| d d S )Nr   ZEngineZ	PrecisionZDevicez
Batch SizezSequence LengthZStepzLatency (s)zLatency (ms)zThroughput (tps)zMemory (GB))columnsr$   r<   F)indexzResults saved in !)ZpandasZ	DataFrameZastypeZto_csvloggerinfo)resultsfilenamepdZdfr0   r0   r1   save_results   s.    rT   c           	   	   C   s   | dt j  dd}tj| j|}t|dF}tj|||d}z|	| j
 W n tjk
rr   |  Y nX W 5 Q R X td || j| jg}t| j||}|S )Nr   %Y-%m-%d_%H:%M:%Sz.logw)stdoutstderrz Gathering data from log files...)datetimenowr+   pathjoinr   r:   
subprocessPopenwaitr-   TimeoutExpiredkillrO   rP   r*   devicerK   rB   )	r/   benchmark_cmdZengineZlog_filenameZlog_pathrC   processrD   rQ   r0   r0   r1   	benchmark   s    
re   c                  C   s  t  } t| j t| j dtjj_	g }t
| jtjd< | jrdddddd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jdg}td t	| |d}|| | jrdddddd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jdg}td t	| |d}|| | jrdddddd| jd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jdg}td t	| |d}|| | jrdddddd| jd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jg}td t	| |d}|| | jrvdddddd| jd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jg}td t	| |d}|| | j d | j d tj d!d"}t|tj !| j| d S )#NTZCUDA_VISIBLE_DEVICESpythonz-mzmodels.llama.benchmarkz--benchmark-typezhf-pt-eagerr   r   r   r   r   r   r	   z--log-folderz--authz'Benchmark PyTorch without torch.compilezpytorch-eagerzhf-pt-compilez$Benchmark PyTorch with torch.compilezpytorch-compilezhf-ortr   z Benchmark Optimum + ONNX Runtimezoptimum-ortzort-msftz--ort-model-pathz)Benchmark Microsoft model in ONNX Runtimezort-convert-to-onnxz/Benchmark convert_to_onnx model in ONNX RuntimeZonnxruntimer   rU   z.csv)"r2   r   verboserO   rP   __dict__torchbackendsZcudnnre   r#   rB   r+   environZhf_pt_eagerr'   r*   Zbatch_sizesZsequence_lengthsrb   Zwarmup_runsZnum_runsr   extendZhf_pt_compileZhf_ort_dir_pathZort_msft_model_pathZort_convert_to_onnx_model_pathr   rY   rZ   rT   r[   r\   )r/   Zall_resultsrc   rQ   Zcsv_filer0   r0   r1   main   s&   











"rm   __main__)r    rY   r?   loggingr+   r]   ri   Zbenchmark_helperr   	getLogger__name__rO   r2   rK   rT   re   rm   r0   r0   r0   r1   <module>   s    
{9 (