
    h;                         S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJr  S SK	J
r
  \R                  " \5      rS rS rS rS rS r\S	:X  a  \" 5         gg)
    N)setup_logger)BenchmarkRecordc                  p   [         R                  " 5       n U R                  SS[        SS9  U R                  SS[        SS9  U R                  SS	[        S
S9  U R                  SS[        SS9  U R                  SSSSS9  U R                  SSSSS9  U R                  S[        SSS9  U R                  S[        SSS9  U R                  S[        SSS9  U R                  S[        SSS9  U R                  S [        S!S"S#9  U R                  S$[        S!/ S%QS&S'9  U R                  S([        S!/ S)QS*S'9  U R                  S+[        S,S-S9  U R                  S.SSS/S9  U R                  S0[        S1S2S9  U R                  S3[        S S4S9  U R                  5       n[        US5UR                  R                  S65      S7   R                  S8S95      5        S:UR                   S;UR                   3nUR                  (       d  X!l        [        R                  " UR                  S!S<9  U=R                  S=-  sl        U$ )>Nz-b--batch-sizesz1 2)typedefaultz-s--sequence-lengthsz8 16 32 64 128 256 512z-w--warmup-runs   z-n
--num-runs  z--hf-pt-eagerF
store_truez,Benchmark in PyTorch without `torch.compile`)r   actionhelpz--hf-pt-compilez)Benchmark in PyTorch with `torch.compile`--hf-ort-dir-path zDPath to folder containing ONNX models for Optimum + ORT benchmarking)r   r   r   z--ort-msft-model-pathzAPath to ONNX model from https://github.com/microsoft/Llama-2-Onnxz --ort-convert-to-onnx-model-pathz'Path to ONNX model from convert_to_onnx--cache-dirz./model_cachez-Cache dir where Hugging Face files are stored--model-nameTzModel name in Hugging Face)r   requiredr   --precision)int4int8fp16fp32zPrecision to run model)r   r   choicesr   --device)cpucudarocmzDevice to benchmark modelsz--device-idr   zGPU device IDz	--verbosezPrint detailed logsz	--timeout
   z8Number of mins to attempt the benchmark before moving on--log-folderz'Path to folder to save logs and results
model_size/.-z./_)exist_ok<   )argparseArgumentParseradd_argumentstrint
parse_argssetattr
model_namesplitreplacer"   	precision
log_folderosmakedirstimeout)parserargslog_folder_names      m/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/models/llama/benchmark_all.pyget_argsr=      s   $$&F
	   (	   	   	   ;	   8	   S	   P	   *6	   <	   )	   0%   ')   	   "	   G	   6	   DD, 5 5c :2 > F FsC PQ4??+1T^^,<=O??)KK$/ 	LLBLK    c           
         / nSu  pEnSu  pxpSnSnSnSnSnSnS	n[        U5       nU GH  nUR                  S
S5      nUU;   a  [        U[        U5      S  5      nM5  UU;   a  [        U[        U5      S  5      nMT  UU;   a  SnM^  UU;   a  SnMh  UU;   a-  [	        U[        U5      UR                  S5       5      nUS-  nM  UU;   a(  [	        U[        U5      UR                  S5       5      n	M  UU;   d  M  SU;   a3  [	        UUR                  S5      S-   UR                  S5       5      S-  n
OZUUR                  U5      [        U5      -   S  R                  SS5      n[        R                  " U5      U    S   n[	        U5      S-  n
UUUUUUU	U
/-   nUR                  U5        GM     S S S 5        U$ ! , (       d  f       U$ = f)N)NNN)NNNNzBatch Size: zSequence Length: zto get past_key_valueszwith past_key_valuesz	Latency: zThroughput: zpeak=
r   promptz	per-token r   CPU=   z MB'"max_used_MB)
openr3   r.   lenfloatrfindfindjsonloadsappend)	device_idlog_filebase_resultsentries
batch_sizesequence_lengthstep	latency_s
latency_ms
throughputmemorybatch_patternsequence_patternprompt_step_patternper_token_step_patternlatency_patternthroughput_patternmemory_patternf
input_linelinepeakusageentrys                           r<   process_log_fileri      s   G(8%J0F-I:"M*23!O'N	h1J%%dB/D$ c-&8&:!;<
!T)"%d3/?+@+B&C"D$,'4/" D(!$s?';djjo"NO	&-
#t+"4,>(?$**S/#RS
4'D= #4

3!(;djj>O#PQTXXF  		. 9C<O O QRZZ[^`cdD JJt,Y7FE"5\D0F %#(  u%M  
R NS 
R Ns   CF9:B5F99
Gc           	         SS K nUR                  U / SQS9nUS   R                  S5      US'   US   R                  S5      US'   US   R                  S5      US'   US   R                  S5      US'   US	   R                  S
5      US	'   US   R                  S
5      US'   US   R                  S
5      US'   US   R                  S
5      US'   SS KnUR                  n[        U Vs/ s H/  ofR                  S;   d  M  UR                   SUR                   3PM1     sn5      nSnSn	U(       a.  US   R                  S5      S   nUS   R                  S5      S   n	/ n
UR                  5        GH|  u  pUS   S;   a  [        US   US   SUS   X5      nO[US   S;   a6  [        US   US   SUS   [        R                  [        R                  5      nO[        US   US   US   US   SS5      nUS   UR                  l        US   UR                  l        US   UR                  l        US   UR                  l        US   UR                  R&                  S'   US   UR                  R&                  S'   US	   UR(                  R&                  S'   US   UR(                  l        US   UR(                  R&                  S'   US   UR(                  l        U
R/                  U5        GM     [        R0                  " X5        [        R2                  " UR5                  SS 5      U
5        [6        R9                  S!U S"35        g s  snf )#Nr   )Warmup RunsMeasured Runs
Model NameEngine	PrecisionDevice
Batch SizeSequence LengthStepLatency (s)Latency (ms)Throughput (tps)Memory (GB))columnsrk   r.   rl   rq   rr   rt   rK   ru   rv   rw   )onnxruntimezonnxruntime-gpuz==r   rE   rn   )optimum-ortry   rm   ro   ry   rp   )pytorch-eagerpytorch-compilepytorchrs   measure_stepenginelatency_s_meanthroughput_tps.csvz.jsonzResults saved in !)pandas	DataFrameastypepkg_resourcesworking_setsortedkeyversionr2   iterrowsr   torch__name____version__configwarmup_runsmeasured_runsrU   
seq_length
customizedmetricslatency_ms_meanmax_memory_usage_GBrP   save_as_csvsave_as_jsonr3   loggerinfo)resultsfilenamepddfr   installed_packagesiinstalled_packages_listort_pkg_nameort_pkg_versionrecordsr'   rowrecords                 r<   save_resultsr      sa   	
 
 
B( =)007B}_-44U;B,'..u5B|0188?B=)009B}N+227;B~ 23::7CB=)009B} &22$*<l*<QIk@k	 AEE7"QYYK	 *<l LO.q177=a@1!4::4@C G++-x=::$L!3{#3]CMS_F ]BB$L!3{#3YHu~~_d_p_pF %S%6K8H#h-Y\]eYfhjlnoF$'$6!&)/&:##&|#4 #&'8#9 36v;  0-0]  *69-6H!!"23),^)<&69:L6M!!"23-0-?*v-  0 2  !1!1&'!BGL
KK#H:Q/0K 	ms   (M?Mc                 n   U S[         R                   R                  5       S S3n[        R                  R	                  U R
                  U5      n[        US5       n[        R                  " XUS9n UR                  U R                  5        S S S 5        [        R                  S5        U R                  U R                   U R"                  X R$                  U R&                  /n[)        U R*                  XG5      nU$ ! [        R                   a    UR                  5          Nf = f! , (       d  f       N= f)Nr'   %Y-%m-%d_%H:%M:%Sz.logw)stdoutstderrz Gathering data from log files...)datetimenowr6   pathjoinr5   rI   
subprocessPopenwaitr8   TimeoutExpiredkillr   r   r   num_runsr1   r4   deviceri   rQ   )	r:   benchmark_cmdr   log_filenamelog_pathrR   processrS   r   s	            r<   	benchmarkr   $  s    XQx004467HINLww||DOO\:H	h	""=(S	LL& 
 KK23$$dmmT__fnn^b^i^ijLt~~xFGN (( 	LLN		 
	s*   D&3C<<$D# D&"D##D&&
D4c                  
   [        5       n [        U R                  5        [        R	                  U R
                  5        S[        R                  R                  l	        / n[        U R                  5      [        R                  S'   U R                  (       a  SSSSSSU R                  S	U R                   S
U R"                  SU R$                  SU R&                  S[        U R(                  5      S[        U R*                  5      SU R,                  SU R.                  S/n[        R	                  S5        [        XS5      nUR1                  U5        U R2                  (       a  SSSSSSU R                  S	U R                   S
U R"                  SU R$                  SU R&                  S[        U R(                  5      S[        U R*                  5      SU R,                  SU R.                  S/n[        R	                  S5        [        XS5      nUR1                  U5        U R4                  (       a  SSSSSSU R4                  SU R                  S	U R                   S
U R"                  SU R$                  SU R&                  S[        U R(                  5      S[        U R*                  5      SU R,                  SU R.                  S/n[        R	                  S5        [        XS5      nUR1                  U5        U R6                  (       a  SSSSSSU R6                  SU R                  S	U R                   S
U R"                  SU R$                  SU R&                  S[        U R(                  5      S[        U R*                  5      SU R,                  SU R.                  /n[        R	                  S5        [        XS5      nUR1                  U5        U R8                  (       a  SSSSSSU R8                  SU R                  S	U R                   S
U R"                  SU R$                  SU R&                  S[        U R(                  5      S[        U R*                  5      SU R,                  SU R.                  /n[        R	                  S5        [        XS 5      nUR1                  U5        U R:                   S!U R                    S![<        R<                  R?                  5       S" S#3n[A        U[        RB                  RE                  U R,                  U5      5        g )$NTCUDA_VISIBLE_DEVICESpythonz-mzmodels.llama.benchmarkz--benchmark-typezhf-pt-eagerr   r   r   r	   r   r
   r   r!   r   z--authz'Benchmark PyTorch without torch.compiler{   zhf-pt-compilez$Benchmark PyTorch with torch.compiler|   zhf-ortr   z Benchmark Optimum + ONNX Runtimerz   zort-msftz--ort-model-pathz)Benchmark Microsoft model in ONNX Runtimezort-convert-to-onnxz/Benchmark convert_to_onnx model in ONNX Runtimery   r'   r   r   )#r=   r   verboser   r   __dict__r   backendscudnnr   r-   rQ   r6   environhf_pt_eagerr1   r4   batch_sizessequence_lengthsr   r   r   r5   	cache_dirextendhf_pt_compilehf_ort_dir_pathort_msft_model_pathort_convert_to_onnx_model_pathr"   r   r   r   r   r   )r:   all_resultsr   r   csv_files        r<   mainr   6  sY   :D
KK%)ENN"K),T^^)<BJJ%& $OONN !!KK  !OONN1
4 	=>DA7# $OONN !!KK  !OONN1
4 	:;D1BC7# $  OONN !!KK  !OONN5
8 	67D?7# $$$OONN !!KK  !OONN3
6 	?@D<7# **$!//OONN !!KK  !OONN3
6 	EFD?7#//"!DNN#31X5F5F5J5J5LM^4__cdHbggll4??HEFr>   __main__)r*   r   rN   loggingr6   r   r   benchmark_helperr   r   r   	getLoggerr   r   r=   ri   r   r   r    r>   r<   <module>r      sg        	   ) #			8	$GT6rJ1Z$nGb zF r>   