
    h$X                     2   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJrJ	r	  S SK
Jr  S SKJr  S SKJr  S SKJr  S SKJr  S SKrS SKrS SKrS SKrS SKJr  S SKr\R2                  " \5      r " S	 S
\5      r " S S\5      r " S S5      rS\R>                  0r SSSSSS0 4S jr!S(S jr"S)S jr#S r$S r%S r&S r'S*S jr(\RR                  S 4S jr*S r+S+S jr,S\-\.\/\4      S-  4S jr0 " S  S!\5      r1 " S" S#\15      r2 " S$ S%\15      r3S,S& jr4S' r5g)-    N)ABCabstractmethod)ThreadPoolExecutor)datetime)Enum)sleep)Any)versionc                   *    \ rS rSrSrSrSrSrS rSr	g)		Precision    fp32fp16int8int4c                     U R                   $ Nvalueselfs    c/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/benchmark_helper.py__str__Precision.__str__&       zz     N)
__name__
__module____qualname____firstlineno__FLOAT32FLOAT16INT8INT4r   __static_attributes__r   r   r   r   r       s    GGDDr   r   c                   &    \ rS rSrSrSrSrS rSrg)OptimizerInfo*   no_optby_ort	by_scriptc                     U R                   $ r   r   r   s    r   r   OptimizerInfo.__str__1   r   r   r   N)	r   r   r    r!   NOOPTBYORTBYSCRIPTr   r&   r   r   r   r(   r(   *   s     EEHr   r(   c                   &    \ rS rSrS rS rS rSrg)ConfigModifier5   c                     Xl         g r   
num_layers)r   r7   s     r   __init__ConfigModifier.__init__6   s    $r   c                    U R                   c  g [        US5      (       a3  U R                   Ul        [        R	                  SU R                    35        [        US5      (       a3  U R                   Ul        [        R	                  SU R                    35        [        US5      (       a4  U R                   Ul        [        R	                  SU R                    35        g g )Nnum_hidden_layersz6Modifying pytorch model's number of hidden layers to: encoder_layersz7Modifying pytorch model's number of encoder layers to: zdecoder_layers z7Modifying pytorch model's number of decoder layers to: )r7   hasattrr;   loggerinfor<   decoder_layers)r   configs     r   modifyConfigModifier.modify9   s    ??"6.//'+F$KKPQUQ`Q`Pabc6+,,$(OOF!KKQRVRaRaQbcd6,--$(OOF!KKQRVRaRaQbcd .r   c                     U R                   $ r   r6   r   s    r   get_layer_numConfigModifier.get_layer_numF   s    r   r6   N)r   r   r    r!   r8   rB   rE   r&   r   r   r   r3   r3   5   s    %er   r3   float32TFc	                 J   [         R                  " 5       n	U(       a   [         R                  R                  U	l        O[         R                  R
                  U	l        U(       a  SU	l        US:  a(  XIl        [        R                  SU	R                   35        U(       a  SU	l
        OSU	l
        U[         R                  " 5       ;   a  U/n
ORU(       aH  US:X  a  SS/n
O@US:X  a  S	S/n
O5US
:X  a  / SQn
O*US:X  d  Uc  SS/n
OUS:X  a  / SQn
O[        SU 35      eS/n
U(       a  U
 Vs/ s H  oU;   a  XU   4OUPM     n
nU(       a  U	R                  SS5        S n [         R                  " X	U
S9nU$ s  snf ! [         a    [        R!                  SU  SU
 35         U$ f = f)NTr   z%Session option: intra_op_num_threads=   dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderrO   rM   cudaCUDAExecutionProvidertensorrt)TensorrtExecutionProviderrS   rM   z)The execution provider is not supported: z(mlas.enable_gemm_fastmath_arm64_bfloat161)	providerszFailed to create session for z with providers=)onnxruntimeSessionOptionsGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_ENABLE_BASICenable_profilingintra_op_num_threadsr>   debuglog_severity_levelget_available_providersRuntimeErroradd_session_config_entryInferenceSession	Exception	exception)onnx_model_pathuse_gpuproviderenable_all_optimizationnum_threadsr^   verbose(enable_mlas_gemm_fastmath_arm64_bfloat16provider_optionssess_optionsrW   namesessions                r   create_onnxruntime_sessionrs   P   s    --/L0;0R0R0a0a-0;0R0R0c0c-(,%Q,7)<\=^=^<_`a*+'*+';6688J		u/1GHI02HII#I
 8#302HII#I !J8*UVV+,	fopfo^b?O7OdT23UYYfo	p/--.XZ]^Gg..Xab N q  g88IIYZcYdefNgs   (E4E9 9%F"!F"c                     U (       a  [         R                  " SSS9  g [         R                  " SS9  [        R                  " S5      R	                  [        R
                  5        g )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(message)s)rw   transformers)coloredlogsinstalllogging	getLoggersetLevelWARNING)rm   s    r   setup_loggerr      sF    J	

 	..)227??Cr   c                 2   U (       a:  [         R                  R                  U 5      (       d  [         R                  " U 5        U(       a:  [         R                  R                  U5      (       d  [         R                  " U5        U(       ab  US:X  a!  S[        R
                  " 5       ;   d   S5       eO;[        [        R
                  " 5       5      R                  / SQ5      (       a   S5       e[        R                  S[        R                   35        [        R                  S[        R                   35        [        R                  S[        R                   35        [        R                  " [        R                  5      [        R                  " S	5      :  d   e[        R                  " [        R                  5      [        R                  " S
5      :  d   e[        R                  " [        R                  5      [        R                  " S	5      :  d   eg )NrK   rL   zBPlease install onnxruntime-directml package to test GPU inference.)rS   rO   rQ   zWPlease install onnxruntime-gpu package, or install ROCm support, to test GPU inference.zPyTorch Version:zTransformers Version:zOnnxRuntime Version:z1.10.0z4.12.0)ospathexistsmakedirsrX   rb   set
isdisjointr>   r?   torch__version__rx   r
   parse)	cache_dir
output_dirri   rj   s       r   prepare_environmentr      sq   	22
I"''..44
Ju)[-P-P-RR TR
 ;>>@ALL_  ihi  KK"5#4#4"567
KK'(@(@'ABC
KK&{'>'>&?@A ==**+w}}X/FFFF==112gmmH6MMMM==001W]]85LLLLr   c                 r   [        U 5      [        [        U 5      5      -  S-  n[        R                  " U [        R
                  S9S-  nUSU-  -  n[        U 5      US [        R                  " U S5      S-  S [        R                  " U S5      S-  S [        R                  " U S5      S-  S US US S.$ )Ng     @@)dtypez.2fZ   _   c   )
test_timeslatency_variancelatency_90_percentilelatency_95_percentilelatency_99_percentileaverage_latency_msQPS)sumfloatlennumpyvarfloat64
percentile)latency_list
batch_size
latency_msr   
throughputs        r   get_latency_resultr      s    \"U3|+<%==FJyyU]]CfLv
23J ,'/4$)$4$4\2$F$OPS#T$)$4$4\2$F$OPS#T$)$4$4\2$F$OPS#T!+C 0S! r   c                    [        USSSS9 n/ SQn[        R                  " X#S9nUR                  5         U  H  nUR	                  U5        M     S S S 5        [
        R                  SU 35        g ! , (       d  f       N'= f)Na asciimodenewlineencoding)enginer
   rW   device	precision	optimizer
io_binding
model_nameinputsthreadsr   sequence_lengthcustom_layer_numr   r   r   r   r   r   r   r   
fieldnamesz&Detail results are saved to csv file: )opencsv
DictWriterwriteheaderwriterowr>   r?   )resultscsv_filenamecsv_filecolumn_names
csv_writerresults         r   output_detailsr      ss    	lb7	Cx
0 ^^HF
 F' 7 
D< KK8GH= 
D	Cs   AA11
A?c                 n   [        USSSS9 n/ SQn/ nUR                   HT  nUR                  S/:X  a  UR                  SU 35        M*  UR                   H  nUR                  SU SU 35        M     MV     [        R
                  " X4U-   S9nUR                  5         UR                   GHO  n	S	 GHD  n
UR                   GH/  nS
 GH$  nUR                   GH  n0 nU  H  nUS   U	:X  d  M  US   U
:X  d  M  US   U:X  d  M$  US   U:X  d  M/  US   U:X  d  M:  UR                  5        VVs0 s H  u  nnUU;   d  M  UU_M     nnnU(       d7  UR                  U5        UR                  [        R                  US5      5        OU H  nUU   UU   :X  a  M   e   US   nUS   nU(       a  US   USU SU 3'   M  US   USU 3'   M     U(       d  M  UR                  U5        GM     GM'     GM2     GMG     GMR     S S S 5        [        R!                  SU 35        g s  snnf ! , (       d  f       N-= f)Nr   r   r   r   )r   r   r   r   r
   rW   r   r   r   r   r   b_sr   )         )TFr   r   r   r   r   r   r   r   r   z'Summary results are saved to csv file: )r   batch_sizessequence_lengthsappendr   r   r   modelsenginesrl   itemsupdatedictfromkeysr   r>   r?   )r   r   argsr   header_names
data_namesr   r   r   r   input_countengine_namer   r   rowr   kvheadersr   ss                        r   output_summaryr      s1   	lb7	Cx
 
**J$$,!!Aj\"23'+'<'<O%%*R7H&IJ (=	 + ^^H
9RS
 ++J(#'<<K&7
'+'7'7G"$C*1$*<$8J$F(.x(8K(G(.x(8K(G(.|(<
(J(.y(9W(D@F.d1RSWcRctq!tG.d+.(+

7(;(+

4==R3P(Q1=A36q6WQZ3G,G3G 2>(.|(<A(./@(AA'(<BCW<Xas"QCL(97=>R7SasG) +2*  #s * 3 3C 81 (8 '8 $0  ) &1 
Dl KK9,HI! /eM 
D	CsO   CH&-H&8H&H&H&H&,H =H AH&9H&*H& H&&
H4c           
      &   [        USSSS9 nSSSS/[        [        [        U R	                  5       5      5      R                  5       5      Qn[        R                  " X#S	9nUR                  5         U  Hj  n[        [        R                  " 5       5      X   S'   [        R                  X   S'   [        R                  X   S'   XPU   S'   UR                  X   5        Ml     S S S 5        [         R#                  S
U 35        g ! , (       d  f       N'= f)Nr   r   r   r   model_filenamer   rx   r   r   z(Fusion statistics is saved to csv file: )r   listnextitervalueskeysr   r   r   strr   nowrx   r   r   r   r>   r?   )model_fusion_statisticsr   r   r   r   keys         r   output_fusion_statisticsr   *  s    	lb7	Cx	

 $t3::<=>CCEF
 ^^HF
 *C7:8<<>7J#(4;G;S;S#(8494E4E#(1=@C()9: 7 <= + 
D  KK:<.IJ! 
D	Cs   CD
Dc                    ^ ^ 0 n[         R                  " UU 4S jSUS9  [         R                  " UU 4S jSUS9nUR                  U5        UR                  SS05        UR                  [        Xt5      5        U$ )Nc                  (   > TR                  S T 5      $ r   run
ort_inputsort_sessions   r   <lambda>inference_ort.<locals>.<lambda>@  s    +//$
;r   r   numberrepeatc                  (   > TR                  S T 5      $ r   r   r   s   r   r   r   A  s    z)Jr   r   F)timeitr   r   r   )r   r   result_templaterepeat_timesr   warm_up_repeatr   r   s   ``      r   inference_ortr   >  sb    F
MM;An]==!JST]ijL
MM/"
MM<'(
MM$\>?Mr   c           
      d  ^ ^ 0 nT R                  5       mU H  n[        R                  " X   5      R                  U	5      n[        R                  [        X   R                  5      U
5      nTR                  UUR                  R                  SUUR                  UR                  5       5        M     [        U5      S:X  a  [        XgU	5        [        U5       H_  u  nnTR!                  UUU   R                  R                  S["        R$                  UU   R                  UU   R                  5       5        Ma     [&        R(                  " UU 4S jSUS9  [&        R(                  " UU 4S jSUS9nUR+                  U5        UR+                  SS05        UR+                  [-        UU5      5        U$ )Nr   c                  &   > TR                  T 5      $ r   run_with_iobindingr   r   s   r   r   /inference_ort_with_io_binding.<locals>.<lambda>u      ..z:r   r   r   c                  &   > TR                  T 5      $ r   r   r  s   r   r   r  {  r  r   r   T)r   r   
from_numpytoIO_BINDING_DATA_TYPE_MAPgetr   r   
bind_inputr   typeshapedata_ptrr   allocateOutputBuffers	enumeratebind_outputr   rG   r   r   r   r   )r   r   r   r   ort_output_namesort_outputsoutput_buffersoutput_buffer_max_sizesr   r   	data_typer   r   rq   np_input
input_typeiort_output_namer   r   s   `                  @r   inference_ort_with_io_bindingr  H  s|    F '')J##J$4588@-11#j6F6L6L2MyY
OO  NN	
  >anvN'(89?1$$))MMN  1&&(	
 : MM: ==:L
 MM/"
MM<&'
MM$\:>?Mr   c           	      |    U H6  nU R                  [        R                  " U[        R                  US95        M8     g )N)r   r   )r   r   emptyrG   )r  r  r   r  s       r   r  r    s-     %ekk!5==PQ %r   c                    [         R                  " U 5        [        R                   R                  U 5        [        R                  " U 5        [        R
                  R	                  U 5        [        R
                  R                  U 5        g)z5Set random seed manually to get deterministic resultsN)randomseedr   r   manual_seedrR   manual_seed_all)r  s    r   set_random_seedr!    sR    
KK	LLd	d	JJ4 	JJt$r   returnc            	         SSK Jn JnJnJnJnJnJn   U" 5         / nU" 5       n[        U[        5      (       d  g [        U5       Hi  n	U" U" U	5      5      n
[        U
[        5      (       a    g UR                  U	U" U" U	5      5      U
R                  U
R                  U
R                  S.5        Mk     U" 5         U$ ! U  a  n[!        SU5         S nAg S nAff = f)Nr   	NVMLErrornvmlDeviceGetCountnvmlDeviceGetHandleByIndexnvmlDeviceGetMemoryInfonvmlDeviceGetNamenvmlInitnvmlShutdown)idrq   totalfreeused-Error fetching GPU information using nvml: %s)py3nvml.py3nvmlr%  r&  r'  r(  r)  r*  r+  
isinstanceintranger   r   r-  r.  r/  print)r%  r&  r'  r(  r)  r*  r+  r   device_countr  r?   errors               r   get_gpu_infor8    s      
)+,,,|$A*+Ea+HID$$$MM-.H.KL!ZZ II II	 % 	 =uEs#   %B; 2B; -AB; ;CCCc                   R    \ rS rSrSS jrS r\S\\\	\
4      S-  4S j5       rSrg)	MemoryMonitori  c                     Xl         g r   keep_measuring)r   r=  s     r   r8   MemoryMonitor.__init__  s    ,r   c                     SS K nSn [        X!R                  [        R                  " 5       5      R                  5       R                  S-  5      n[        S5        U R                  (       d   U$ Mi  )Nr      {Gzt?)	psutilmaxProcessr   getpidmemory_inforssr   r=  )r   rB  	max_usages      r   measure_cpu_usageMemoryMonitor.measure_cpu_usage  sY    	I~~biik'B'N'N'P'T'TW^'^_I%L&& r   r"  Nc                     [        5       er   )NotImplementedErrorr   s    r   measure_gpu_usageMemoryMonitor.measure_gpu_usage  s    !##r   r<  T)r   r   r    r!   r8   rI  r   r   r   r   r	   rM  r&   r   r   r   r:  r:    s9    -	 $4S#X#7$#> $ $r   r:  c                   P   ^  \ rS rSrSU 4S jjrS\\\\4      S-  4S jr	Sr
U =r$ )CudaMemoryMonitori  c                 $   > [         TU ]  U5        g r   )superr8   )r   r=  	__class__s     r   r8   CudaMemoryMonitor.__init__  s    (r   r"  Nc                    SSK JnJnJnJnJnJnJn  / n/ n	 U" 5         U" 5       n
[        U
[        5      (       d  [        R                  SU
 35        g [        U
5       Vs/ s H  nSPM     nn[        U
5       Vs/ s H  o" U" U5      5      PM     n	n [        U
5       H]  nU" U" U5      5      n[        U[        5      (       a  [        R                  SU 35          g [        X   UR                  S-  5      X'   M_     [!        S5        U R"                  (       d  OM  U" 5         [        U
5       Vs/ s H  nUX   X   S.PM     sn$ s  snf s  snf s  snf ! U a   n[        R                  SU5         S nAg S nAff = f)	Nr   r$  z*nvmlDeviceGetCount result is not integer: z%nvmlDeviceGetMemoryInfo returns str: r@  rA  	device_idrq   max_used_MBr0  )r1  r%  r&  r'  r(  r)  r*  r+  r2  r3  r>   r7  r4  r   rC  r/  r   r=  )r   r%  r&  r'  r(  r)  r*  r+  max_gpu_usagegpu_namer6  r  r?   r7  s                 r   rM  #CudaMemoryMonitor.measure_gpu_usage  s   	
 	
 	
 	J-/LlC00I,XY(-l(;<(;1Q(;M<RWXdRefReQ)*DQ*GHReHf|,A23Ma3PQD!$,,'LTF%ST#'*=+;TYY=P'QM$ - e**  N |, -A	 "#$K#0#3
 -  =f  	LLH%P	sO   ;E E !E-E <EAE !AE 5E	E E F!E<<Fr   rO  )r   r   r    r!   r8   r   r   r   r	   rM  r&   __classcell__rT  s   @r   rQ  rQ    s+    )+4S#X#7$#> + +r   rQ  c                   8   ^  \ rS rSrSU 4S jjrS rS rSrU =r$ )RocmMemoryMonitori  c                 N  > [         TU ]  U5        Sn[        R                  R	                  U5      (       a3  U[
        R                  ;  a  [
        R                  R                  U5         SS KnX0l        U R                  R                  5         g ! [         a
    S U l         g f = f)Nz/opt/rocm/libexec/rocm_smir   )
rS  r8   r   r   r   sysr   rocm_smiinitializeRsmiImportError)r   r=  rocm_smi_pathrc  rT  s       r   r8   RocmMemoryMonitor.__init__  sw    (477>>-((CHH,.	!$MMM((* 	! DM	!s   +$B B$#B$c                 h    U R                   c  gU R                   R                  US5      S   S-  S-  $ )NrH   VRAMr   i   )rc  
getMemInfo)r   devs     r   get_used_memory!RocmMemoryMonitor.get_used_memory  s5    == }}''V4Q7$>EEr   c                    U R                   c  g U R                   b#  [        U R                   R                  5       5      OSn[        U5       Vs/ s H  nSPM     nn[        U5       Vs/ s H  nSU 3PM
     nn [        U5       H"  n[	        X2   U R                  U5      5      X2'   M$     [        R                  " S5        U R                  (       d  OM[  [        U5       Vs/ s H  nUXB   X2   S.PM     sn$ s  snf s  snf s  snf )Nr   GPUrA  rW  )	rc  r   listDevicesr4  rC  rl  timer   r=  )r   r6  r  rZ  r[  s        r   rM  #RocmMemoryMonitor.measure_gpu_usage  s   == ;?==;Ts4==4467Z[$),$78$7q$78',\':;':!c!I':;<(#&}'79M9Ma9P#Q  )JJu&&  <(
 )	  ,/
 )
 	
 9;
s   C:)C?#D)rc  rO  )	r   r   r    r!   r8   rl  rM  r&   r]  r^  s   @r   r`  r`    s    !F

 
r   r`  c                    S nUS:X  a  [         nO[        nU" S5      nU (       Ga*  Ub  UnOUR                  5       nUc  g Uc  U$ [        5        nU" 5       nUR	                  UR                  5      n UR	                  U5      n	U	R                  5       n
SUl        UR                  5       nUc
   S S S 5        g [        R                  SU SU 35        [        U5      S:  ae  [        U5      S:  aV  [        U5      [        U5      :X  a>  Sn[        U5       H"  u  pUS   nX   S   nUU-
  n[        UU5      nM$     UsS S S 5        $ S S S 5        g Ub  UnOUR                  5       nUc  U$ [        5        nU" 5       nUR	                  UR                  5      n UR	                  U5      n	U	R                  5       n
SUl        UR                  5       n[        R                  SUS	 S
US	 S35        X-
  sS S S 5        $ ! SUl        UR                  5       nf = f! , (       d  f       g = f! SUl        UR                  5       nf = f! , (       d  f       g = f)NrN   FzGPU memory usage: before=z  peak=r   r   rY  zCPU memory usage: before=z.1fz
 MB, peak=z MB)r`  rQ  rM  r   submitr   r=  r>   r?   r   r  rC  rI  )is_gpufuncmonitor_typestart_memorymemory_monitor_typemonitormemory_before_testexecutor
mem_thread	fn_thread_rH  max_usedr  memory_beforebeforeafterr/  s                     r   measure_memoryr  0  si   v//!%(G#!-!(!:!:!<%<%%!X)+G!)B)BCJ0$OOD1	$$&).&&--/	  "! KK34F3GwykZ[%&!+I!0CL^H_cfgpcqHq(12D(E$A*=9F%L7E 6>D"8T2H	 )F
  - "!!.  )$668|!!		%'__W%>%>?
	, -I  "A%*G""))+I/0B3/GzR[\_Q``cde- 
	7 */&&--/	 "!. $ &+G""))+I 
	sO   #H3!G<H8BH5#I!H):8I<HH
H&)II
Ic                      / SQn SnU  H3  n[         R                  " U5      nUc  M  U(       a  US-  nX SU 3-  nM5     U$ )N)ORT_DISABLE_FUSED_ATTENTION!ORT_ENABLE_FUSED_CAUSAL_ATTENTION!ORT_DISABLE_FUSED_CROSS_ATTENTIONORT_DISABLE_TRT_FLASH_ATTENTION&ORT_DISABLE_MEMORY_EFFICIENT_ATTENTIONORT_TRANSFORMER_OPTIONSORT_CUDA_GEMM_OPTIONSr   ,=)r   getenv)	env_namesenvrq   r   s       r   get_ort_environment_variablesr  t  sU    I C		$=3JCq    Jr   rO  r   )r   ){   )rR   N)6r   r{   r   r  rb  rq  r   abcr   r   concurrent.futuresr   r   enumr   r   typingr	   ry   r   r   rx   	packagingr
   rX   r|   r   r>   r   r(   r3   rG   r  rs   r   r   r   r   r   r   r   longlongr  r  r!  r   r   r   r8  r:  rQ  r`  r  r  r   r   r   <module>r     s;     	  
   # 1          			8	$ D  , u}}   -2DNDM8 ID7JtK(* nn:zR%#d4S>*T1 #L$C $(/ /d(
 (
VA.Hr   