
    hP                     D   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	  S SK
J
r
  S SKJr  S SKrS SKrS SKrS SKJrJr  \	 " S S5      5       r\	 " S S	5      5       r   SS
 jrS rS rS rS rS rS rS rS rS rS r S r!S r"\#S:X  a
  Sr$\"" 5         gg)    N)	dataclass)datetime)Path)generate_test_dataget_bert_inputsc                       \ rS rSr% \\S'   \\S'   \\S'   \\S'   \\S'   \\S'   \\S'   \\S	'   \\S
'   \\S'   \\S'   \\S'   \\S'   Srg)TestSetting!   
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_length N)	__name__
__module____qualname____firstlineno__int__annotations__boolstr__static_attributes__r       a/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/bert_perf_test.pyr	   r	   !   sI    OOOMM
IM    r"   r	   c                   r    \ rS rSr% \\S'   \\S'   \\S'   \\S'   \\S'   \S-  \S'   \S-  \S	'   \\S
'   Srg)ModelSetting2   
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelNinput_tuning_resultsoutput_tuning_results	mask_typer   )r   r   r   r   r    r   r   r!   r   r"   r#   r%   r%   2   s7    ON*$:%Nr"   r%   c                    SS K nUR                  U5        U(       a  SUR                  5       ;  a  [        S5        U(       a<  US:X  a  SS/nO4US:X  a  SS/nO)US	:X  a  / S
QnOUS:X  a  SS/nOUS:X  a  / SQnOSS/nOS/nUR	                  5       n	XYl        UR                  R                  U	l        Uc  UR                  R                  U	l        OUS:X  a  UR                  R                  U	l        OUS:X  a  UR                  R                  U	l        OlUS:X  a  UR                  R                  U	l        OJUS:X  a  UR                  R                  U	l        O(US:X  a  UR                  R                  U	l        OXIl        Ub  X9l        UR#                  X	US9n
U(       a  US:X  a  SU
R%                  5       ;   d   eOUS:X  a  SU
R%                  5       ;   d   eOUS	:X  a-  SU
R%                  5       ;   d   eSU
R%                  5       ;   d   eO}US:X  a  SU
R%                  5       ;   d   eO`US:X  a-  SU
R%                  5       ;   d   eSU
R%                  5       ;   d   eO-SU
R%                  5       ;   d   eOSU
R%                  5       ;   d   eUb;  ['        U5       nU
R)                  [*        R,                  " U5      5        S S S 5        U
$ U
$ ! , (       d  f       U
$ = f)Nr   CUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderr5   r3   cudatensorrt)TensorrtExecutionProviderr0   r3            c   )	providersr7   r:   )onnxruntimeset_default_logger_severityget_available_providersprintSessionOptionslog_severity_levelExecutionModeORT_SEQUENTIALexecution_modeGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_DISABLE_ALLORT_ENABLE_BASICORT_ENABLE_EXTENDEDORT_ENABLE_LAYOUTr   InferenceSessionget_providersopenset_tuning_resultsjsonload)r'   r   r   r   rK   r   tuning_results_pathr@   execution_providerssess_optionssessionfs               r#   create_sessionr[   >   s    ++L9+;3V3V3XX N	
 u#9;Q"R#:<R"S##
 #:<R"S## $;<R"S56--/L&2#"-";";"J"JL'0;0R0R0a0a-	!Q	&0;0R0R0b0b-	!Q	&0;0R0R0c0c-	!Q	&0;0R0R0f0f-	!Q	&0;0R0R0d0d-	!R	'0;0R0R0a0a-0H-',@)**:Ob*cGu)W-B-B-DDDD*g.C.C.EEEE#.'2G2G2IIII*g.C.C.EEEE*g.C.C.EEEE#.'2G2G2IIII*g.C.C.EEEE*g.C.C.EEEE%)>)>)@@@@&%&!&&tyy|4 ' N7N '& Ns   &K
Kc                     [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R
                  [        R                  0nX   $ N)torchfloat32npfloat16int32int64longlong)
torch_typetype_maps     r#   
numpy_typerg      sH    rzzrzzRXXR[[	H r"   c                 @   U R                  5        VVs0 s H+  u  p4U[        R                  " U5      R                  U5      _M-     nnnUR                  5        VVs0 s H+  u  p4U[        R                  " U5      R                  U5      _M-     nnnXV4$ s  snnf s  snnf r]   )itemsr^   
from_numpyto)inputsoutputsdevicenamearrayinput_tensorsoutput_tensorss          r#   create_input_output_tensorsrs      s    QWQ]Q]Q_`Q_+$T5++E255f==Q_M`RYR_R_RabRa;4dE,,U366v>>RaNb(( abs   2B2Bc           
         U R                  5       nUR                  5        HZ  u  pEUR                  UUR                  R                  S[        UR                  5      UR                  UR                  5       5        M\     UR                  5        HZ  u  pEUR                  UUR                  R                  S[        UR                  5      UR                  UR                  5       5        M\     U$ Nr   )

io_bindingri   
bind_inputrn   typerg   dtypeshapedata_ptrbind_output)sessrq   rr   rv   ro   tensors         r#   create_io_bindingr      s    "J%++-MMv||$LLOO	
 . ',,.MMv||$LLOO	
 / r"   c                    / n/ nUR                   (       a  SOSn[        U5       H  u  pxU R                  X(5      n	UR                  U	5        0 n
[	        [        U5      5       H  nX   XU   '   M     [        XU5      u  p[        XU5      nU R                  U5        [        R                  " 5       nU R                  U5        [        R                  " 5       U-
  nUR                  U5        M     XE4$ )Nr8   cpu)r   	enumeraterunappendrangelenrs   r   run_with_iobindingtimeitdefault_timer)rY   
all_inputsoutput_namestest_settingresultslatency_listrn   _test_case_idrl   resultrm   irq   rr   rv   
start_timelatencys                    r#   %onnxruntime_inference_with_io_bindingr      s    GL#++VF!*:!6\2vs<()A'-yGO$ * )DFU[(\%&w~N
 	"":.))+
"":.&&(:5G$! "7$   r"   c                 b   [        U5      S:  a&  U R                  U[        R                  " U5      5        / n/ n[	        U5       He  u  pV[
        R                  " 5       nU R                  X&5      n[
        R                  " 5       U-
  n	UR                  U5        UR                  U	5        Mg     X44$ ru   )r   r   randomchoicer   r   r   r   )
rY   r   r   r   r   r   rl   r   r   r   s
             r#   onnxruntime_inferencer      s    
:L&--
";<GL!*:!6))+
\2&&(:5vG$ "7   r"   c                    UR                  5       nS[        R                  R                  U 5       S3nUSUR                   SUR
                   S3R                  SS5      -  nUSUR                   SUR                   S3-  nUS	UR                   S
UR                   S3-  nUSUR                   SUR                   S3-  nUSUR                   S3-  nUSUR                   3-  nU$ )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)get_session_optionsospathbasenamerK   r   replacer   r   r   r   r   r   r   r   )r'   rY   r   rX   options        r#   	to_stringr      s9   ..0Lbgg&&z2315F
),*O*O)PPfgs  hI  hI  gJ  JK  L  T  T%r F L3344ElFbFbEccdeeF
L334LAXAX@YYZ[[F
--..>|?Z?Z>[[\]]F
()M)M(NaPPF
'(K(K'LMMFMr"   c           
      X   [        U R                  UR                  UR                  UU R                  UR
                  U R                  S9nUR                  5        Vs/ s H  ofR                  PM     nn[        U R                  XQ5      nX;   a  [        SU5        g [        SU5        / n	UR                  (       a<  [        UR                  5       H"  n
[        XSXq5      u  pU	R                  U5        M$     O;[        UR                  5       H"  n
[!        XSU5      u  pU	R                  U5        M$     ["        R$                  " U	5      S-  n[&        R(                  " U5      n["        R*                  " US5      n["        R*                  " US5      n["        R*                  " US5      n["        R*                  " US5      n["        R*                  " US	5      nUR,                  S
U-  -  nUUUUUUU4X('   [        SR/                  [/        US5      [/        US5      5      5        U R0                  (       a  [2        R4                  R7                  U R0                  5      n[2        R4                  R9                  U5      (       aM  UnUR;                  SS5      S    S[<        R>                  " 5       RA                  5        S3n[        SUSUS5        URC                  5       n[E        US5       n[F        RH                  " UU5        S S S 5        [        SU5        g g s  snf ! , (       d  f       N!= f)N)r   rV   zskip duplicated test:zRunning test:  r&   K   Z   _   r>   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr;   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)%r[   r'   r   r   r+   r   r,   get_outputsro   r   rC   r   r   r   r   extendr   r`   rp   
statisticsmean
percentiler   formatr-   r   r   abspathexistsrsplitr   now	timestampget_tuning_resultsrR   rT   dump)model_settingr   perf_resultsr   r   rY   outputr   keyall_latency_list_ir   r   
latency_msaverage_latency
latency_50
latency_75
latency_90
latency_95
latency_99
throughputoutput_pathold_output_pathtrsrZ   s                            r#   run_one_testr      s     !..)>>G /6.A.A.CD.CFKK.CLD
M,,g
DC
%s+	/3""//0B$I\%!G ##L1	 1 //0B$9'|$\!G##L1 1
 *+d2J ooj1Oz2.Jz2.Jz2.Jz2.Jz2.J((F_,DEJ 	L 
6==f_V[>\^deoqv^wx **ggoom&I&IJ77>>+&&)O*11'1=a@A8<<>C[C[C]B^^cfK*o/FU_`((*+s#qIIc1 $*K8 +] El $#s   L(L
L)c                     [         R                  " [        U UUUU4S9nUR                  5         UR	                  5         g )N)targetargs)multiprocessingProcessr   startjoin)r   r   r   r   r   processs         r#   launch_testr   =  s=    %% 
	G MMOLLNr"   c                 n   UR                   b  [        U UUUUR                   5        g [        R                  " SS9n[        R                  " SS9n[	        XT15      n[        S[        SU5      5       H  nXv;  d  M
  UR                  U5        M     UR                  SS9  U H  n[        XX#U5        M     g )NF)logicalTr;      )reverse)	r   r   psutil	cpu_countlistr   minr   sort)	r   r   r   r   r   logical_corescandidate_threadsr   r   s	            r#   run_perf_testsr   L  s    ((4--	
 	  /I$$T2Mm781c"m,-%$$Q' . 4( 1MK_` !2r"   c                    [        U R                  U R                  U R                  U R                  5      u  p4n[        SUR                   SUR                   SUR                   35        [        UR                  UR                  UR                  UR                  UR                  UUUUR                  UR                  U R                  S9n[        XX&5        g )NzGenerating z samples for batch_size=z sequence_length=)r.   )r   r'   r(   r)   r*   rC   r   r   r   r   r   r   r   r   r.   r   )r   r   r   	input_idssegment_ids
input_maskr   s          r#   run_performancer   d  s    )8  $$&&%%	*&IJ 

l--..F|G^G^F__pq}  rN  rN  qO  	P $$$,,++))J =Ir"   c            	      t   [         R                  " 5       n U R                  SS[        SS9  U R                  SSS[        SSS	9  U R                  S
SS[        SS9  U R                  SS[        SSS9  U R                  SSS[        SSS9  U R                  SS[        / SQSSS9  U R                  SS[        SSS9  U R                  SSSS S!9  U R                  SS"9  U R                  S#S[        S$/ S%QS&S'9  U R                  S(SSS)S!9  U R                  SS*9  U R                  S+SSS,S!9  U R                  SS-9  U R                  S.S[        S S/S9  U R                  S0S1S[        S S2S9  U R                  S3S[        S S4S9  U R                  S5S[        S S6S9  U R                  S7S[        S S8S9  U R                  S9S [        S:S;9  U R                  S<S [        S=S;9  U R                  S>S?S@[        SAS;9  U R                  SBSCSSSDS!9  U R                  SSE9  U R                  SFS[        S$SGS9  U R                  5       nU$ )HNz--modelTzbert onnx model path)requiredrx   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rx   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rx   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_level)r   r;   r<   r=   r>   r>   zfonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 3 - layout, 99 - enable all.)r   rx   choicesr   r   z--seedr=   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityr<   )r   r;   r<   r=      z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rx   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rx   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr    r   set_defaults
parse_args)parserr   s     r#   parse_argumentsr     s=   $$&F
	DsAWX
Z   /   0   Y    u   _   (	   &
=   eLyY
&
*U<Vfg
u-
(    ,   '   )   ,    B	   !@	   #8   "B   u5
|   DKr"   c                     [        5       n U R                  S:X  a'  [        S[        SU R                  -  5      5      U l        U R
                  S::  a  U R                  U l        [        R                  " 5       nUR                  5       n[        U R                  5      n[        U5      S:  a  [        U5      S::  d  [        S5      e[        U R                  U R                   U R"                  U R$                  U R&                  U R(                  U R*                  U R,                  5      nU H  n[/        UU R                  U R                  U R                  U R0                  U R2                  U R4                  U R6                  U R8                  U R:                  U R<                  U R
                  U R>                  5      n[A        SU5        [C        XFU5        M     [E        URG                  5       SS S	9n[H        RJ                  RM                  [O        U R                  5      RP                  S
RS                  U R0                  (       a  SOSSRM                  [E        U5       Vs/ s H  n[U        U5      PM     sn5      U R                  [V        RX                  " 5       R[                  S5      5      5      n	[]        U	SSS9 n
[^        R`                  " U
SSS9nS nU H  u  pURc                  S5      nUcH  / SQnURe                  U Vs/ s H  oRc                  S5      S   PM     sn5        URg                  U5        U Vs/ s H  n[S        US5      PM     nnURe                  U Vs/ s H  oRc                  S5      S   PM     sn5        URg                  U5        M     S S S 5        [A        SU	5        g s  snf s  snf s  snf s  snf ! , (       d  f       N/= f)Nr   r;   r      z batch_size not in range [1, 128]ztest settingFc                     U S   $ )Nr;   r   )xs    r#   <lambda>main.<locals>.<lambda>S  s	    qQRtr"   )r   r   zperf_results_{}_B{}_S{}_{}.txtGPUCPU-z%Y%m%d-%H%M%Szw+r   )newline	
)	delimiterlineterminatorr   )zLatency(ms)Latency_P50Latency_P75Latency_P90Latency_P95Latency_P99zThroughput(QPS)=r   zTest summary is saved to)4r   r   maxr   samplesr   r   r   Managerdictsetr   r   	Exceptionr%   modelr(   r)   r*   r+   r,   r-   r.   r	   r   r   r   r   r   r   r   r   rC   r   sortedri   r   r   r   r   parentr   r    r   r   strftimerR   csvwritersplitr   writerow)r   managerr   batch_size_setr   r   r   sorted_resultsr   summary_filetsv_file
tsv_writerheadersr   perf_resultparamsvaluess                    r#   mainr#  #  s   D!aTDLL%8!9:##q('+';';$%%'G<<>L)N1$^)<)C:;; 

!!""	M %
"  LLOOLLMM%%IILL((''
  	nl+\B% %* L..0%^TN77<<TZZ(//\\EuHHf^&<=&<c!f&<=>  LLN##O4		
L 
lD"	-ZZDN
 .CYYs^F @AQ@A##G,0;<1fQ&F<MMF;Fq773<?F;<'# !/ 
., 

$l37 >(  A =;' 
.	-s=    N2=AON7!O=N<O$OO7O
O__main__)Nr<   N)%r   r  rT   r   r   r   r   r   dataclassesr   r   pathlibr   numpyr`   r   r^   bert_test_datar   r   r	   r%   r[   rg   rs   r   r   r   r   r   r   r   r   r   r#  r   __spec__r   r"   r#   <module>r*     s     
   	    !      > ! ! !      "Xv ).!2! B9Ja0J:_DQ4h zHF	 r"   