
    h<                     (   S SK r S SKrS SKrS SKrS SKJr  S SKrS SKrS SKJrJ	r	J
r
JrJr  S SKJrJrJrJr  S SKJr  S SKJr  S SKJr  S SKJr  \R4                  " S	5      rSS
 jrS r\S:X  a"  \" 5       r\" \R@                  5        \" \5        gg)    N)datetime)	Precisioncreate_onnxruntime_sessionget_ort_environment_variablesprepare_environmentsetup_logger)DEFAULT_TOLERANCEMODEL_CLASSESPRETRAINED_GPT2_MODELS
Gpt2Helper)version)QuantizeHelper)
AutoConfig)__version__ c                    [         R                  " 5       nUR                  SSS[        SSR	                  [
        5      -   S9  UR                  SS[        S	[        [        R                  " 5       5      S
SR	                  [        R                  " 5       5      -   S9  UR                  SS[        [        R                  R	                  SS5      SS9  UR                  SS[        [        R                  R	                  SS5      SS9  UR                  SSS[        SS9  UR                  SSSSSS9  UR                  SSSSSS9  UR                  SS 9  UR                  S![        S"S/ S#QS$S%9  UR                  S&SSS'S9  UR                  SS(9  UR                  S)S*[        [        R                  [        [        5      S+S,9  UR                  S-SSS.S9  UR                  SS/9  UR                  S0S1S2[        S3/S4S59  UR                  S6S2[        S3/S7S59  UR                  S8S9S2[        / S:QS;S59  UR                  S<S=SS S>S?9  UR                  S@S[        SASBS9  UR                  SCSSSD9  UR                  SSE9  UR                  SFSSSD9  UR                  SSG9  UR                  SHSSSD9  UR                  SSI9  UR                  SJSSSD9  UR                  SSK9  UR                  U 5      nU$ )LNz-mz--model_name_or_pathTz;Model path, or pretrained model name selected in the list: z, )requiredtypehelpz--model_classFGPT2LMHeadModelz!Model type selected in the list: )r   r   defaultchoicesr   z--cache_dir.cache_modelsz%Directory to cache pre-trained models)r   r   r   r   z
--onnx_dironnx_modelszDirectory to store onnx modelsz--test_timesd   z8Number of repeat times to get average inference latency.)r   r   r   r   z-vz--validate_onnx
store_truezValidate ONNX model)r   actionr   z-oz--optimize_onnxz'Use optimizer.py to optimize onnx model)optimize_onnxz--stager   )r         a6  Stage in generation: 1 (initial decoder), 2 (decoder), 0 (both). 1 - decode the first token when past_sequence_length is zero; 2 - decode the remaining tokens when past_sequence_length is not zero; 0 - one onnx model for both stages 1 and 2. Note that we will optimize 1 and 2 differently for best performance.)r   r   r   r   r   z	--use_gpuzuse GPU for inference)use_gpuz-pz--precisionzfPrecision of model to run. fp32 for full precision, fp16 for half precision, and int8 for quantization)r   r   r   r   z--torchscriptzuse Torchscript)torchscriptz-bz--batch_sizes+r    z
batch size)nargsr   r   r   z--sequence_lengthsz!sequence lengths (excluding past)z-sz--past_sequence_lengths)          @         zpast sequence lengthsz-rz--result_csvz$CSV file for saving summary results.)r   r   r   z--thread_numzThreads to usez--include_copy_output_latency)r   r   )include_copy_output_latencyz	--verbose)verbosez--output_torch_latency)output_torch_latencyz--disable_io_binding)disable_io_binding)argparseArgumentParseradd_argumentstrjoinr   listr
   keysospathintset_defaultsr   FLOAT32
parse_args)argvparserargss      m/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/models/gpt2/benchmark_gpt2.pyparse_argumentsrB   !   sy   $$&F
JTYYWmMnn   !]'')*0499]=O=O=Q3RR   S.14   S-0-   G   "   6   e,
O   eLOfg
&
!!Yu   %Sde
E*
oSsQCVbc
0   !)$   3   S"Scd
7%P\]
E:
eLI
&
05V
U3
.|T
51T"DK    c                 l   [         R                  " [        5      [         R                  " S5      :  a  [        S5      e[        R                  SU  35        U R                  [        R                  :X  a)  U R                  (       a  U R                  (       d   S5       eU R                  [        R                  :X  a  U R                  (       a   S5       eU R                  S:X  a  U R                  S/:X  d   S5       e[        R                  " U R                   S::  a  ["        R$                  " S	S
9OU R                   5        ['        [        R(                  R+                  5       5        U R,                  nU R.                  n[1        XU R                  5        [2        U R4                     S   n[6        n[8        R:                  " U R<                  U R>                  US9nUR;                  U R<                  XQS9n[        R@                  " U R                  (       a  SOS5      nURC                  U5        URD                  S:  nURG                  UU R<                  U R4                  S	US9n	U	S   n
[2        U R4                     S   nURI                  UUU
U RJ                  UUUS9  U R                  (       d  U R                  [        RL                  :w  Ga  XR                  [        R                  :w  a  [O        U R                  5      OS   n
UR                  U	S   U
U R                  [        R                  :H  URP                  RR                  URP                  RT                  US	U R                  S9  U R                  [        R                  :X  a_  [        R                  S5        [V        RX                  " XS   U5        [V        RZ                  " U5      n[        R                  S5        U	S   n
U R>                  (       a  UR?                  UUUUUS9n[]        U
U R                  SU R                   U RJ                  S9nUc  g UR_                  [a        U Rb                  5      [a        U R                  5      [a        U Rd                  5      UU R4                  5      nURg                  XU R                  [        R                  :H  5      nU Rh                  =(       d3    SRk                  [l        Rn                  " 5       Rq                  S5      5      n[s        USSS9 n/ S Qn[t        Rv                  " UUS!9nURy                  5         U Rb                   GH  nU Rd                   GHs  nU R                   GH^  nUS:  a  US:  a  US:  d   e[        R{                  S"UUU5        UR}                  UUUURR                  URT                  URD                  UR~                  UU R                  [        R                  :H  UUS#9nUR_                  UUUUU R4                  5      n U R                  (       d  U R                  (       a  UR                  UUU R                  5      u  nn[        U5       Hv  u  nn[        U[        5      (       a6  [        R{                  S$U S%[        U5       S&US   R                   35        MQ  [        R{                  S$U S'UR                   35        Mx     OS nS nU R                  (       a!  UR                  UUU R                  5      u  nnO,UR                  UUUUU R                  SU R                  S(9u  nnU R                  (       a  UnU R                  (       d8  / nU H0  nUR                  UR                  5       R                  5       5        M2     UR                  UUU R4                  [        U R                     [        U R                     S)9(       a*  [        R                  S*[        U R                      S+35        [        R                  S,UUUUU R                  (       a  S-OSU(       a  S.OS5        U R<                  U R4                  U R                  [        5       U R                  U R                  U R                  U R>                  UUUU R                  U(       a  US/ OS0US/ S .n UR                  U 5        GMa     GMv     GM     S S S 5        [        R                  S3U 35        U$ ! [         a"    [        R                  S1S	S29         S S S 5        g f = f! , (       d  f       NW= f)4Nz3.1.0z/This tool requires transformers 3.1.0 or later.z
Arguments:z'fp16 requires --optimize_onnx --use_gpuzquantization only supports CPUr    r   z<past_sequence_lengths shall be 0 for stage==1 (init decoder)T)logical)r#   	cache_dir)configrF   zcuda:0cpu   )has_past
new_folderrawr!   )has_position_idshas_attention_maskfp32)auto_mixed_precisionstagezquantizing model...int8zfinished quantizing modelF)enable_all_optimizationnum_threadsr.   zbenchmark_result_{}.csvz%Y%m%d-%H%M%Sar   )modenewline)
model_namemodel_classrQ   environment_variablesgpu	precision	optimizerr#   
batch_sizesequence_lengthpast_sequence_lengthr0   torch_latencyonnxruntime_latency)
fieldnameszMRunning test for batch_size=%d sequence_length=%d past_sequence_length=%d ...)float16rM   rN   ztorch output z is tuple of size z, shape z shape )return_numpyr-   )rY   rtolatolz:Pytorch and ONNX Runtime outputs are all close (tolerance=z).zZbatch_size=%d, sequence_length=%d, past_sequence_length=%d, onnxruntime_latency=%.2f %s %sz(disable_io_binding)z, torch_latency={torch_latency}z.2fNone	Exception)exc_infozResults are saved to file )Vr   parsetransformers_versionRuntimeErrorloggerinfor\   r   FLOAT16r   r"   INT8rQ   past_sequence_lengthstorchset_num_threads
thread_numpsutil	cpu_countprint
__config__parallel_inforF   onnx_dirr   r
   rY   r   r   from_pretrainedmodel_name_or_pathr#   deviceton_layerget_onnx_pathsexport_onnxr.   r<   r4   rG   num_attention_headshidden_sizer   quantize_onnx_modelquantize_torch_modelr   get_output_shapesmaxbatch_sizessequence_lengthsget_output_buffers
result_csvformatr   nowstrftimeopencsv
DictWriterwriteheaderdebugget_dummy_inputs
vocab_sizevalidate_onnxr/   pytorch_inference
test_times	enumerate
isinstancetuplelenshaper0   onnxruntime_inference$onnxruntime_inference_with_binded_ior-   appendrH   numpycompare_outputsr	   r   writerowri   error)!r@   rF   
output_dirrY   
gpt2helperrG   modelr~   use_external_data_formatonnx_model_pathsonnx_model_pathuse_paddingsessionmax_output_shapesoutput_bufferscsv_filenamecsv_filecolumn_names
csv_writerr^   r_   r`   dummy_inputsoutput_shapesoutputsra   ivalueort_outputsort_latencycopy_outputsoutputrows!                                    rA   mainr      s$   }})*W]].  LMM
KK*TF#$~~***!!dll]4]]2~~'<<A!AAzzQ))aS0p2pp0	DOOq<P&**48VZVeVef	%


(
(
*+IJ	t||< 0 01!4KJ''(?(?TM]M]irsF''(?(?'dE \\dll(>F	HHV%~~2!00+ 1  'u-O 0 01!4K $&   T^^y/@/@@*..T]TbTbBb3t~~+>hno  U#NNi///LL,,LL$$$!%** 	! 		
 >>Y^^+KK-...QW@XZrs"77>EKK34.v6O&&(* ' 
 ) %OOG  #44DD&&'D!!"  223Ddnn`i`q`qNqrN??p&?&F&Fx||~G^G^_nGo&pL	lb	1X
  ^^HF
 **J#'#8#8,0,F,F(%>o.AFZ^_F___LLg"',	 $.#>#>",'22**))!%93D3D!D)4+6 $? $L %/$@$@",'((%MO$--1J1J5?5Q5QRWYegkgvgv5w2G] -6g,>5#-eU#;#;$*LL*7s:LSQVZLX`afghaiaoao`p(q%& %+LL=75;;-1X$Y -? '+G,0M227A7W7W 't84K 8B7f7f ' , . - $-2<@<\<\ 8g 84K  --+6L#'#:#:/1.9F$0$7$7

8J8J8L$M /:  *99 ' ,,0,<,<%6t~~%F%6t~~%F  :   !'&`arsw  tB  tB  bC  aD  DF  %G!" x&+0'6:6M6M2SUAN=TV +/*A*A+/+;+;%)ZZ5R5T#'<<)-)-););+/+;+;*4/>4H262I2IGTc/BZ`6A#5F  #++C0U -G $9 +) 
2J KK,\N;< % $[4@#G 
2	1B$C 
2	1s2   Dd%J2c6d%6d"d%!d""d%%
d3__main__)N)!r1   r   loggingr8   r   rv   rs   benchmark_helperr   r   r   r   r   gpt2_helperr	   r
   r   r   	packagingr   quantize_helperr   transformersr   r   rl   	getLoggerrn   rB   r   __name__r@   r.    rC   rA   <module>r      s     
  	     ] \  * # <			2	DNod zDJ rC   