
    hG                        S r SSKrSSKrSSKrSSKrSSKJr  SSKrSSKrSSKJ	r	J
r
JrJrJrJrJrJrJrJrJr  SSKJr  SSKJrJr  SSKJrJrJrJr  SSKJr  SS	K J!r!  \RD                  " S
5      r#\RH                  " SS9r$S\RJ                  ;  a  \&" \$5      \RJ                  S'   SSK'r'SSK(J)r)J*r*J+r+  S r,S r-S\.S\.4S jr/S r0S r1S r2\3S:X  a  \2" 5         gg)a   Benchmarking the inference of pretrained transformer models.
PyTorch/TorchScript benchmark is based on https://github.com/huggingface/transformers/blob/master/examples/benchmarks.py.
One difference is that random input_ids is generated in this benchmark.

For onnxruntime, this script will convert a pretrained model to ONNX, and optimize it when -o parameter is used.

Example commands:
    Export all models to ONNX, optimize and validate them:
        python benchmark.py -b 0 -o -v -i 1 2 3
    Run OnnxRuntime on GPU for all models:
        python benchmark.py -g
    Run OnnxRuntime on GPU for all models with fp32 optimization:
        python benchmark.py -g -o
    Run OnnxRuntime on GPU with fp16 optimization:
        python benchmark.py -g -o -p "fp16"
    Run TorchScript on GPU for all models:
        python benchmark.py -e torchscript -g
    Run TorchScript on GPU for all models with fp16:
        python benchmark.py -e torchscript -g -p "fp16"
    Run ONNXRuntime and TorchScript on CPU for all models with quantization:
        python benchmark.py -e torchscript onnxruntime -p "int8" -o
    Run OnnxRuntime with the ROCM provider and graph optimization script:
        python benchmark.py -g -m bert-base-cased --provider rocm --optimizer_info by_script --disable_embed_layer_norm
    Run OnnxRuntime with bfloat16 fastmath mode kernels on aarch64 platforms with bfloat16 support:
        python benchmark.py --enable_arm64_bfloat16_fastmath_mlas_gemm

It is recommended to use run_benchmark.sh to launch benchmark.
    N)datetime)ConfigModifierOptimizerInfo	Precisioncreate_onnxruntime_sessionget_latency_resultinference_ortinference_ort_with_io_bindingoutput_detailsoutput_fusion_statisticsoutput_summarysetup_logger)FusionOptions)MODEL_CLASSESMODELS)create_onnxruntime_inputexport_onnx_model_from_ptexport_onnx_model_from_tfload_pretrained_model)version)QuantizeHelper F)logicalOMP_NUM_THREADS)
AutoConfigAutoTokenizerLxmertConfigc                 	   SS K n/ nU (       ag  SUR                  5       ;  aS  SUR                  5       ;  a?  SUR                  5       ;  a+  SUR                  5       ;  a  [        R                  S5        U$ SnUS:X  a=  [        R
                  nSnS	UR                  5       ;  a  [        R                  S
5        U$ U[        R
                  :X  a  [        R                  SU S35        U GH  n[        U   S   nU
 GH  nU[        U5      :  a    M(  US U n[        U   S   Ul	        [        R                  " U5      nSU;   a\  [        R                  " 5          [        U[        U   S   [        U   S   [        U   S   UUUUUU UUUUUUU5      u  nn n!n"S S S 5        SU;   a>  [        U[        U   S   [        U   S   [        U   S   UUUUUU UUUUUUU5      u  nn n!n"W (       d  M  [!        WU USUUUS9n#U#c  GM  U#R#                  5        V$s/ s H  n$U$R$                  PM     n%n$/ n&U (       a  SOSn'[&        R(                  " UUS9n([*        R,                  " [/        U5      [/        U5      [/        W!U(R0                  5      /5      n)[*        R,                  " [/        U5      U(R0                  /5      n*U GH  n+U+S::  a  M  U GH  n,W"b  U,U":  a  M  SU;   a  [*        R2                  O[*        R4                  n-[7        U!U+U,UU(U-5      n.SUR8                  UU'UUU(       + UUUU+U,UR;                  5       [=        [>        R@                  " 5       5      S.n/U(R                  S;   a4  [        RC                  SU SU+SU(RD                  U(RD                  / 35        O[        RC                  SU SU+U,/ 35        U(       a  [G        U#U.U/U	U+U5      n0OU#RI                  U%U.5      n1U)/n2[K        [        U15      5       H=  n3U3S:X  a#  [        U   S   S:X  a  U2RM                  U*5        M,  U2RM                  U)5        M?     SU;   a  [*        RN                  O[*        RP                  n4[S        U#U.U/U	U%U1U&U2U+U'U4U5      n0[        RC                  U05        URM                  U05        GM     GM     GM     GM     U$ ! , (       d  f       GN!= fs  sn$f )Nr   CUDAExecutionProviderMIGraphXExecutionProviderROCMExecutionProviderDmlExecutionProviderzPlease install onnxruntime-gpu or onnxruntime-directml package instead of onnxruntime, and use a machine with GPU for testing gpu performance.tensorrt   TensorrtExecutionProviderzhPlease install onnxruntime-gpu-tensorrt package, and use a machine with GPU for testing gpu performance.zOptimizerInfo is set to zA, graph optimizations specified in FusionOptions are not applied.   pt      tfT)enable_all_optimizationnum_threadsverbose(enable_mlas_gemm_fastmath_arm64_bfloat16cudacpu	cache_dironnxruntimeenginer   	providersdevice	optimizer	precision
io_binding
model_nameinputsthreads
batch_sizesequence_lengthcustom_layer_numr   vitswinzRun onnxruntime on  with input shape gpt)*r3   get_available_providersloggererrorr   NOOPTwarningr   len
model_typer   parsetorchno_gradr   r   r   get_outputsnamer   from_pretrainednumpyprodmaxhidden_sizeint64int32r   __version__get_layer_numstrr   nowinfo
image_sizer	   runrangeappendlonglongintcr
   )5use_gpuprovidermodel_namesmodel_classconfig_modifierr9   r,   batch_sizessequence_lengthsrepeat_timesinput_countsoptimizer_infovalidate_onnxr2   onnx_dirr-   	overwritedisable_ort_io_bindinguse_raw_attention_maskmodel_fusion_statisticsmodel_source(enable_arm64_bfloat16_fastmath_mlas_gemmargsr3   resultswarm_up_repeatr;   all_input_names
num_inputsinput_namesfusion_optionsonnx_model_fileis_valid_onnx_model
vocab_sizemax_sequence_lengthort_sessionnode_argort_output_namesoutput_buffersr7   configmax_last_state_sizemax_pooler_sizer>   r?   input_value_type
ort_inputsresult_templateresultort_outputsoutput_buffer_max_sizesi	data_types5                                                        \/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/benchmark.pyrun_onnxruntimer   Y   s1   2 G$K,O,O,QQ(0S0S0UU$K,O,O,QQ#;+N+N+PP ]	
 N:&,,&k.Q.Q.SSLLz N,,,&~&66wx	
 "
 ,Q/&JC00)+:6K$Z03DO*006N|#]]_ 2"z*1-z*1-z*1-#'! #!&%.!/&#'+"+ %2 |# .:&q):&q):&q)#"!*+"##''* '4(,'9aK ">I>U>U>WX>W(>WXN&VEF//
iPF"'**$()
F$6$67# $jj#k*:F<N<N)OPO)
?'7O*6?M`;` 6:l6Ju{{PUP[P[$!9""'#("J #0#.#:#:%-"(%3%.*@&@&0",#.&0+:,;,I,I,K$'$7'O" ((O;1*=OQ[]^`f`q`qsy  tE  tE  QF  PG  H &9*EWYcetXuWv$wx-!.'&+(&*" '2oo6F
&S3F2G/!&s;'7!8A Av&*<Q*?5*H 7 > > O 7 > >?R S "9 7;l6JENNPUPZPZ	!>'&+(,'*3&"%*" KK'NN6*S (8 *o ' "N Ny %_D  Ys   ?R1S1
S c                   ^^ / nU (       a:  [         R                  R                  5       (       d  [        R	                  S5        U$ [         R
                  " S5        U GHW  n[        R                  " XUS9nUR                  U5        [        UUUUS9nUR                  S;   a  US   /nO [        R                  " XS9nUR                  n[        R                  SU 35        [        R                  S	UR                  5        35        U[        R                   :X  a  UR#                  5         [         R$                  " U (       a  S
OS5      nUR'                  U5        U[        R(                  :X  a  [*        R,                  " U5      nU GH'  nUS::  a  M  U GH  nUR                  S;   a  [        R/                  SU SUSUR0                  UR0                  / 35        [         R2                  " USUR0                  UR0                  4U[        R                   :X  a  [         R4                  O[         R6                  US9mO]Wb  UU:  a  M  [        R/                  SU SUU/ 35        [         R8                  " SUR:                  S-
  UU4[         R<                  US9m U	(       a   [         R>                  RA                  UT5      OU
(       a  [         RB                  " U5      OUmT" T5        [D        RF                  " UU4S jUSS9nU	(       a  SO
U
(       a  SOS[         RH                  SU (       a  SOSSUSUSUUUURK                  5       [M        [N        RP                  " 5       5      S.nURS                  [U        UU5      5        [        R/                  U5        URW                  U5        GM     GM*     GMZ     U$ ! [X         a?  n[        R[                  U5        [         R                  R]                  5          S nAGMi  S nAff = f)NzYPlease install PyTorch with Cuda, and use a machine with GPU for testing gpu performance.F)torchscriptr2   )r   r2   custom_model_classrA   r   r1   zModel zNumber of parameters zcuda:0r0   zRun PyTorch on rD   r&   )sizedtyper7   r(   )lowhighr   r   r7   c                     > T " T5      $ N )	inference	input_idss   r   <lambda>run_pytorch.<locals>.<lambda>  s
    Yy5I    repeatnumberr   torch2rN   NAr/   r   r4   )/rN   r/   is_availablerG   rH   set_grad_enabledr   rR   modifyr   rL   r   model_max_lengthdebugnum_parametersr   FLOAT16halfr7   toINT8r   quantize_torch_modelr]   r^   randnfloat16float32randintr   longjittracecompiletimeitr   rY   rZ   r[   r   r\   updater   ra   RuntimeError	exceptionempty_cache)rd   rf   rg   rh   r9   r,   ri   rj   rk   r   r   r2   r-   rw   r;   r   model	tokenizermax_input_sizer7   r>   r?   runtimesr   er   r   s                            @@r   run_pytorchr   :  s^    Guzz..00pq	5!!
++J[dev&%*	
 / 0 34%55jVI&77NveW%&,U-A-A-C,DEF	)))JJL'hu=	&"77>E%JQ#3$$7KK)*5GUVX^XiXikqk|k|H}G~ !&(!V->->@Q@QR/8I<M<M/MemmSXS`S`%!I &1o6V KK/*=OQ[]lPmOn op %#..2(/:#jj%!I-=H		y9flemm\aNbrw  i(%}}-IR^ghiH 4?-PVH\c#(#4#4%),3&%'%.&(&0"##.&0+:,;,I,I,K$'$7F  MM"4Xz"JKKK'NN6*a $4	 &= "n N	 $ -$$Q'JJ**,,-s   DN<<
P	3P 	 P	do_eager_modeuse_xlac                 4   ^ ^^^ SSK Jm  SS KmU UUU4S jnU$ )Nr   )wrapsc                    >^  T" T 5      U 4S j5       nT" T 5      TR                  TS9U 4S j5       5       nTSL a  TSL d   S5       eU$ U$ )Nc                     > T" U 0 UD6$ r   r   rv   kwargsfuncs     r   run_in_eager_modeFrun_with_tf_optimizations.<locals>.run_func.<locals>.run_in_eager_mode  s    (((r   )experimental_compilec                     > T" U 0 UD6$ r   r   r   s     r   run_in_graph_modeFrun_with_tf_optimizations.<locals>.run_func.<locals>.run_in_graph_mode  s     (((r   TFzcCannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`.)function)r   r   r   r   r*   r   r   s   `  r   run_func+run_with_tf_optimizations.<locals>.run_func  st    	t	) 
	) 
t	'	2	) 
3 
	) D e# u# %$$$r   )	functoolsr   
tensorflow)r   r   r   r*   r   s   `` @@r   run_with_tf_optimizationsr     s    % %$ Or   c                   ^^^ ^!^" / nSS K m"T"R                  R                  R                  U5        U (       d  T"R                  R	                  / S5        U (       a6  T"R
                  R                  5       (       d  [        R                  S5        U$ U (       a}  T"R                  R                  S5      n T"R                  R	                  US   S5        T"R                  R                  R                  US   S5        T"R                  R                  SS9  U[         R"                  :X  d  U[         R$                  :X  a  ['        S5      eU GH$  n[(        R*                  " XS9mUR-                  T5        [/        UTU	USS	9m![0        R*                  " XS9nUR2                  nU GH  nUS::  a  M  U GH  nUb  UU:  a  M  [        R5                  S
U SUU/ 35        SS KnUR9                  5       n[;        UU-  5       Vs/ s H"  nUR=                  STR>                  S-
  5      PM$     nnT"RA                  UUU4T"RB                  S9m  [E        SSS9U U!4S j5       n[E        SSS9U U!4S j5       n[E        SSS9UU U!U"4S j5       nUmTRF                  (       a  UmO[I        T[J        5      (       a  UmT" 5         [L        RN                  " U4S jUSS9nST"RP                  SU (       a  SOSSUSUSUUUURS                  5       [U        [V        RX                  " 5       5      S.nUR[                  []        UU5      5        [        R5                  U5        UR_                  U5        GM     GM     GM'     U$ ! [         a   n[        R                  U5         S nAGNS nAff = fs  snf ! [         aG  n[        R                  U5        SSK0J1n  URe                  5       nURg                  5          S nAGMC  S nAff = f)Nr   GPUzVPlease install Tensorflow-gpu, and use a machine with GPU for testing gpu performance.Tz/gpu:0)r7   z+Mixed precision is currently not supported.r1   )r   r2   r   is_tf_modelzRun Tensorflow on rD   r(   )shaper   F)r   r   c                     > T" T SS9$ )NF)trainingr   r   r   s   r   encoder_forward'run_tensorflow.<locals>.encoder_forward  s    $Y??r   c                     > T" T T SS9$ )NF)decoder_input_idsr   r   r   s   r   encoder_decoder_forward/run_tensorflow.<locals>.encoder_decoder_forward  s    $Y)V[\\r   c                     > TR                   R                  SSTR                  /5      n TR                   R                  SSTR                  /5      nT" TU USS9$ )Nr(   F)visual_feats
visual_posr   )randomnormalvisual_feat_dimvisual_pos_dim)featsposr   r   r   r*   s     r   lxmert_forward&run_tensorflow.<locals>.lxmert_forward  s^     "		 0 0!Q8N8N1O P ii..1f6K6K/LM$%).'*%*	  r   c                     > T " 5       $ r   r   )r   s   r   r    run_tensorflow.<locals>.<lambda>'  s    Y[r   r   r   r   r/   r0   r   r4   )r/   )4r   r   	threading set_intra_op_parallelism_threadsset_visible_devicestestis_built_with_cudarG   rH   list_physical_devicesexperimentalset_memory_growth
distributeOneDeviceStrategyr   r   r   r   r   NotImplementedErrorr   rR   r   r   r   r   r]   r   Randomr`   r   r   constantrX   r   is_encoder_decoder
isinstancer   r   r   rY   rZ   r[   r   r\   r   r   ra   numbar/   get_current_devicereset)#rd   rf   rg   rh   r9   r,   ri   rj   rk   r2   r-   rw   physical_devicesr   r;   r   r   r>   r?   r   rngr   valuesr   r   r   r   r   r/   r7   r   r   r   r   r*   s#                                 @@@@@r   run_tensorflowr    s    GII88E
		%%b%0rww1133mn99::5A	 II))*:1*=uEII""445Ea5H$OMM++8+< I%%%inn)D!"OPP!
++JLv&%*
 "11*R	"33%JQ#3!-/N2R0<NPZ\kOlNmnommoINz\kOkIlmIlA#++a):):Q)>?IlmKKz?6S[][c[cKd	7#.UER@ S@ /UER] S] /UER S !0I00$;	#FL99$2	K%}}-@^_`H #/#%>>%),3&%'%.&(&0"##.&0+:,;,I,I,K$'$7F  MM"4Xz"JKKK'NN6*{ $4	 &! "r N  	 Q	 F nj $ #$$Q'*!446FLLNN#s8   4A!M5  )N"
	DN'5
N?NN'
O8	1;O3	3O8	c                     [         R                  " 5       n U R                  SSSS[        / SQ[	        [
        R                  " 5       5      SSR                  [
        R                  " 5       5      -   S9  U R                  S	SS
[        SSS/SS9  U R                  SS[        S [	        [        5      SSR                  [        5      -   S9  U R                  SSSS[        S// SQSS9  U R                  SSS[        [        R                  R                  SS5      SS9  U R                  SS[        [        R                  R                  SS5      SS9  U R                  SS SS!S"S#9  U R                  S$S[        S S%S9  U R                  S&S'[        [        R                  [	        [        5      S(S)9  U R                  S*SS!S+S#9  U R                  S,SS!S-S#9  U R                  S.S/[        [        R                  [	        [        5      S0S)9  U R                  S1S2SS!S3S#9  U R                  S4S5SS S6S79  U R                  S8S9SS S:S79  U R                  S;S<SS S=S79  U R                  S>S?SSS
/[        / S@QSASB9  U R                  SCSDSSE[        SFSG9  U R                  SHSIS[        S
/SJ9  U R                  SKSLS[        / SMQSJ9  U R                  SNSS!SOS#9  U R!                  SSP9  U R                  SQSRSS[        SS/STSU9  U R                  SVS[        S SWS9  U R                  SXSS!SYS#9  U R!                  SSZ9  ["        R$                  " U 5        U R'                  5       nU$ )[Nz-mz--modelsF+)zbert-base-casedzroberta-basegpt2z Pre-trained models in the list: z, )requirednargstypedefaultchoiceshelpz--model_sourcer(   r'   r*   zExport onnx from pt or tfz--model_classz!Model type selected in the list: )r  r  r  r  r  z-ez	--enginesr3   )r3   rN   r   r   r   zEngines to benchmarkz-cz--cache_dir.cache_modelsz%Directory to cache pre-trained models)r  r  r  r  z
--onnx_dironnx_modelszDirectory to store onnx modelsz-gz	--use_gpu
store_truezRun on gpu device)r  actionr  z
--providerzExecution provider to usez-pz--precisionzfPrecision of model to run. fp32 for full precision, fp16 for half precision, and int8 for quantization)r  r  r  r  z	--verbosezPrint more informationz--overwritezOverwrite existing modelsz-oz--optimizer_infozjOptimizer info: Use optimizer.py to optimize onnx model as default. Can also choose from by_ort and no_optz-vz--validate_onnxzValidate ONNX modelz-fz--fusion_csvz:CSV file for saving summary results of graph optimization.)r  r  r  z-dz--detail_csvz#CSV file for saving detail results.z-rz--result_csvz$CSV file for saving summary results.z-iz--input_counts)r(   r)   r&   zXNumber of ONNX model inputs. Please use 1 for fair comparison with Torch or TorchScript.)r  r  r  r  r  r  z-tz--test_timesd   z8Number of repeat times to get average inference latency.)r  r  r  r  z-bz--batch_sizes)r  r  r  z-sz--sequence_lengths)             @         z--disable_ort_io_bindingz=Disable running ONNX Runtime with binded inputs and outputs. )rq   z-nz--num_threadsr   zThreads to use)r  r  r  r  r  z--force_num_layersz%Manually set the model's layer numberz*--enable_arm64_bfloat16_fastmath_mlas_gemmzHEnable bfloat16 mlas gemm kernels on aarch64. Supported only for CPU EP )ru   )argparseArgumentParseradd_argumentr[   listr   keysjoinr   ospathr   FLOAT32r   BYSCRIPTintset_defaultsr   add_arguments
parse_args)parserrv   s     r   parse_argumentsr.  F  s0   $$&F
;V[[]#/$))FKKM2JJ  	 t(   ]#0499]3KK   O#  	 S.14   S-0-   kE,Uhi
(   !!Yu   eLOgh
(	   &&]#y   "   I   2   3   g  	 G   oSsQCP
,   "L	   u5
   4   4W	   G'DKr   c                  
   [        5       n [        U R                  5        U R                  [        R
                  :X  a'  U R                  (       d  [        R                  S5        g U R                  [        R                  :X  a7  U R                  (       a&  U R                  S;  a  [        R                  S5        g [        U R                  5      S:X  a%  [        U R                  S      S   S;   a  S/U l        [        U R                    Vs1 s H  oS::  a  ["        OUiM     sn5      U l        [        R%                  S	U  35        [&        R(                  R+                  U R,                  5      (       d!   [&        R.                  " U R,                  5        SU R2                  ;   nSU R2                  ;   nSU R2                  ;   nSU R2                  ;   nSU R2                  ;   nU(       ac  [4        R6                  " [8        R:                  5      [4        R6                  " S5      :  a'  [        R                  S[8        R:                   35        g [=        U R>                  5      n/ nU R                    GHg  n	[8        R@                  " U	5        [        RC                  [8        RD                  RG                  5       5        U(       d  U(       d  U(       Ga  U RH                  S/:w  a  [        RK                  S5        U(       at  U[M        U R                  U R                  U RN                  UU R                  U	U RP                  U R                  U RR                  SSU R,                  U R                  5      -  nU(       at  U[M        U R                  U R                  U RN                  UU R                  U	U RP                  U R                  U RR                  SSU R,                  U R                  5      -  nU(       at  U[M        U R                  U R                  U RN                  UU R                  U	U RP                  U R                  U RR                  SSU R,                  U R                  5      -  nU(       ar  U[U        U R                  U R                  U RN                  UU R                  U	U RP                  U R                  U RR                  U R,                  U R                  5      -  n0 n
U(       d  GM}   U RV                  (       + nU[Y        U R                  U R                  U R                  U RN                  UU R                  U	U RP                  U R                  U RR                  U RH                  U RZ                  U R\                  U R,                  U R^                  U R                  U R`                  U Rb                  UU
U Rd                  U Rf                  U 5      -  nGMj     [l        Rn                  " 5       Rq                  S5      nW
(       a$  U Rr                  =(       d    SU S3n[u        X5        [        U5      S:X  a'  U RP                  S/:w  a  [        RK                  S5        g U Rv                  =(       d    SU S3n[y        X5        U Rz                  =(       d    SU S3n[}        XU 5        g s  snf ! [0         a$    [        R                  S
U R,                  5         GNEf = f! [h         a    [        Rk                  S5         GM  f = f)Nzfp16 is for GPU only)migraphxrocmzint8 is for CPU onlyr(   r   r&   )rB   swimr   zArguments: z#Creation of the directory %s failedrN   r   r   r3   r   z2.0.0z2PyTorch version must be >=2.0.0 and you are using zB--input_counts is not implemented for torch or torchscript engine.TF	Exceptionz%Y%m%d-%H%M%Sbenchmark_fusion_z.csvzNo any result available.benchmark_detail_benchmark_summary_)?r.  r   r-   r9   r   r   rd   rG   rH   r   re   rK   modelsr   rj   sortedr,   	cpu_countr]   r%  r&  existsr2   mkdirOSErrorenginesr   rM   rN   rY   r   force_num_layersset_num_threadsr   
__config__parallel_inforl   rJ   r   rg   ri   
test_timesr  use_mask_indexr   rm   rn   ro   rp   rq   rt   ru   r3  r   r   r\   strftime
fusion_csvr   
detail_csvr   
result_csvr   )rv   xenable_torchenable_torch2enable_torchscriptenable_onnxruntimeenable_tensorflowrh   rw   r,   rs   rr   
time_stampcsv_filenames                 r   mainrP    sp   D~~***4<<+,~~'DLLT]]Rf=f+,
4;;1A!7!:o!M!#4CSCSTCSaFy9CSTUD
KK+dV$%77>>$..))	PHHT^^$ dll*L,M&$,,6&$,,6$4u'8'89GMM'<RRI%J[J[I\]^$T%:%:;OG''k*U%%3356=,>  QC'cd!;LLKK$$#NN$$))OONNLL   ;LLKK$$#NN$$))OONNLL   ;LLKK$$#NN$$))OONNLL   ~    %% G #%.-1-@-@)@&?LLMMKK$$#NN$$))OO%%''&&NNMMLLNN//*+%%AA/ [ (R ((9JN,=j\*N !8G
7|qs"NN56??J(9*T&JL7)??K(::,d&KL7$/_ U  	PLL>O	Pn  .  --.s+   Z(5 Z- +C)[-*[[\\__main__)4__doc__r  loggingr%  r   r   rS   psutilbenchmark_helperr   r   r   r   r   r	   r
   r   r   r   r   r|   r   huggingface_modelsr   r   onnx_exporterr   r   r   r   	packagingr   quantize_helperr   	getLoggerrG   r9  environr[   rN   transformersr   r   r   r   r   boolr   r  r.  rP  __name__r   r   r   <module>r_     s    :   	        ) 4   *			2	U+	 BJJ&$'	NBJJ !  @ @^Bm`T D 4DEP_0D zF r   