
    hX                     |   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKr	S SK
r
S SKrS SKrS SKJrJr  S SKJr  S SKJr  S SKJrJrJr  S SKJr  S SKJrJrJr  S SKr\R@                  " \!5      r"S\ RF                  4S	 jr$S\ RF                  4S
 jr%S r&S r'S r(S r)S r*S r+S r,S r-\!S:X  a  \-" 5         gg)    N)measure_memorysetup_logger)get_library_path)ORTModelForSpeechSeq2Seq)ProfilerActivityprofilerecord_function)trange)AutoModelForSpeechSeq2SeqWhisperConfigWhisperProcessorargsc                   ^ ^^	^
 T R                   S;  a  [        S5      eU 4S jmU 4S jm	T R                  T R                  T R                  T R
                  T R                  T R                  S.nT R                   S:X  Ga  UR                  5        HA  u  p#[        R                  " U/SU;   a  [        R                  O[        R                  S9X'   MC     T R                  (       a1  [        R                  " T R                  /[        R                  S9US	'   T R                  (       a1  [        R                  " T R                   /[        R                  S9US
'   T R"                  (       a1  [        R                  " T R$                  /[        R                  S9US'   [&        R)                  ST R*                   35        UU	4S jn[-        T UT R.                  5        U" T R.                  5      nT R.                  (       a  XQS'   U$ [&        R)                  S5        T R                   S:X  a  SOSm
U U
4S jn[-        T Xe5        U" U5      nT R                   S:X  a  XqS'   U$ UR1                  T R2                  (       a  [4        R6                  O[4        R                  T R8                  S9US'   T R:                  US'   SUS'   SUS'   T R                  (       a  T R                  US'   U$ )N>   orthf-orthf-pt-eagerhf-pt-compilez/Unable to auto-detect inputs for provided modelc                  t   > [         R                  " TR                  5      n [         R                  " U 5      n U $ N)whisper
load_audio
audio_pathpad_or_trim)audior   s    k/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/models/whisper/benchmark.pyload_via_ffmpeg#get_inputs.<locals>.load_via_ffmpeg$   s,    ""4??3##E*    c                    > [        TR                  S5       n [        R                  " [	        U R                  5       5      [        R                  S9n[        R                  " U/5      nS S S 5        U$ ! , (       d  f       W$ = f)Nrbdtype)openr   npasarraylistreaduint8array)fr   r   s     r   load_via_numpy"get_inputs.<locals>.load_via_numpy)   s]    $//4(AJJtAFFH~RXX>EHHeW%E )  )( s   AA44
B)
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyr   penaltyr!   decoder_input_idslogits_processortemperaturezLoad audio: c                 .   > U (       a  T" 5       $ T" 5       $ r    )onnx_e2er   r+   s    r   <lambda>get_inputs.<locals>.<lambda>D   s    ^%5%XGX%Xr   audio_streamzFeature extraction: r$   ptc                 d   > TR                   R                  U /TTR                  S9R                  $ )N)return_tensorssampling_rate)	processorfeature_extractorr@   input_features)r   r   return_types    r   r:   r;   P   s3    !A!A	4;M;M "B "n"r   rC   )r"   deviceinputsno_repeat_ngram_sizeTearly_stopping	use_cacheforced_decoder_ids)benchmark_type	Exceptionr-   r.   r/   r0   r1   r2   itemsr$   r)   float32int32has_decoder_input_idsr4   has_logits_processorr5   has_temperaturer6   loggerinfor   time_fnhas_audio_streamtouse_fp16torchfloat16target_devicerG   )r   rF   kvload_audio_fn
audio_dataprocessor_fnrC   r   r+   rD   s   `       @@@r   
get_inputsra       s_   "SSIJJ
 oooo^^ $ 9 9--"55F e#LLNDA!)q.BJJbhhWFI #%%*,((D4J4J3KSUS[S[*\F&'$$)+43H3H2IQSQYQY)ZF%&$&HHd.>.>-?rzz$RF=! KK,t/01XMD-!6!67t445J!+~ KK&'--6$DKL D,+!*-Ne##1 %((#}}emm%--HZHZ ) F8 &*%>%>F!"#FF;'+'='=#$Mr   c                 2   Su  pSu  p4U R                   S;   a  U R                  (       a  U R                  OU R                  n[        R                  " 5       n[        R
                  " UU R                  (       a  [        R                  O[        R                  SS9R                  U R                  5      n[        R                  " 5       nU R                   S:X  a  [        R                  " U5      nOU R                   S;   a  [        R                  " 5       nU R                  Ul        UR#                  [%        5       5        U R&                  (       aK  SUl        SUl        U R,                  (       a,  [        R.                  " S5        [        R0                  " S5        O[3        S	U R                    35      eU R                   S
:X  a  [5        U R6                  5      [8        L a  U R6                  S   OU R6                  n[5        U R6                  5      [8        L a  U R6                  S   OS n[        R                  " 5       n[:        R
                  " U R<                  UUUSS9n[        R                  " 5       nU R                   S:X  aw  [>        RA                  SU RB                   35        [        R                  " 5       n[        RD                  " U RB                  UU R6                  /S9n[        R                  " 5       n[>        RA                  SXC-
   S35        U$ )N)NN   r   r   T)torch_dtyperI   r   >   r   r      r   Cannot recognize r   )providerprovider_optionssession_optionsuse_io_bindingr   zLoading model from )	providerszLoaded model in  s)#rK   hf_pt_model_path
model_nametimer   from_pretrainedrX   rY   rZ   rN   rW   r[   compiler   SessionOptionsr   enable_profilingregister_custom_ops_libraryr   verboselog_verbosity_levellog_severity_leveltuneset_default_logger_severityset_default_logger_verbosityrL   typeexecution_providertupler   hf_ort_dir_pathrS   rT   ort_model_pathInferenceSession)r   modelsess_options
start_timeend_timesourcerg   rh   s           r   	get_modelr   h   sg   $E%J >>*.*?*?&&T__YY[
)99)-EMM
 "T
 	 	
 99;/1MM%(E			 1	1))+(,%001A1CD<</0L,./L+yy//2003 +D,?,?+@ABBh&15d6M6M1NRW1W4**1-]a]t]t9=d>U>U9VZ_9_42215eiYY[
(88  -(
 99;e#)$*=*=)>?@YY[
$$../

 99;
KK"8#8"9<=Lr   c                 T   [        U5      [        L a  US   OUn[        U5      [        L a  US   OUn[        R                  " U R                  5      nU R
                  S:X  a  [        U R                  5      O"[        U R                  [        R                  SS9nU R                  (       a  U" U5      n[        R                  U5        U H  nU" U5        M     U R                  S:w  a  [        R                  R                  U5        [         R                   " 5       n	U R
                  S:X  a  [        U R"                  5      O"[        U R"                  [        R                  SS9n
U
 H  nU" U5        M     U R                  S:w  a  [        R                  R                  U5        [         R                   " 5       nU R
                  S:w  a  [        R                  S5        SnX-
  U R"                  -  nX-  n[        R                  S	U S
35        [        R                  SU S35        g )Nr   re   r   zWarm up)filedesccpu	Benchmark z	Latency: rl   zThroughput: z qps)r{   r}   rY   rE   r[   rK   rangewarmup_runsr
   sysstdoutru   rS   rT   cudasynchronizero   num_runs)r   fnrF   warmup_inputsbenchmark_inputstorch_devicewarmup_rangeoutputs_r   bench_ranger   
batch_sizelatency
throughputs                  r   rU   rU      s   !%f!6F1IFM$(LE$9vayv<< 2 23L
 %' 	dD$$3::IF  ||]#G
=  {{e

|,J %' 	dmmDMM

E 
 
  {{e

|,yy{H e#BJ$5G%J
KK)G9B'(
KK,zl$/0
r   c                 @   U R                   R                  5        SU R                   SU R                   SUR                  R                  SS5       SU S[        R                  R                  5       S 3nS nU R                   S;   a  [        [        R                  [        R                  /SSS9 n[        S5         U" U5        S S S 5        S S S 5        WR                  SS	9R                  U R                  U R                   S
9n["        R$                  R'                  U R(                  U S35      n[+        US5       nUR-                  U5        S S S 5        U$ U" U5        U S3nU$ ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       U$ = f)N-r   z%Y-%m-%d_%H:%M:%Src   T)
activitiesrecord_shapesprofile_memorymodel_inference   )group_by_stack_n)sort_by	row_limitz.logw.json)rK   lower	precisionrE   __name__replacedatetimenowr   r   CPUCUDAr	   key_averagestablept_filter_bypt_num_rowsospathjoin
log_folderr#   write)	r   r   rF   inputs_typeprefixfilenameprof	prof_datar*   s	            r   
profile_fnr      s    ##))+,Adnn-=Qt{{m1R[[M`M`adfiMjLkklmxlyyz  |D  |M  |M  |Q  |Q  |S  Te  {f  gFH>>(,,.>.C.CDTXim
 !236
 4

 %%q%9??HYHYeieueu?v	77<<F84A(C AGGI ! O 	6
 XU#O 43
 
 !  Os0   2E=>	E,E=F,
E:	6E==
F
Fc                   ^^ [         R                  " 5       n[        R                  " U5      nUR	                  SS9  T" T5        [
        R                  SUR	                  S S9 S35        [        R                  " 5         [        R                  R                  5         [        U R                  S:g  UU4S jU R                  S9  [        R                   R#                  5         g )Ng?)intervalzCPU usage: %r   c                     > T " T5      $ r   r8   )r   rF   s   r   r:   measure_fn.<locals>.<lambda>  s	    r&zr   )is_gpufuncmonitor_type)r   getpidpsutilProcesscpu_percentrS   rT   gccollectrY   r   empty_cacher   rE   r   r   r   flush)r   r   rF   pidprocesss    ``  r   
measure_fnr      s    
))+CnnS!G%vJ
KK+g1141@ACD JJL	JJ4;;%/7IX\XiXij JJr   c                   ^ ^^
 U4S jm
U U
4S jnUnT R                   S:X  a  U" U5        T R                  (       Ga#  [        T XAS5      nT R                   S:X  Ga  US [        S5      *  nTR                  R
                  R                  5       nUS-   n[        R                  R                  U5      (       aZ  [        R                  SU S	U 35        [        R                  " U[        R                  R                  T R                  U5      5        TR                  R
                  R                  5       nUS
-   n[        R                  R                  U5      (       aZ  [        R                  SU S	U 35        [        R                  " U[        R                  R                  T R                  U5      5        TR                   R
                  R                  5       nUS-   n[        R                  R                  U5      (       aZ  [        R                  SU S	U 35        [        R                  " U[        R                  R                  T R                  U5      5        g [        R#                  S5        [%        T XA5        U" U5      u  p[        R#                  S[        US   5       S35        [        R#                  SU	S    35        ['        T XA5        g )Nc                 ,   > TR                   " S0 U D6nU$ )Nr8   )generate)rF   predicted_idsr   s     r   get_pred_ids&run_hf_inference.<locals>.get_pred_ids  s    00r   c                    > T" U 5      n/ n[        TR                  5       H/  nUR                  TR                  R	                  USS9S   5        M1     X4$ )NTskip_special_tokensr   )r   r0   appendrA   batch_decode)rF   r   transcriptionr   r   r   s       r   gen_and_dec%run_hf_inference.<locals>.gen_and_dec  sY    $V,t001A  !<!<]`d!<!efg!hi 2++r   r   zgen-and-decr   r   z-encoder.json	Renaming  to z-decoder.jsonz-decoder-with-past.jsonz
Evaluating PyTorch...Generated token length: r    tokensTranscription: )rK   r   r   lenencodersessionend_profilingr   r   isfilerS   warningrenamer   r   decoderdecoder_with_pastrT   rU   r   )r   rF   r   r   generate_fnnew_logname
new_prefixold_lognamer   r   r   s   ` `       @r   run_hf_inferencer     s   
,  Ko-F||| {MJ(*$_G}5J--//==?K$6Kww~~k**;-tK=IJ		+rww||DOO['QR--//==?K$6Kww~~k**;-tK=IJ		+rww||DOO['QR1199GGIK$'@@Kww~~k**;-tK=IJ		+rww||DOO['QR KK)*D+&#.v#6 M
KK*3}Q/?+@*AIJ
KK/-"2!345t[)r   c                   ^ ^ SU U4S jjnU4S jnU4S jnU 4S jnT R                   S:w  a  UOUnU" U5      nT R                  (       ax  [        T XxS5      n	TR                  5       n
[        R                  SU
 SU	 35        [        R                  " U
[        R                  R                  T R                  U	5      5        g [        R                  S	5        UnT R                  (       a
  U" US
S9nX4n[        T X{5        U" U5      nT R                   S:w  a  UR                  5       nUS   nT R                  (       a  [        R                  SUS   S    35        O\U" US   S   5      n[        R                  S[!        U5       S35        T R"                  R%                  US   S
S9S   n['        U 5        [)        T Xx5        g )Nc                   > TR                  5        Vs1 s H  o"R                  iM     nn[        U R                  5       5      nX4-
  n[	        U5      (       a#  [
        R                  SU 35        [        S5      eU(       a  TR                  (       a  U S   U S'   XC-
  n[	        U5      (       a$  U H  n[
        R                  SU S35        X	 M      TR                  S:w  a  TR                  5       nU R                  5        H  u  pUR                  X5        M     TR                  5        H2  nUR                  UR                  TR                  TR                   S9  M4     U$ U $ s  snf )	Nz(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.r-   r.   zRemoving unnecessary input 'z' from user provided inputsr   )device_type	device_id)ra   namesetkeysr   rS   errorrL   rx   rT   rE   
io_bindingrM   bind_cpu_inputget_outputsbind_outputr   )rF   warmupmodel_inputmodel_inputsuser_inputsmissing_inputsunnecessary_inputsunnecessary_inputr   r\   r]   outputr   r   s               r   prepare_ort_inputs-run_ort_inference.<locals>.prepare_ort_inputsL  sG   <A<L<L<NO<N[((<NO&++-(%3~LLCNCSTUcdddii#),#7F<  )7!""%7!:;L:MMhij- &8
 ;;%))+J))!/ '++-&&v{{W[WeWe&f .5 Ps   E'c                 *   > TR                  U 5        U $ r   )run_with_iobinding)r   r   s    r   with_io_binding*run_ort_inference.<locals>.with_io_bindingj  s      ,r   c                 ,   > TR                  S U 5      nU$ r   )run)rF   r   r   s     r   without_io_binding-run_ort_inference.<locals>.without_io_bindingo  s    ))D&)r   c                    > TR                   U ;   a1  [        R                  " U TR                   :H  5      S   S   nU S US-    $ U $ )Nr   re   )eos_token_idr$   where)r	  	first_endr   s     r   handle_output(run_ort_inference.<locals>.handle_outputt  sJ    &4+<+<!<=a@CI/IM**r   r   e2er   r   z
Evaluating ONNX Runtime...T)r  r   r   r   r   r   )F)rE   r   r   r   rS   r   r   r   r   r   r   rT   rx   rU   copy_outputs_to_cpurV   r   rA   r   printr   )r   rF   r   r
  r  r  r  r   
ort_inputsr   r   ort_evaluate_inputsort_warmup_inputsort_outputsactual_outputr   s   ` `             r   run_ort_inferencer"  K  s    <

 &*[[E%9/?QK#F+J|| {F ))+;-tK=AB
		+rww||DOO[IJ KK./$yy.vdC0=D+3j)K{{e!557a.Kok!nQ&7%89: &k!nQ&78.s=/A.B'JK33KNX\3]^_`!t[-r   c                     U R                   S;   a  [        XU5        g U R                   S:X  a  [        XU5        g [        SU R                    35      e)N>   r   r   r   r   rf   )rK   r   r"  rL   )r   rF   r   s      r   run_inferencer$    sM    HHu-				%$.+D,?,?+@ABBr   c            
         [         R                  " 5       n U R                  SS[        S/ SQS9  U R                  SS[        SSS	9  U R                  S
S[        SS/ SQSS9  U R                  S[        SSS9  U R                  S[        SSS9  U R                  S[        SSS9  U R                  SS[        SSS	9  U R                  SS[        [        R
                  R                  5       (       a  SOS/ SQS 9  U R                  S!S"[        S#S$9  U R                  S%S&[        S'S$9  U R                  S(S)[        S*S$9  U R                  S+[        S,S$9  U R                  S-[        S.S/S9  U R                  S0[        S1S$9  U R                  S2[        S#S$9  U R                  S3[        S4S$9  U R                  S5[        S4S$9  U R                  S6[        S7S$9  U R                  S8[        S7S$9  U R                  S9[        S:S$9  U R                  S;[        S<S=S9  U R                  S>[        S4S?S9  U R                  S@[        S7SAS9  U R                  SBSCSDSE9  U R                  SF[        SGSHS9  U R                  SI[        SJSKS9  U R                  SLSCSDSE9  U R                  SM[        [        R                  R                  SN5      SOS9  U R                  SPSCSDSQSR9  U R                  5       n[        R                  R                  UR                  5        [        R                   " UR                  5        UR"                  Ul        SSUR&                  ;   a  UR"                  R)                  5        ST3Ul        UR*                  SU:X  a   UR*                  SVUR,                  04Ul        OKUR*                  SW:X  a;  UR*                  UR,                  S4UR.                  (       a  S4OS#SX.4Ul        SUl        UR&                  SY:X  a  UR0                  (       d   SZ5       eUR&                  SS:X  a  UR2                  (       d   S[5       e[4        R6                  " UR8                  5      Ul        U$ )\Nz-btz--benchmark-typeT)r   r   r   r   )r{   requiredchoicesz-mz--model-namez;Hugging Face name of model (e.g. 'openai/whisper-large-v2'))r{   r&  helpz-pz--precisionfp32)int8fp16r)  zePrecision for model. For ONNX models, the model's precision should be set before running this script.)r{   r&  defaultr'  r(  z--hf-pt-model-pathr   zNPath to directory containing all PyTorch files (e.g. tokenizer, PyTorch model))r{   r,  r(  z--hf-ort-dir-pathzaPath to directory containing all ONNX files (e.g. tokenizer, encoder, decoder, decoder_with_past)z--ort-model-pathzPath to ONNX modelz-az--audio-pathz%Path to audio file for E2E evaluationz-dz--devicer   r   )r   r   rocm)r{   r,  r'  z-idz--device-idr   )r{   r,  z-wz--warmup-runsr   z-nz
--num-runs
   z--seed   z--sampling-ratei>  zSampling rate for audio (in Hz)z--max-lengthi  z--min-lengthz--num-beamsre   z--num-return-sequencesz--length-penaltyg      ?z--repetition-penaltyz--no-repeat-ngram-size   z--decoder-input-idsz[]zThe forced decoder ids for generation. Format is [start token, timestamp token, language token, task token]. Default is [start token]. See `decoder_input_ids` in https://github.com/microsoft/Olive/tree/main/examples/whisper for details.z--logits-processorzLWhether to use timestamps logits processor or not (0 for false, 1 for true).z--temperaturez!Temperature value for generation.z	--profileF
store_true)r,  actionz--pt-filter-byself_cpu_time_totalz"What to filter PyTorch profiler byz--pt-num-rowsi  z.Number of rows for PyTorch profiler to displayz	--verbosez--log-folder.zFolder to cache log filesz--tunezFOnly used by ROCm EP, enable TunableOp tuning to select fastest kernel)r,  r2  r(  r   ExecutionProviderCUDAExecutionProviderr   ROCMExecutionProvider)r   tunable_op_enabletunable_op_tuning_enabler   z,Please specify a path to `--hf-ort-dir-path`z+Please specify a path to `--ort-model-path`)argparseArgumentParseradd_argumentstrrY   r   is_availableintfloatr   r   r   
parse_argsr$   randomseedmanual_seedrE   r   rK   upperr|   r   rx   r~   r   astliteral_evalr4   )parserr   s     r   rA  rA    s   $$&F
A   J   (t   ]	   p	   !	   n3Lst
**1133'   }3B
oCC
lbA
sA6 )UIjk S#>
S!<
C;
0sAF
*D
.UCH
0sAF  |	   [	   0	   U<H
s,AHl   c4Fvw
U<H
S"'',,s:KRmn
U	   D IINN499	dii D###%)[[%6%6%8$99J"K""&=='+'>'>dnn@]&^D#$$(??''!%)*59YYA'D# !DK h&##S%SS#e#""Q$QQ" !--d.D.DEDKr   c                     [        5       n [        U R                  5        [        R	                  U R
                  5        S[        R                  R                  l	        [        R                  " U R                  5      n[        R                  " U R                  5      nU R                  S:w  a  SU R                   3OU R                  nU R                   S:H  n[#        U SU5        [#        U SU5        [#        U SU5        [#        U SS	5        [#        U S
UR$                  5        [        R	                  SU R&                   35        [)        U 5      nU R*                  S:X  a  UR-                  5        Vs1 s H  ofR.                  iM     nnSU;   U l        [#        U SSU;   5        [#        U SSU;   5        [#        U SSU;   5        U R&                  / :X  a  UR2                  /U l        [-        U 5      n[5        XU5        g s  snf )NTr   zcuda:r+  rA   r[   rX   rV   Fr  zForced decoder prompt ids: r   r<   rP   r4   rQ   r5   rR   r6   )rA  r   ru   rS   rT   __dict__rY   backendscudnn	benchmarkr   rp   rn   r   rE   r   r   setattrr  r4   r   rK   ra   r   rV   decoder_start_token_idr$  )	r   configrA   r[   rX   r   r  ort_model_inputsrF   s	            r   mainrR  =  s   <D
KK%)ENN"**4??;F 00AI04u0DeDNN+,$++M~~'HD+y)D/=1D*h'D$e,D.&"5"56
KK-d.D.D-EFG dOEe#@E@P@P@RS@R,,@RS .2B B-/BFV/VW,.@DT.TU':J)JK!!R'&,&C&C%DD"F$& Ts   2G=__main__).r:  rF  r   r   loggingr   r   ro   numpyr$   r   rY   r   benchmark_helperr   r   onnxruntime_extensionsr   optimum.onnxruntimer   torch.profilerr   r   r	   tqdmr
   transformersr   r   r   onnxruntimer   	getLoggerr   rS   	Namespacera   r   rU   r   r   r   r"  r$  rA  rR  r8   r   r   <module>r_     s     
  	  	 
      9 3 8 E E  S S 			8	$EX'' EP@H&& @F.b:$=*@U.pCNb!'H zF r   