
    hGE                        S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	J
r
Jr  S SKJr  S SKJrJr  S SKJr  \R&                  " S5      rS'S jr " S S	5      rS
 rS rS rS rS rS\4S jrS rSS/4S\4S jjrS r S r!S r"S r#S\4S jr$\%S:X  Ga  \" 5       r&\" \&RN                  5        \&RP                  S:  d-  \&RR                  S:  d  \&RP                  \&RR                  -  S:  a  \RU                  S5        \RV                  RY                  \&R                  5      (       ap  \&RZ                  (       d_  \&R\                  (       d  \/" S \&R                   S!35      e\Ra                  S"\&R                  5        \Rb                  " \&R                  5        \" \&RP                  \&RR                  \&R                  5      r2\&RZ                  (       d	  \$" \2\&5         \" \2Rf                  5      r4\Ra                  S$5        \2Rf                  Rq                  S%S&5      r9\" \4\95        gg! \5 a-    \Rm                  S#\2Rf                   35        \2Rn                  r4 Njf = f)(    N)get_ort_environment_variablessetup_logger)main)PRETRAINED_GPT2_MODELS
Gpt2Helper)	OnnxModel c           
         [         R                  " 5       nUR                  SSS[        SSR	                  [
        5      -   S9  UR                  SS[        S	S
S9  UR                  SS[        SSS9  UR                  SS[        SSS9  UR                  SSSSS9  UR                  SS9  UR                  SSSSS9  UR                  SS9  UR                  SSSSS9  UR                  SS9  UR                  SSSS9  UR                  SS9  UR                  S SSS!S9  UR                  SS"9  UR                  S#SSS$S9  UR                  SS%9  UR                  U 5      nU$ )&Nz-mz--model_name_or_pathTz2Model path, or pretrained model name in the list: z, )requiredtypehelpz--csvFzgpt2_parity_results.csvz#path of csv file to save the result)r   r   defaultr   z--test_casesi  znumber of test cases per runz--runs(   znumber of repeated runs	--use_gpu
store_truezuse GPU for inference)r   actionr   )use_gpuz--allz'run all combinations of mixed precision)allz-e--use_external_data_format)r   r   )use_external_data_formatz	--verbose)verbosez--skip_testzEdo not run test, and only rank experiments based on existing csv file)	skip_testz--overwritezOverwrite existing csv file)	overwrite)	argparseArgumentParseradd_argumentstrjoinr   intset_defaults
parse_args)argvparserargss      j/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/models/gpt2/gpt2_parity.pyparse_argumentsr&      s   $$&F
ADIINdDee   )2   +   5sBMfg
eLOfg
&
6	   E"
:US_`
7
eLI
&
T	   %(
*	   %(T"DK    c                        \ rS rSrS rS rSrg)
ParityTaska   c                 D    X l         Xl        X0l        / U l        SU l        g )Nr   )
total_runs
test_casescsv_pathresultsrun_id)selfr-   r,   r.   s       r%   __init__ParityTask.__init__b   s    $$ r'   c                    [         R                   R                  5       R                  S5      nU SU R                   3nU =R                  S-  sl         [	        / UQSPU R
                   PSPU R                   PUUU R                  S9nU(       a  U R                  R                  U5        U$ ! [         a    [        R                  SU 35        S n U$ f = f)Nz%Y%m%d%H%M%S_   z-tz-r)experiment_namer0   csv_filenamezFailed to run experiment )datetimenowstrftimer0   r   r-   r,   r.   r/   append	Exceptionlogger	exception)r1   r"   r7   
start_timer0   results         r%   runParityTask.runi   s    &&**,55nE
<q.q	O$OO 1ODOT__<MO /!]]	F ##F+
 	  	88IJKF		s   AB/ /$CC)r.   r/   r0   r-   r,   N)__name__
__module____qualname____firstlineno__r2   rB   __static_attributes__ r'   r%   r)   r)   a   s    r'   r)   c                     / nSS K n[        U SS9 nUR                  " U5      nU H  nUR                  U5        M     S S S 5        U$ ! , (       d  f       U$ = f)Nr   r	   newline)csvopen
DictReaderr<   )r.   rowsrM   csvfilereaderrows         r%   load_results_from_csvrT   ~   sV    D	h	#w(CKK  
$ K	 
$	# Ks   -A
Ac                 t    U  H(  nUR                  S5      (       d  M  [        X   5      s  $    [        S5      e)Nzaverage_latency(batch_size=z)Failed to get average_latency from output)
startswithfloatRuntimeError)rS   names     r%   get_latencyrZ      s7    ??899##  B
CCr'   c                 r    [        U 5      n[        U S   5      n[        U S   5      nUS-  US-  -
  US-  -
  $ )z@Scoring function based on 3 metrics. The larger score is better.top1_match_rateonnx_size_in_MBi  
   d   )rZ   rW   )rS   latency_in_msr\   r]   s       r%   scorera      sK    $MC 123OC 123OT!MB$6639NNNr'   c                 2  ^
 [        5         [        S5        0 m
U H  nUT
US   '   M     [        [        U R                  5       U
4S jSS95      n[        R                  U SU 35        [        R                  SU S35        S	nS
n[        UR                  5       5       Ht  u  nu  pX:w  a  UnU	nU H]  nUS   U:X  d  M  [        R                  SR                  UU	U[        U5      [        US   5      US   US   [        5       5      5          Mr     Mv     g )Nz
**********r0   c                 0   > U S   [        TU S      5      4$ )Nr6   r   )ra   )itemrow_maps    r%   <lambda>print_wins.<locals>.<lambda>   s    d1guWT!W-='>?r'   T)keyreversez Wins:z	Based on z* wins and a scoring function, the ranking:r   za{:02d}: WINs={:02d}, run_id={}, latency={:5.2f}, top1_match={:.4f}, size={}_MB, experiment={}, {}r\   r]   
experiment)printdictsorteditemsr>   debuginfo	enumerateformatrZ   rW   r   )winsrP   	test_namerS   sorted_winsrankprevious_valuecountrh   valuere   s             @r%   
print_winsr{      s"   	G	(OG!$H  JJL?	
K LLI;f[M23
KK)I;&PQRDN():):)<=|"DC8}#w~~#C(c"345-.L)57	    >r'   c                 :   0 n0 nU  H  nUS   nSX%'   SX5'   M     [        USSS9 n/ SQn[        R                  " XgS9nUR                  5         / SQn	[	        U 5      n
[        U
S	-
  5       GH  nX   n[        US
   [        5      (       a  [        R                  " US
   5      nOUS
   n[        US	-   U
S	5       GHy  nX   nSnU	 H  nUU   UU   :w  d  M  Sn  O   U(       d  M,  [        US
   [        5      (       a  [        R                  " US
   5      nOUS
   n [        R                  R                  UUSSS9u  nn[        R                  R                  UUSSS9u  nnUbC  US:  a=  [        US   5      [        US   5      :  a  X,S   ==   S	-  ss'   OX/S   ==   S	-  ss'   US:  a=  [        US   5      [        US   5      :  a  X<S   ==   S	-  ss'   OX?S   ==   S	-  ss'   US   US   US   [        US   5      US   US   [        US   5      UUUUS.nUR!                  U5        GM|     GM     SSS5        ["        R%                  SU 35        ['        X S5        ['        X0S5        g! [         a    SnSn GNFf = f! , (       d  f       NT= f)zRun U test and T test.r0   r   wr	   rK   )
model_namerun_id_1experiment_1top1_match_rate_1run_id_2experiment_2top1_match_rate_2U_statisticU_pvalueT_statisticT_pvalue)
fieldnames)r~   r-   runsr6   top1_match_rate_per_runTFz	two-sided)use_continuityalternativeN)axis	equal_varg?r\   r~   rk   z(U-Test and T-Test results are output to zU-TestzT-Test)rN   rM   
DictWriterwriteheaderlenrange
isinstancer   jsonloadsscipystatsmannwhitneyu
ValueError	ttest_indrW   writerowr>   rq   r{   )rP   output_csv_path
utest_wins
ttest_winsrS   r0   rQ   column_nameswriterrequired_match_columnsnum_resultsiresult1ajresult2all_matchedcolumnbutest_statisticutest_pvaluettest_statisticttest_pvalues                          r%   run_significance_testr      s   JJX

 
 
osB	/7
 A!E$i{Q'AgG'";<cBBJJw'@AB561q5+q1'"4Fv'&/9&+ 5 #g&?@#FF

7+D#EFA 9:A(49KK4L4L1T{ 5M 51O\ 160E0EaQUae0E0f-+t0CW%6785IZA[;\\"8#45:5"8#45:5$&W%6785IZA[;\\"8#45:5"8#45:5 #*,"7 ' 1$+L$9).w7H/I)J ' 1$+L$9).w7H/I)J#2 ,#2 , $g 2 () 
0` KK:?:KLMz*z*E " (&*O#'L(a 
0	/s8   B.JAJ#I7C<J7J		JJ			J
Jraw_onnx_modelc                    [         R                  " U 5      n[        U5      nUR                  5       nUR                  R
                  S   R                  U;   d   eX1R                  R
                  S   R                     nUR                  S:X  a.  [        R                  SUR                   35        UR                  $ [        R                  SUR                   SUR                   35        g )Nr   MatMulz#Found last MatMul node for logits: z-Failed to find MatMul node for logits. Found z	 of node )onnxloadr   output_name_to_nodegraphoutputrY   op_typer>   rq   warning)r   model
onnx_modelr   nodes        r%   get_last_matmul_node_namer   "  s    IIn%E5!J$88:;;a %%)<<<<{{11!499:D||x9$))EFyy
NNB4<<.PYZ^ZcZcYdefr'   c                     U R                   nSU S3R                  5       nU R                  (       a  UR                  S5        USSSU/-  nU(       a  UR	                  S/UQ5        U$ )N-m  -o --use_gpu -p fp16r   --io_block_listlogitsz--node_block_list--op_block_list)model_name_or_pathsplitr   r<   extend)r$   last_matmul_node_nameop_block_listr   
parameterss        r%   get_mixed_precision_parametersr   1  sz    ##Eug2399;J$$67	 J ,=}=>r'   FastGeluLayerNormalizationtaskc                     [        XU5      nSR                  [        U5      5      nU(       a  SU S3nOSU S3n[        5       nU(       a	  USU S3-   nU R	                  XF5        g )N,Mixed precision baseline +  in FP32z=Mixed precision baseline (logits output and last MatMul node z	 in FP32)z ())r   r   rn   r   rB   )r   r$   r   r   r   op_block_list_strrY   env_varss           r%   run_candidater   C  sr     0][J!67,->,?xHNOdNeeno,.H8*A&&HHZr'   c                 <   U R                   nSU S3R                  5       nU R                  (       a  UR                  S5        U R                  (       a  UR                  S5        SU S3R                  5       nU R                  (       a  UR                  S5        X#4$ )Nr   z -o -p fp32r   r   r   )r   r   r   r<   r   )r$   r   fp32_baselinefp16_baselines       r%   get_baselinesr   X  s    ##E%,224M||[)$$9:% 56<<>M$$9:''r'   c                    SS/nU R                  X-   S5        S/nU R                  X-   S5        U R                  X-   S/-   [        U5      -   S/-   S5        / nUnU HV  nS/U V	s/ s H  oU:w  d  M
  U	PM     sn	-   n
U R                  X-   U
-   S	U S
35      nU(       d  ME  UR                  U5        MX     [        US S9n[	        SU5        gs  sn	f )z:Step 0 is to check which operator in FP16 causes most lossr   r   zFP16 except logitsz--keep_io_typeszGraph I/O FP32, Other FP16r   z--force_fp16_initializerszFP32 except weights in FP16zFP32 except z in FP16c                     U S   $ )Nr\   rI   )ys    r%   rf   "run_tuning_step0.<locals>.<lambda>~  s
    !<M:Nr'   )rh   z<step 0: optimized operator causes the most loss in precisionN)rB   listr<   minrl   )r   r   all_opsoptimized_opsfp32_logitsfp32_iooptimized_ops_resultsop_listopor   rA   
min_results                r%   run_tuning_step0r   g  s    $h/KHH](*>? !GHH]$&BC 	HH#4"55WEIdHee%
 G*+'.M'Q"Wq'.MM-1MA\RTQUU]C^_6!((0	  *0NOJ	
H*U /Ns   '	C4Cc                 N    U H  nSU/nU R                  X-   SU S35        M!     g)zKStep 1 is to figure out which optimized operator in FP32 could benefit mostr   r   r   N)rB   )r   mixed_precision_baseliner   r   r   s        r%   run_tuning_step1r     s4    *B/$4)"X6	
 r'   c           	          / SQnU Vs/ s H  oDU;   d  M
  UPM     nnU HG  nXe;  d  M
  / UQUPnU R                  / UQSPUQSR                  SR                  U5      U5      5        MI     gs  snf )zAssumed that you have run step 0 and 1 to figure out that Logits FP32 and some operators shall be in FP32,
This step will try add one more operator.
)r   r   SkipLayerNormalizationr   z(Mixed precision baseline + {},{} in FP32r   N)rB   rs   r   )r   r   r   candidate_fp32_opsxfp32_opsr   r   s           r%   run_tuning_step2r     s     V-D-am1C-HD+hOOMHHN*N,=NN:AA#((8BTVXY  Es
   	A,A,c           	      @  ^
 [         R                  " SUR                  UR                  / S9n[	        U5      u  p4U R                  US5      n/ nU(       a%  SU;   a  US   (       a  US   R                  S5      nO[        S5      e/ m
U(       a%  SU;   a  US   (       a  US   R                  S5      m
O[        S5      eUR                  (       d  [        R                  S	5        g U R                  US
5        [        US   5      n[        XU/ S9  U
4S jnUR                  (       a0  [        XT
U5        [        X/ S9n	[!        X	U5        [#        X	U5        O [        U UUU" / SQ5      S9  [        XUS/S9  [        U UUU" / SQ5      S9  [        U UUU" / SQ5      S9  g )Nonnx_models)
new_folderremove_existingzFP32 baselineoptimized_operatorsr   z!Failed to get optimized operators	operatorszFailed to get operatorsz5skip mixed precision since --use_gpu is not specifiedzFP16 baselineraw)r   c                 B   > U  Vs/ s H  oT;   d  M
  UPM     sn$ s  snf NrI   )r   r   r   s     r%   get_fp32_ops run_parity.<locals>.get_fp32_ops  s    0Qr-Q000s   	)r   r   Addr   )r   r   r   r   )r   EmbedLayerNormalizationr   r   r   )r   get_onnx_pathsr   r   r   rB   r   rX   r   r>   rq   r   r   r   r   r   r   r   )r   r$   onnx_model_pathsr   r   rA   r   r   r   r   r   s             @r%   
run_parityr    s   !0000	 $1#6 MXXm_5FM(F2?T8U45;;C@>??G;&(f[.A%++C0455 <<KLHH]O,56Fu6MN $32F1 xxg}E#A$mo#p GG!&'^_		
 	d"7
|T "#fg	 "j
	r'   __main__r_      i'  zNot enough test cases or runs to get stable results or test significance. Recommend test_cases >= 100, runs >= 20, test_cases * runs >= 10000.zOutput file zK existed. Please remove the file, or use either --skip_test or --overwrite.z6Remove existing file %s since --overwrite is specifiedzFailed to load csv z#Start running significance tests...z.csvz
.stats.csvr   ):r   rM   r9   r   loggingosr   scipy.statsr   benchmark_helperr   r   convert_to_onnxr   gpt2_helperr   r   r   r   	getLoggerr>   r&   r)   rT   rZ   ra   r{   r   r   r   r   r   r   r   r   r   r  rD   r$   r   r-   r   r   pathexistsr   r   rX   rq   remover   r.   rP   r=   r?   r/   replacesummary_csvrI   r'   r%   <module>r     s%    
    	   H   :  			2	@F :DO'T[+|c , 34	
*(V6
CZ CL zD		B$//DII2MPU2US	

 
ww~~dhh~~txxj(st  KKPRVRZRZ[IIdhhdootyy$((;D>>4$T]]3
 KK56--''=K$,A 4  .t}}o>?||s   !H+ +0II