
    hrL                         S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJr  S SK	J
r
Jr  \ R                  " \5      r " S S5      r " S S5      rg)    N)	Precision)
Gpt2Helper
Gpt2Inputsc                   `    \ rS rSrSS jrS rS\4S jrS\4S jr	SS jr
SS	 jrS
 rS rSrg)
Gpt2Metric   c                     US:  a  US::  d   eX l         Xl        U SU 3U l        X0l        SU l        SU l        SU l        SU l        SU l        S U l	        S U l
        0 U l        g )N   d   z vs r   )baseline	treatmentnametop_ktop_1_errortop_k_errortotal_samplesmax_logits_diffmax_logits_diff_no_pastbatch_top1_errorbatch_topk_errorseq_len_latency)selftreatment_namebaseline_namer   s       j/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/models/gpt2/gpt2_tester.py__init__Gpt2Metric.__init__   sw    qyUc\))%'*+4?	
 ! !"#&'./$3737!    c                 V   U R                   U R                  :w  a  [        S5        [        SU R                   SU R                    S35        U R                  S:  a  SU R                  -  U R                  -  nSU R
                  -  U R                  -  n[        SU R                   SU R                   S	US
 SU R                   SU R
                   S	US
 S35        [        S5        [        SU R                  S 35        [        SU R                  S 35        O[        SU R                   S35        U R                  (       a  [        S5        SnSn[        U R                  R                  5       5       H  n[        R                  " U R                  U   5      S-  nUS:X  a  [        SU SUS
 S35        O"[        SSU-   SSUS-   -  S-
   SUS
 S35        X6[        U R                  U   5      -  -  nU[        U R                  U   5      -  nM     [        SX4-  S
 S35        g g )Nz---zMetrics for z (baseline=z):r   g      Y@zTotal=z Top1Error=z (z.2fz%) TopzError=z%)zMax logits diffs:z	with past  = z.6fz	empty past = z (baseline):z/Past sequence length range and average latency:     @@	z:         	z msz	[   z, r
   z]:	zAverage Latency: )r   r   printr   r   r   r   r   r   r   sortedkeys
statisticsmeanlen)r   top_1_error_ratetop_k_error_ratetotalcountkeyaverages          r   r#   Gpt2Metric.print%   sS   ==DNN*%LL 0DMM?"MN!!A%#(4+;+;#;d>P>P#P #(4+;+;#;d>P>P#P T//0D<L<L;MRP`adOeeklplvlvkww}  C  O  O  ~P  PR  Sc  dg  Rh  hj  k %&OD$8$8#=>?OD$@$@#EFGL 0=>CDEEd22779:$//$*>*>s*CDvM!8Bse<}C@AC3xr!a.1*<)=T'#cRS3t';';C'@#AAAT11#677 ; %emC%8<=  r   is_empty_pastc                     X-
  R                  5       R                  5       nU(       a  [        U R                  U5      U l        U$ [        U R                  U5      U l        U$ N)absmaxr   r   )r   baseline_logitstreatment_logitsr0   diffs        r   diff_logitsGpt2Metric.diff_logitsC   sW    2779==?+.t/K/KT+RD(  $'t';';T#BD r   
batch_sizec                     U =R                   U-  sl         [        R                  " US4[        R                  S9U l        [        R                  " US4[        R                  S9U l        g )Nr
   dtype)r   torchzerosboolr   r   )r   r:   s     r   start_batchGpt2Metric.start_batchL   sI    j( %ZO5:: N %ZO5:: Nr   c                 R   U R                  UR                  UR                  SU5        U R                  UR                  UR                  U R                  U5        U R	                  UR
                  UR
                  US:H  5      nU(       a  [        SU R                   SU 35        g g )Nr
   r   zMax logits diffs of z: )
_eval_topktop_1_tokenstop_k_tokensr   r8   logitsr#   r   )r   r   r   past_seq_lenverbosemax_diffs         r   
eval_batchGpt2Metric.eval_batchQ   s    --y/E/Eq'R--y/E/EtzzSZ[##HOOY5E5E|WXGXY(2hZ@A r   c                    [         R                  " [         R                  " X5      5      (       d  US:X  aW  U(       a  [        SU R                   35        U =R
                  [         R                  " X5      R                  5       -  sl        g U(       a  [        SU SU R                   S35        U =R                  [         R                  " X5      R                  5       R                  S5      R                  SS9S:  -  sl        g g )Nr
   z!Generated tokens not matched for zTop z tokens not matched for z-. This will lead to wrong beam search results)dimr   )
r>   alleqr#   r   r   logical_notr   sum	unsqueeze)r   baseline_topktreatment_topkr   rI   s        r   rD   Gpt2Metric._eval_topkY   s    yy-@AAz=dii[IJ%%-)P)\)\)^^%ug%=dii[Huv %%HH];GGIMMaPZZ_`Zadee% Br   c                     U =R                   U R                  R                  5       -  sl         U =R                  U R                  R                  5       -  sl        g r2   )r   r   rR   r   r   r   s    r   	end_batchGpt2Metric.end_batchh   sB    D115577D115577r   c                     US:  a"  [        [        R                  " U5      5      S-   OSnX0R                  ;  a  / U R                  U'   U R                  U   R	                  U5        g )Nr   r
   )intmathlog2r   append)r   rH   latencyr-   s       r   add_latencyGpt2Metric.add_latencyl   sX    2>2Bc$))L)*Q.***(*D  %S!((1r   )r   r   r   r   r   r   r   r   r   r   r   r   N)Torch   )T)__name__
__module____qualname____firstlineno__r   r#   r@   r8   r\   rA   rK   rD   rY   ra   __static_attributes__ r   r   r   r      s;    "><D Oc O
B82r   r   c            
           \ rS rSr   SS jrS\4S jrS rS rS	 r	\
SS
 j5       r\
S 5       r\
S 5       r\
\R                  SSSSSSSS4	S j5       rSrg)
Gpt2Testers   Frd   c                 *   UR                   S   U l        UR                   S   U l        X`l        Xl        X l        X0l        US LU l        US LU l        / U l	        SU R                  USXT-  /n[        U5       Hw  n[        R                  " U5      R                  U(       a  [        R                  O[        R                  5      nU R                  R!                  UR#                  U5      5        My     S U l        S U l        S U l        Xl        Xl        g )Nr   r
   r"   )shaper:   input_lengthn_layer	input_idsposition_idsattention_maskhas_position_idshas_attention_maskpastranger>   emptytypefloat16float32r_   torG   rE   rF   r   top_k_required_order)r   rr   rs   rt   num_attention_headshidden_size	num_layerdeviceis_fp16r   r~   
past_shape_i
empty_pasts                 r   r   Gpt2Tester.__init__t   s     $//!,%OOA. "(, ,D 8"0"< 	OO.

 	"BZ055wemmTYTaTabJIIZ]]623 #   
$8!r   returnc                 n    [        U R                  U R                  U R                  U R                  5      $ r2   )r   rr   rs   rt   rw   rX   s    r   
get_inputsGpt2Tester.get_inputs   s*    $..$*;*;T=P=PRVR[R[\\r   c           	      n  ^ SSK Jm  [        R                  R	                  US[        U5      -   5      n[        R                  R                  U5      (       a  [        SU S35        g [        R                  " USS9  U4S jn/ nU" XpR                  S	5        U R                  (       a  U" XpR                  S
5        U R                  (       a  U" XpR                  S5        [        U R                  5       H%  nU" XpR                   U   S[        U5      -   5        M'     [#        U5       H[  u  p[%        [        R                  R	                  USU S35      S5       n
U
R'                  U	R)                  5       5        S S S 5        M]     UR+                  5        Vs/ s H  o"R,                  PM     nn[#        U5       H  u  pTR/                  [1        WU   [2        R4                  5      (       a  X(   O-X(   R7                  5       R9                  5       R3                  5       5      n	[%        [        R                  R	                  USU S35      S5       n
U
R'                  U	R)                  5       5        S S S 5        M     [        SU 35        g ! , (       d  f       GMt  = fs  snf ! , (       d  f       M  = f)Nr   )numpy_helpertest_data_set_z
Directory z existed. Skip saving test dataT)exist_okc                    > U R                  TR                  UR                  5       R                  5       R	                  5       U5      5        g r2   )r_   
from_arrayclonecpunumpy)input_tensorstorch_tensorr   r   s      r   
add_tensor-Gpt2Tester.save_test_data.<locals>.add_tensor   s:      !8!89K9K9M9Q9Q9S9Y9Y9[]a!bcr   rr   rs   rt   past_input_z.pbwboutput_zTest data saved to directory )onnxr   ospathjoinstrexistsr#   makedirsrr   ru   rs   rv   rt   rx   rq   rw   	enumerateopenwriteSerializeToStringget_outputsr   r   
isinstancer   ndarrayr   r   )r   sessionoutputsave_test_data_dirtest_case_idr   r   r   itensorfoutput_names_namer   s                @r   save_test_dataGpt2Tester.save_test_data   s   %ww||.0@3|CT0TU77>>$Jtf$CDE
D4(	d =..+>  }&7&7H""}&9&9;KLt||$A}iilGc!f4DE % #=1IAbggll46!C94@A0023 A@ 2 3:2E2E2GH2G2GH!,/HA!,,'q	5==AA	vyGXG\G\G^GdGdGfF bggll471#S)9:DAQ0023 BA	 0 	-dV45 A@ I
 BAs    JJ  J%
J	%
J4	c                    [        US   [        R                  5      (       a  [        R                  " US   5      O.US   R                  5       R                  5       R                  5       U l        [        R                  U R                  5      U l        [        R                  U R                  U R                  U R                  5      U l        U R                  R                  5       R                  5       R                  U R                   S/5      R#                  U5      U l        U R&                  (       ad  [        R(                  " U R*                  U-   S-
  /5      R-                  S5      R/                  U R                   S5      R#                  U5      U l        U R2                  (       ap  [        R4                  " U R6                  [        R8                  " U R                   S/5      R;                  U R6                  5      /S5      R#                  U5      U l        / U l        [        US   [>        5      (       a  [A        US   5      U l        g[C        U RD                  5       H  n[        XS-      [        R                  5      (       a  [        R                  " XS-      5      O"XS-      R                  5       R                  5       nU R<                  RG                  UR#                  U5      5        M     g)z'
Update the inputs for next inference.
r   r
   N)$r   r   r   r>   
from_numpyr   detachr   rG   rl   predict_next_tokenrE   r   r~   rF   reshaper:   r}   rr   ru   r   rp   rS   repeatrs   rv   catrt   onestype_asrw   tuplelistrx   rq   r_   )r   r   stepr   r   past_is         r   updateGpt2Tester.update   s;   
 ,6fQi+O+OEVAY'U[\]U^UdUdUfUmUmUoUsUsUu 	 '99$++F&99$++tzzSWSlSlm**00299;CCT__VWDXY\\]cd  d//$6:;<FFqIPPQUQ`Q`bcdgghno  """'))''JJ34<<T=P=PQ # bj  	fQi''VAYDI4<<( "&Q-?? $$VE]3A,,.557 
 		  6!23 )r   c                    [        S5        U R                  bI  U R                  UR                  -
  R                  5       R                  5       nUS:  a  [        SU 35        [        R
                  " U R                  UR                  :H  5      (       d!  [        SU R                  UR                  5        U R                  (       aS  [        R
                  " U R                  UR                  :H  5      (       d!  [        SU R                  UR                  5        U R                  (       aS  [        R
                  " U R                  UR                  :H  5      (       d!  [        SU R                  UR                  5        [        U R                  5      [        UR                  5      :X  d   e[        U R                  5       H  u  p4UR                  UR                  U   R                  :X  d   eUR                  5       S:  d  MD  XAR                  U   -
  R                  5       R                  5       nUS:  d  My  [        S	U S
U 35        M     g)z#
Compare inputs and logits output.
zstart diff...Ng-C6?z$Max logits difference is too large: zInput_ids is differentzposition_ids is differentzattention_mask is differentr   zmax_past_diff[z]=)r#   rG   r3   r4   r>   rO   rr   ru   rs   rv   rt   r(   rw   r   ro   nelement)r   r   max_io_diffr   r   max_past_diffs         r   r7   Gpt2Tester.diff   s   
 	o;;";;8==?CCEKT!<[MJKyy8+=+==>>*DNNH<N<NO  99T..(2G2GGHH/%%)) ""99T00H4K4KKLL1''++ 499~X]]!3333"499-IA<<8==#3#9#9999 1$!'--*:!: ? ? A E E G 4'N1#R?@ .r   c                     U SS2S4   nUS:X  a  [         R                  " USS5      nU$ [         R                  " USSS9SS2SU24   nU(       d  UR                  5       u  pgU$ U$ )z$
Get top k topkens based on logits.
Nr
   T)
descending)r>   argmaxargsortsort)rG   r   required_orderlastTokenLogitsgeneratedTokenstopksorted_topk_s           r   r   Gpt2Tester.predict_next_token  sj     !B-A:#ll?AtDO""=="Fq&5&yQD!!%""Kr   c                    / n[        U5       H  n[        XS-      [        R                  5      (       a  [        R
                  " XS-      5      OXS-      n[        XS-      [        R                  5      (       a  [        R
                  " XS-      5      OXS-      nXV-
  R                  5       R                  5       nUR                  U5        M     [        SU 35        g)z?
Compare the present outputs of two outputs from ONNX Runtime.
r
   zpresent_diff_max=N)
rx   r   r   r   r>   r   r3   r4   r_   r#   )onnx_outputonnx_io_outputrq   present_diff_maxr   onnx_present_ionnx_io_present_irJ   s           r   diff_presentGpt2Tester.diff_present+  s    
 wA ka%0%--@@   U!34 Q'  nU3U]]CC   A!67#E* 
 ':??AEEGH##H-   	!"2!345r   c                 H    SSK Jn  U" U 5      nSSKJn  UR                  U:H  $ )z.
Returns True if the ONNX model is quantized.
r   )load)__producer__)r   r   !onnxruntime.quantization.quantizer   producer_name)onnx_model_pathr   modelquantize_producers       r   is_quantized_onnx_model"Gpt2Tester.is_quantized_onnx_model@  s&    
 	_%W""&777r   Gpt2LMHeadModelT   r   .c                    [        SU SU SU S[        U5       SU	 S35        UR                  R                  nUR                  R                  nUR                  R
                  nUR                  R                  nSnU[        R                  :H  nU(       a#  SU R                  5       S   R                  ;   d   eUR                  5       R                  U5        [        R                  " S	S
SUR                  US9n[        R                  " UUUS9nSnU[        R                   :X  a  SOSn[#        UUU5      n[#        UUU5      n[#        US-   UU5      n[%        U5       GH  u  nnU	S:  a	  UU	:X  a    GOUS-  S:X  a  [        U 5        US   nUR'                  SS5      nUR'                  SS5      n[)        UUUUUUUUUU(       + 5
      n[)        UUUUUUUUUU(       + 5
      n [)        UUUUUUUSUU(       + 5
      n!U!R*                  n"UR-                  U"5        UR-                  U"5        [.        R0                  " 5          [.        R2                  " U"[.        R4                  S9n#[7        U5       GH  n$[9        UR:                  R=                  5       5      S   n%[9        UR>                  S   R=                  5       5      S   n&[@        RB                  " 5       n'[        RD                  " UU!RG                  5       5      n(URI                  U&[@        RB                  " 5       U'-
  5        U!RK                  U(U$U5        [        RL                  " U URG                  5       SS9u  n)n*URI                  U&U*S-  5        URK                  U)U$U5        [        R                  " U"U&U%UR                  US9n+[        RN                  " UU+5        [        RP                  " U U RG                  5       UU+SSSS9u  n,n*URI                  U&U*S-  5        UU:  a  U RS                  U U,UU5        US-  nU RK                  U,U$U5        U
(       au  URU                  U 5        [(        RW                  U)U,U5        [        S 5        [        S!U!RX                  5        [        S"URX                  5        [        S#U RX                  5        UR[                  U!UU&U
S$9  UR[                  U!U U&U
S$9  U#U!RX                  U:H  R]                  5       -  n#[.        R^                  " U#5      (       d  GM    O   SSS5        URa                  5         URa                  5         GM     UR                  5         UR                  5         UR                  5         g! , (       d  f       Nd= f)%z
Test Generation using greedy beam search (without sampling) to compare PyTorch and ONNX model.
It will print top 1 and top k errors on the given test inputs.
zstart test generation: (top_k=z top_k_no_order=z max_steps=z test_inputs=z max_inputs=)r   r{             )r:   past_sequence_lengthsequence_lengthconfigmodel_class)
is_float16rc   zQuantized OnnxOnnxz with IO Binding
   rr   rs   Nrt   Fr<   r
      )
total_runsr    )r   T)r   return_numpyinclude_copy_output_latencyzTop 1 tokens:z	Torchz	ONNXz	ONNX with IO binding)rI   )1r#   r(   r   rq   n_headn_embdeos_token_idr   FLOAT16r   rz   evalr}   r   get_output_shapesget_output_buffersINT8r   r   getrl   r:   rA   r>   no_gradr?   r@   rx   r   rr   sizerw   timeitdefault_timerpytorch_inferencer   ra   r   onnxruntime_inferenceauto_increase_buffer_size$onnxruntime_inference_with_binded_ior   r7   r   rE   rK   anyrO   rY   )-r   r   r   test_inputs	precisionr   r   top_k_no_order	max_steps
max_inputsrI   r   r   rq   r   r   r   test_data_savedr   init_output_shapesoutput_buffersr   r   torch_metriconnx_metriconnx_io_metricr   inputsrr   rs   rt   onnx_runneronnx_io_runnertorch_runnerr:   doner   seq_lenrH   
start_timepytorch_outputr   avg_latency_msoutput_shapesr   s-                                                r   test_generationGpt2Tester.test_generationL  sd   ( 	,UG3CNCSS^_h^iivwz  |G  xH  wI  IU  V`  Ua  ab  c	
 ,,&&$$$$||00)"3"33
 3 3 5a 8 = ==== 	

 (99!$<<#
 $667I6^hi-6)..-H)f!-F F#N5G$GX]^";/IAvA~!z/2v{{+I!::nd;L#ZZ(8$?N$""K (""N &""L &00J##J/&&z2{{:UZZ@!),D";#8#8#=#=#?@CG#'(8(8(;(@(@(B#CA#FL!'!5!5!7J%/%A%A%I`I`Ib%cN ,,\6;O;O;QT^;^_ ''fE2<2R2R!7!7!9a3/K  ++L.6:QR&&{D&A$.$@$@"$$/%M 88W
 #GG&113&%#$%*48&& #..|^f=TU&7&55g~Oacrs'1,")).$G#((8"//^WUo.i)B)BCh(@(@A68S8ST**<l\c*d"--lNLbi-j<#<#<#L"Q"Q"SSDyyu - !| !!#$$&c 0f 	G !s   K
U5U55
V	)rt   r:   rv   ru   rr   rp   rG   rq   rw   rs   rE   r   r~   rF   N)Frd   F)r
   F)re   rf   rg   rh   r   r   r   r   r   r7   staticmethodr   r   r   r   FLOAT32r#  ri   rj   r   r   rl   rl   s   s     ")9V]J ]%6N&4P%AN  $ 6 6( 	8 	8  ##%h hr   rl   )loggingr]   r   r&   r  r   r>   benchmark_helperr   gpt2_helperr   r   	getLoggerre   loggerr   rl   rj   r   r   <module>r,     sK      	     & .			8	$[2 [2|B Br   