
    hl                        S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJr  S SK	r	S SK
r
S SKrS SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJrJrJrJr  \ R>                  " \ 5      r!/ SQr"\RF                  S\RH                  S\RJ                  S0r& " S S\5      r' " S S\5      r( " S S\5      r) " S S\5      r* " S S\5      r+\*SS4\+SS4\)SS4S.r, " S S5      r- " S  S!5      r.g)"    N)Path)	Precision)float_to_float16_max_diff)FusionOptions)IOBindingHelper)	OnnxModel)optimize_model)torch_onnx_export)
GPT2ConfigGPT2LMHeadModel	GPT2ModelTFGPT2Model)
distilgpt2gpt2zgpt2-mediumz
gpt2-largezgpt2-xlMb@?g?g      @c                   8   ^  \ rS rSrSrU 4S jrU 4S jrSrU =r$ )GPT2ModelNoPastState'   2Here we wrap a class to disable past state output.c                 $   > [         TU ]  U5        g Nsuper__init__selfconfig	__class__s     j/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/models/gpt2/gpt2_helper.pyr   GPT2ModelNoPastState.__init__*            c                 "   > [         TU ]  USSS9$ )NF)	use_cachereturn_dict)r   forwardr   	input_idsr   s     r   r&   GPT2ModelNoPastState.forward-   s    wyEuMMr"    	__name__
__module____qualname____firstlineno____doc__r   r&   __static_attributes____classcell__r   s   @r   r   r   '   s    <!N Nr"   r   c                   8   ^  \ rS rSrSrU 4S jrU 4S jrSrU =r$ )TFGPT2ModelNoPastState1   r   c                 2   > SUl         [        TU ]	  U5        g )NF)r$   r   r   r   s     r   r   TFGPT2ModelNoPastState.__init__4   s      r"   c                     > [         TU ]  USS9$ )NF)r$   )r   callr'   s     r   r&   TFGPT2ModelNoPastState.forward8   s    w|I|77r"   r*   r+   r3   s   @r   r5   r5   1   s    <!8 8r"   r5   c                   H   ^  \ rS rSrSrU 4S jr\S 5       rU 4S jrSr	U =r
$ )MyGPT2Model<   zMHere we wrap a class for Onnx model conversion for GPT2Model with past state.c                 $   > [         TU ]  U5        g r   r   r   s     r   r   MyGPT2Model.__init__?   r!   r"   c           	         [        U S   S   [        [        45      (       a  [        U S   5      U:X  a  [        U S   S   5      S:X  d   e/ n[	        U5       HY  nUR                  [        R                  " U S   U   S   R                  S5      U S   U   S   R                  S5      4SS95        M[     U S   [        U5      4$ U $ )N   r      )dim)	
isinstancetuplelistlenrangeappendtorchcat	unsqueeze)result	num_layerpresentis       r   post_processMyGPT2Model.post_processB   s    fQilUDM22vay>Y.3vay|3D3IIIG9% II1a2215vay|A7P7PQR7ST & 1IuW~..r"   c                 x   > [         TU ]  UUUUSS9n[        R                  XPR                  R
                  5      $ NF)position_idsattention_maskpast_key_valuesr%   r   r&   r=   rR   r   n_layerr   r(   rV   rW   pastrN   r   s         r   r&   MyGPT2Model.forwardU   sB    %)  ! 
 ''0C0CDDr"   r*   )r,   r-   r.   r/   r0   r   staticmethodrR   r&   r1   r2   r3   s   @r   r=   r=   <   s+    W!  $E Er"   r=   c                   8   ^  \ rS rSrSrU 4S jrU 4S jrSrU =r$ )MyGPT2LMHeadModel`   zSHere we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state.c                 $   > [         TU ]  U5        g r   r   r   s     r   r   MyGPT2LMHeadModel.__init__c   r!   r"   c                 x   > [         TU ]  UUUUSS9n[        R                  XPR                  R
                  5      $ rU   rY   r[   s         r   r&   MyGPT2LMHeadModel.forwardf   sB    %)  ! 
 ''0C0CDDr"   r*   r+   r3   s   @r   r`   r`   `   s    ]!	E 	Er"   r`   c                   8   ^  \ rS rSrSrU 4S jrU 4S jrSrU =r$ )MyGPT2LMHeadModel_NoPaddingr   zHere we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state and no padding.
When you always use batch_size=1 in inference, there is no padding in inputs. In such case, position_ids
and attention_mask need no be in inputs.
c                 $   > [         TU ]  U5        g r   r   r   s     r   r   $MyGPT2LMHeadModel_NoPadding.__init__x   r!   r"   c                 r   > [         TU ]  XSS9n[        R                  X0R                  R
                  5      $ )NF)rX   r%   rY   )r   r(   r\   rN   r   s       r   r&   #MyGPT2LMHeadModel_NoPadding.forward{   s0    eT''0C0CDDr"   r*   r+   r3   s   @r   rg   rg   r   s    
!E Er"   rg   logitsTF
last_state)r   GPT2LMHeadModel_NoPaddingr   c                   <    \ rS rSrS rS\4S jrS\4S jrS r	Sr
g)	
Gpt2Inputs   c                 4    Xl         X l        X0l        X@l        g r   )r(   rV   rW   r\   )r   r(   rV   rW   r\   s        r   r   Gpt2Inputs.__init__   s    +4.:WeFJ	r"   returnc                     U R                   U R                  U R                  4 Vs/ s H	  oc  M  UPM     nnU R                  (       a  UR	                  U R                  5        U$ s  snf r   )r(   rV   rW   r\   extend)r   v
input_lists      r   to_listGpt2Inputs.to_list   sT    "&..$2C2CTEXEX!Yk!YAa!Y
k99dii(	 ls
   A&A&c                 ~    [        S U R                  U R                  U R                  U R                  4 5       5      $ )Nc              3   .   #    U  H  oc  M  Uv   M     g 7fr   r*   ).0rx   s     r   	<genexpr>&Gpt2Inputs.to_tuple.<locals>.<genexpr>   s     u c1QQ cs   	)rF   r(   rV   rW   r\   )r   s    r   to_tupleGpt2Inputs.to_tuple   s3    u1B1BDDWDWY]YbYb cuuur"   c                    S nU R                   b[  U R                   R                  [        R                  :X  a'  U R                   R	                  [        R
                  S9OU R                   nU R                   Vs/ s H  o"R	                  [        R
                  S9PM!     nn[        U R                  U R                  X5      $ s  snf )N)dtype)
rW   r   rK   float16tofloat32r\   rq   r(   rV   )r   rW   pr\   s       r   to_fp32Gpt2Inputs.to_fp32   s    * ''--> ##&&U]]&;((  4899=9a5==)9=$..$*;*;^RR >s   9&C)rW   r(   r\   rV   N)r,   r-   r.   r/   r   rG   rz   rF   r   r   r1   r*   r"   r   rq   rq      s(    K v% vSr"   rq   c            "          \ rS rSrSr\SSS\R                  \R                  \R                  S4S\S\S\S\S	\S
\S\S\R                  S\
S\
S\
S\R                  S\R                  S\R                  S\
S\4 S jj5       r\ SCS\S\S\S\S\S\\\\   4   4S jj5       r\S 5       r\SDS j5       r\SDS j5       r\SES j5       r\SFS j5       r\SSSS\R                  \R                  \R                  4S\S \
S!\
S\
S\
S\R                  S\R                  S\R                  4S" jj5       r\   SGS$ j5       r\/ S%Q4S&\S'\\   4S( jj5       r\SHS)\S*\4S+ jj5       r\SHS)\S*\4S, jj5       r\S- 5       r\SIS. j5       r\   SJS)\S/\\\R@                  4   S0\\\\   4   S*\S1\
S2\
4S3 jj5       r!\S4 5       r"\S5 5       r#\SS6S6S7S8SSSS\R                  \R                  \R                  S#SS4S9 j5       r$\SS:SSSS\R                  \R                  \R                  S;S8S<4S= j5       r%\SKS> j5       r&\SSS/ S?Q4S\4S@ jj5       r'SAr(gB)L
Gpt2Helper   zEA helper class for Gpt2 model conversion, inference and verification.FT
batch_sizepast_sequence_lengthsequence_lengthnum_attention_headshidden_sizerO   
vocab_sizedevicer   has_position_idshas_attention_maskinput_ids_dtypeposition_ids_dtypeattention_mask_dtypeleft_side_paddingru   c           	         U(       a  [         R                  O[         R                  nSU UU[        XC-  5      /n[	        U5       Vs/ s H  n[         R
                  " UXS9S-  S-
  PM      nn[         R                  " SUS-
  X4UUS9nSnU
(       al  X-   n[         R                  " U U/UUS9nUS:  aJ  [	        U 5       H;  n[        R                  " SUS-
  5      nU(       a  SUUSU24'   M/  SUUUU-
  S24'   M=     SnU	(       aQ  UR                  5       R                  S	5      S-
  nUR                  US:  S5        USS2US24   R                  U5      n[        UUUU5      $ s  snf )
zCreate random inputs for GPT2 model.
Returns torch tensors of input_ids, position_ids, attention_mask and a list of past state tensors.
rC   r   r   g       @      ?r   rB   )lowhighsizer   r   N)rK   r   r   intrI   randrandintonesrandomlongcumsummasked_fill_r   rq   )r   r   r   r   r   rO   r   r   r   r   r   r   r   r   r   
float_type
past_shape_r\   r(   rW   total_sequence_lengthrQ   padding_lengthrV   s                            r   get_dummy_inputsGpt2Helper.get_dummy_inputs   s   * '.U]]5==
 12

 `een_op_oZ[JjH3NQTT_opMMa.!
	 $8$J!"ZZ23*N %)z*A%+^^A7Lq7P%QN(=>q/>/'9:VWq*?.*P*R'RS + )..077;a?L%%lQ&6:'+?+@(@ADDEWXL)\>4HHC qs   %E'r   r   model_classc                 
   UR                   nUR                  nUR                  nUR                  n[        U   S   n	U UU	S:X  a  UOU/n
SU UX-   [        Xe-  5      /nX0n[        U5       H  nXS[        U5      -   '   M     U$ )zAReturns a dictionary with output name as key, and shape as value.rB   rm   rC   present_)r   r   num_hidden_layersr   MODEL_CLASSESr   rI   str)r   r   r   r   r   r   r   rO   r   output_namelast_state_shapepresent_state_shapeoutput_shapesrQ   s                 r   get_output_shapesGpt2Helper.get_output_shapes   s     %88((,,	&&
#K03 %1J{
  212
 %7y!A1D*s1v-. " r"   c                    U H|  nX ;   d   eX   n[         R                  " X   5      UR                  5       :  d  M:  [        R                  " [         R                  " X   5      UR
                  UR                  S9X'   M~     g )Nr   )numpyprodnelementrK   emptyr   r   )output_buffersr   keybuffers       r   auto_increase_buffer_size$Gpt2Helper.auto_increase_buffer_size  si     C(((#(Fzz-,-0AA&+kkJJ}12 ,,!=='#	 !r"   c                     U(       a  [         R                  O[         R                  n0 nU R                  5        H0  u  pV[         R                  " [
        R                  " U5      X1S9XE'   M2     U$ )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.r   )rK   r   r   itemsr   r   r   )r   r   
is_float16	data_typer   nameshapes          r   get_output_buffersGpt2Helper.get_output_buffers  sQ     &0EMMU]]	(..0KD#(;;uzz%/@	#aN  1r"   c                    U S   R                  5       R                  5       n[        R                  " X1S   -
  5      nU(       a0  [        R                  " U[        R                  " U5      S-   -  5      $ [        R                  " U5      $ )zGReturns the maximum difference between PyTorch and OnnxRuntime outputs.r   ư>)cpur   absamax)torch_outputsort_outputsrelativeexpected_outputsdiffs        r   diff_outputsGpt2Helper.diff_outputs%  sl     )+//1779yy)N:;::deii0@&AD&HIJJ::d##r"   c           	      $   [         R                  " US   U S   R                  5       R                  5       X#S9n[        R	                  SU 35        Un[        U5      S-
  n[        U5       Hk  n[         R                  " USU-      U S   U   R                  5       R                  5       UUS9n[        R	                  SU SU SU 35        U=(       a    UnMm     U(       d.  [        R                  X5      n	[        R                  SU	S	 35        U$ )
zReturns True if torch and ORT outputs are close for given thresholds, and False otherwise.
Note: need kwargs since Gpt2BeamSearchHelper.compare_outputs has an extra parameter model_class
r   )rtolatolz9PyTorch and OnnxRuntime output 0 (last_state) are close: rB   zPyTorch and OnnxRuntime layer z state (present_z) are close:z@PyTorch and OnnxRuntime results are not all close: max_abs_diff=.5f)
r   allcloser   loggerdebugrH   rI   r   r   info)
r   r   r   r   kwargsis_closeis_all_close
num_layerslayermax_abs_diffs
             r   compare_outputsGpt2Helper.compare_outputs/  s   
 >>+a.-2B2F2F2H2N2N2PW[gPQYPZ[\%)
:&E~~AI&a '++-335	H LL9%@PQVPWWcdlcmno'4HL ' %22=NLKKZ[ghkZlmnr"   c                    SnSn/ n/ n[        [        U5      5       GH  nX   nUS:X  a  U S   O
U S   US-
     R                  5       R                  5       n	[        R                  " XUSS9n
UR                  [        R                  " [        R                  " X-
  5      5      5        U=(       a    U
n[        R                  " U	5      R                  5       (       a  [        R                  SU S35        [        R                  " U	5      R                  5       (       a  [        R                  SU S35        [        R                  " U5      R                  5       (       a  [        R                  S	U S35        [        R                  " U5      R                  5       (       a  [        R                  S	U S35        [        R                  " X-
  5      n[        R                  " UR                  5       UR                   5      nUR                  S
X   S SU SX   S S[#        X   5      S 35        US:X  d  GM  [        R                  " [        R                  " USS9UR                   5      n[        R                  " [        R                  " U	SS9U	R                   5      n[        R$                  " X5      nGM     UR'                  [)        U5      5      nU[)        U5      UUU4$ )a  Compare outputs from PyTorch and OnnxRuntime

Args:
    torch_outputs (Tuple[Torch.Tensor]): PyTorch model output
    ort_outputs (List[numpy.ndarray]): OnnxRuntime output
    atol (float, optional): Absolute tollerance. Defaults to 1e-06.

Returns:
    is_all_close(bool): whether all elements are close.
    max_abs_diff(float): maximum absolute difference.
    messages(str): a list of debug message for each output
TFr   rB   )r   r   zPyTorch output z has nanz has infzORT output zdiff=z.9fz index=z ort=z torch=N)axis)rI   rH   r   r   r   rJ   r   r   isnananyr   r   isinffabsunravel_indexargmaxr   floatarray_equalindexmax)r   r   r   r   is_top1_matched	max_diffsmessagesrQ   
ort_outputtorch_outputr   r   idxort_max_indextorch_max_indexmax_diff_output_indexs                   r   compare_outputs_v2Gpt2Helper.compare_outputs_v2J  s[    	s;'(A$J01QM!,M!<LQQRU<SXXZ``bL~~jTPQRHUZZ		,2K(LMN'4HL{{<(,,..qc:;{{<(,,..qc:;{{:&**,,{1#X67{{:&**,,{1#X67::j78D%%dkkmTZZ@COO	#gcU%
7LGTYZfZkTlmpSqr Av % 3 3ELLRV4WYcYiYi j"'"5"5ell<VZ6[]i]o]o"p"'"3"3M"S3 )6 !*I ?	N!
 	
r"   onnx_model_pathverboseuse_external_data_formatc
                    U R                   n
U
R                  n[        R                  SSSU
R                  U
R
                  UU
R                  USUUUUU	S9nUR                  5       n[        R                  " 5          U " U6 nSSS5        [        U5       Vs/ s H  nSU 3PM
     nn[        U5       Vs/ s H  nSU 3PM
     nnWS   R                  S   U
R                  :X  d"  US   R                  S   U
R
                  :X  d   eUS   R                  S   U
R                  :X  a  S	OS
/UQnSSSS.US   SSS.0nU H  nSSS.UU'   M     U H  nSSS.UU'   M     S/nU(       a  SSS.US'   UR                  S5        U(       a  SSS.US'   UR                  S5        UR                  U5        [        U5      S:X  a  [        US   5      U:X  d   e[        R!                  SUR"                  R                   SUR$                  S   R                   SUS   R                   SUS   S   R                   35        ['        U5      R(                  R+                  SSS9  U(       a  [,        R.                  " 5        n[0        R2                  R5                  US5      n['        U5      R(                  R+                  SSS9  [7        U [9        U5      USUUUSSSUS9  [:        R<                  " USS9n [>        R@                  " U USSS9  SSS5        g[7        U [9        U5      USUUUSSSUS9  g! , (       d  f       GN= fs  snf s  snf ! , (       d  f       g= f)z1Export GPT-2 model with past state to ONNX model.rB   F)r   r   r   r   r   rO   r   r   r   r   r   r   r   r   Npast_r   r   rC   rm   rn   r(   r   seq_len)r   rB   past_seq_len)rB      total_seq_lenrV   rW   zShapes: input_ids=z past=z output=z	 present=T)parentsexist_okz	gpt2.onnx   )
argsfexport_paramsinput_namesoutput_namesdynamic_axesopset_versiondo_constant_foldingr  r  )load_external_data)save_as_external_dataall_tensors_to_one_file)!r   rZ   r   r   r   r   r   rz   rK   no_gradrI   r   rJ   rw   rH   r   r   r(   r\   r   parentmkdirtempfileTemporaryDirectoryospathjoinr
   rF   onnx
load_modelr   save)modelr   r  r  r  r   r   r   r   r   r   rO   dummy_inputsry   outputsrQ   
past_namespresent_namesr  r  r   r  tmp_dir_nametemp_onnx_model_paths                           r   export_onnxGpt2Helper.export_onnx  s    #\\NN	!22!" & : :**((-1+1!5 3 
  "))+
]]_Z(G  ,1+;<+;aaSk+;
<16y1AB1AA8A31AB qz"f&7&7771:;K;KA;NRXRdRd;ddd$+AJ$4$4Q$76;L;L$LR^oano \i8O)<
 D%1n!EL !D%1o!FL " #m/;	+JL(~.1=/-RL)*/0:&7|q S_	%AAA !7!7!=!= >f\EVEVWXEYE_E_D``hipqrisiyiyhz  {D  EL  MN  EO  PQ  ER  EX  EX  DY  Z	
 	_$$**4$*G#,,.,')ww||L+'N$)*1177t7T!z**"& +!-!-"$(,-1# (<QUV#*.,0	' /.4 :&!"')) $().Q _ =BT /.s%   <L1M5M?BM1
M 
Mr   c           
          [        S5      n	[        U SUUSU	SS9n
U(       a<  U(       a  [        R                  U
5        OSU;  a  SUS'   U
R                  " SSS0UD6  U
R                  X5        U
$ )	zHOptimize ONNX model with an option to convert it to use mixed precision.r   r   F)
model_type	num_headsr   	opt_leveloptimization_optionsuse_gpukeep_io_typesuse_symbolic_shape_inferTr*   )r   r	   r   auto_mixed_precisionconvert_float_to_float16save_model_to_file)r  optimized_model_pathr   r   r   r  r5  stager   r1  ms              r   optimize_onnxGpt2Helper.optimize_onnx  s      -V4)#!5
 #//2"&0.3F?+**SDSFS	1Lr"   )AddLayerNormalizationSkipLayerNormalizationFastGeluEmbedLayerNormalization
onnx_modelop_block_listc                    U R                  5        Vs1 s H  o"R                  iM     nn[        U5      nUR                  U5      n[        R                  SU SU 35        U R                  5       R                  S   R                  nSnU R                  5       nXh;   d   eX   nSn	UR                  S:X  a  Un	[        R                  SUR                   35        Sn
UR                   H  nU R                  U5      n
U
c  M    O   [        U
5      n[        R                  SUR                   S	U 35        US
:  nO/[        R                  SUR                   SUR                   35        / n/ nU(       d  U	b  U/nU	R                  /nUUUUS.n[        R                  SU 35        U R                  " SSS0UD6  U$ s  snf )a  Convert GPT-2 model to mixed precision.
   It detects whether original model has fp16 weights, and set parameters for float16 conversion automatically.
Args:
    onnx_model (OnnxModel): optimized ONNX model
    op_block_list (List[str], optional): operators to compute in fp32. Defaults to ["Add", "LayerNormalization",
                                         "SkipLayerNormalization", "FastGelu", "EmbedLayerNormalization"]
Returns:
    parameters(dict): a dictionary of parameters used in float16 conversion
z	fp32 op: z
 fp16 op: r   FNMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node : r   z-Failed to find MatMul node for logits. Found z	 of node )r3  rC  node_block_listforce_fp16_initializersz!auto_mixed_precision parameters: r4  Tr*   )nodesop_typeset
differencer   r   graphoutputr   output_name_to_nodeinputget_initializerr   r   warningr6  )rB  rC  nodeop_full_setfp32_op_setfp16_op_setlogits_output_nameis_weight_fp16_precisionrO  last_matmul_nodeinitializerrP  max_diffr3  rG  
parameterss                   r   r5  Gpt2Helper.auto_mixed_precision  s   ( 1;0@0@0BC0B||0BC-(!,,[9i}J{mDE (--/66q9>> $) (<<>!888"6<<8##KK=dii[IJK(88?* $ 1=HLLNtyykY[\d[efg'/$$NNJ4<<.Xabfbkbkalmn(/?/K/0M/445O +*.'?	

 	7
|DE++XTXZXa Ds   F>inputs
total_runsc                    [         R                  S5        UR                  5       R                  5       n[        R
                  " 5          U " U6 nSSS5        US:X  a  W$ / n[        R
                  " 5          [        U5       HD  n[        R                  " 5       nU " U6 nUR                  [        R                  " 5       U-
  5        MF     SSS5        [        U5      S-  [        U5      -  n[         R                  SR                  [        US5      5      5        WU4$ ! , (       d  f       N= f! , (       d  f       Nk= f)zfRun inference of PyTorch model, and returns average latency in ms when total_runs > 0 besides outputs.zstart pytorch_inferenceNr     zPyTorch inference time = {} ms.2f)r   r   r   rz   rK   r  rI   timerJ   sumrH   format)	r$  r^  r_  ry   r&  latencyr   startaverage_latencys	            r   pytorch_inferenceGpt2Helper.pytorch_inferenceb  s     	./ ^^%--/
]]_Z(G  ?N]]_:&		,tyy{U23 '  g,-G<5<<VOUZ=[\]''! _ _s   	D6AD/
D,/
D=c                 ,   [         R                  S5        S[        R                  " UR                  R                  5       R                  5       5      0nUR                  bV  [        UR                  5       H=  u  pE[        R                  " UR                  5       R                  5       5      USU 3'   M?     UR                  b?  [        R                  " UR                  R                  5       R                  5       5      US'   UR                  b?  [        R                  " UR                  R                  5       R                  5       5      US'   U R                  SU5      nUS:X  a  U$ / n[        U5       HQ  n[        R                  " 5       n	U R                  SU5      nUR                  [        R                  " 5       U	-
  5        MS     [        U5      S-  [        U5      -  n
[         R                  S	R!                  [!        U
S
5      5      5        Xj4$ )zcRun inference of ONNX model, and returns average latency in ms when total_runs > 0 besides outputs.zstart onnxruntime_inferencer(   Nr  rW   rV   r   ra  z"OnnxRuntime Inference time = {} msrb  )r   r   r   ascontiguousarrayr(   r   r\   	enumeraterW   rV   runrI   rc  rJ   rd  rH   re  )ort_sessionr^  r_  
ort_inputsrQ   past_ir   rf  r   rg  rh  s              r   onnxruntime_inference Gpt2Helper.onnxruntime_inference|  s    	23!5#:#:6;K;K;O;O;Q;W;W;Y#Z[
;;"&v{{3	*/*A*A&**,BTBTBV*W
U1#;' 4   ,+0+B+B6CXCXC\C\C^CdCdCf+gJ'(*).)@)@ATATAXAXAZA`A`Ab)cJ~&!oodJ7?z"AIIKE%//$
;KNN499;./ #
 g,-G<9@@Y^A_`a++r"   c           	      :    [         R                  " U UUUUUU5      $ )z)Returnas IO binding object for a session.)r   prepare_io_binding)ro  r(   rV   rW   r\   r   r   s          r   ru  Gpt2Helper.prepare_io_binding  s,     11
 	
r"   c                 0    [         R                  " XX#5      $ )z3Copy results to cpu. Returns a list of numpy array.)r   "get_outputs_from_io_binding_buffer)ro  r   r   return_numpys       r   rx  -Gpt2Helper.get_outputs_from_io_binding_buffer  s     AA
 	
r"   r   r   ry  include_copy_output_latencyc           	      r   [         R                  S5        [        R                  U UR                  UR
                  UR                  UR                  UU5      nU R                  U5        [        R                  XX55      nUS:X  a  U$ / n	[        U5       Hm  n
[        R                  " 5       nU R                  U5        U(       a  [        R                  XX55      n
U	R                  [        R                  " 5       U-
  5        Mo     [        U	5      S-  [        U	5      -  n[         R                  SU5        X4$ )zUInference with IO binding. Returns outputs, and optional latency when total_runs > 0.z*start onnxruntime_inference_with_binded_ior   ra  z4OnnxRuntime with IO binding inference time = %.2f ms)r   r   r   ru  r(   rV   rW   r\   run_with_iobindingrx  rI   rc  rJ   rd  rH   )ro  r^  r   r   r_  ry  r{  
io_bindingr   rf  r   rg  rh  s                r   $onnxruntime_inference_with_binded_io/Gpt2Helper.onnxruntime_inference_with_binded_io  s    	AB  22!!KK

 	&&z2 !CC
 ?z"AIIKE**:6*AA NN499;./ # g,-G<K_]++r"   c                 h   [        SU  S3S5       n[        R                  " X5        S S S 5        [        R	                  SU  S35        [        SU  S3S5       n[        R                  " X#5        S S S 5        [        R	                  SU  S35        g ! , (       d  f       Np= f! , (       d  f       N9= f)Nort_outputs_.picklewbz$ORT output are saved to ort_outputs_torch_outputs_z(Torch output are saved to torch_outputs_openpickledumpr   r   )rQ   r   r   r  s       r   save_outputsGpt2Helper.save_outputs  s    L7+T2aKK' 3:1#WEFN1#W-t4KK) 5>qcIJ 32 54s   BB#
B #
B1c                     [        SU  S3S5       n[        R                  " X5        S S S 5        [        R	                  SU  S35        g ! , (       d  f       N(= f)Ndummy_inputs_r  r  z!inputs are saved to dummy_inputs_r  )rQ   r%  r   r   r  s        r   save_inputsGpt2Helper.save_inputs  sG    M!G,d3qKK( 47s'BC 43s   A


Ar   i'  rB   c                    UR                   n[        R                  SU SU SU SU SU	 SU S35        SnS	nS
nSnU(       a/  [        R	                  UUUUU	5      n[        R                  UX#5      nSnSn/ nS/U-  nXg-  n[        U5       GHs  n[        UU-  5      n[        R                  " SU5      nUS:X  a  SO[        R                  " SU5      n [        R                  " SU5      n![        R                  SU! SU  S35        [        R                  U!U UUR                  UR                  UR                  UR                  UUU
UUUUSS9n"[        R!                  UU"5      n#U(       a  [        R#                  U U"5      n$O1[        R	                  U!U UUU	5      n%[        R%                  U U"UU%5      n$[        R'                  U#U$US9u  n&n'n(n)n*[(        R*                  " U'5      (       d  UR-                  U'5        U&(       a  US-  nU*(       a  US-  nUU==   S-  ss'   U(       ay  U&(       dr  [        R                  SU SU! SU  SU SU' 3
5        [/        U)5       H?  u  nn+[        R                  SU SU R1                  5       U   R2                   SU+ 35        MA     U(       d  GM  [(        R*                  " U'5      (       d  U'SU-  :  d  GMF  [        R5                  UU"5        [        R7                  UU$U#5        GMv     U(       a.  S V,s0 s H   n,SU, 3[(        R8                  " UU,5      S _M"     n-n,OS V,s0 s H	  n,SU, 3S_M     n-n,US -  U-  U-S!'   U V.s/ s H  n.U.S -  U-  PM     sn.U-S"'   US -  U-  U-S#'   U[;        U5      -
  S -  U-  U-S$'   [        R                  S%U S&U S'U[;        U5      -
   S(U 35        US)U-  :  a)  [        R                  S*[        US-  U-  5      S+ S,35        U-$ s  sn,f s  sn,f s  sn.f )-zKGenerate random inputs and compare the results of PyTorch and Onnx Runtime.zRunning parity test (atol=z, test_cases=z, runs=z, use_io_binding=z, model_class=z, is_float16=z) ...      rC   Nr   rB   z#Running parity test for batch_size=z past_sequence_length=z...T)r   r   r   r   )r   z
test_case=z batch_size=z sequence_length=z	 MaxDiff=	z: Name=z, d   )2   Z   _   c   max_diff_percentile_r   nanr   top1_match_ratetop1_match_rate_per_rundiff_pass_ratenan_ratezParity Test Cases=z	; Passed=z; Nan=z; Top1_Matched=gffffff?zParity is good: passed rate=z.0f%)r   r   r   r   r   r   rI   r   r   r   r   r   r   r   rZ   r   ri  rr  r  r   r   r   rJ   rm  get_outputsr   r  r  
percentilerH   )/ro  r$  r   r   r   r   test_cases_per_runr_  use_io_bindingr   r   r   r   r   r   r9  r  enable_pickle_outputr   max_batch_sizemax_past_seq_lenmax_seq_lenr   max_output_shapespassed_test_casestop1_matched_casesmax_abs_diff_listtop1_matched_cases_per_runtotal_test_casesrQ   run_idr   r   r   r%  r&  r   r   r   r   r   r   r   messager   rN   xs/                                                  r   test_parityGpt2Helper.test_parity  s   . #\\(m<N;OwWaVbbs  uC  tD  DR  S^  R_  _l  mw  lx  x}  ~	
  * < < 0+v{! (::;LfaN&'S:%5"-:'(A//0F$nnQ<O).!1&..L\:] >:JLL5j\AWXlWmmpq &66$**""!! " /#5%9"& 7 L" !225,GG(>>{LY * < <(#! )MM~} --g{-N%;;|,,!((6!Q&!"a'"*62a72| <
|;QRfQggx  zI  yJ  JS  T`  Sa  b #,H"5JAwKK"QCw{/F/F/H/K/P/P.QQST[S\ ]^ #6 $#\)B)BlUX[_U_F_&&q,7'';@G )J eueu`a&qc*u/?/?@QST/UVY.Z[eu  F BRRAQA,QC0%7AQFR$6$<?O$O !Sm,nSmaQW7I-ISm,n()#4s#:=M#M .5F1GG3NQaaz !1 2)<M;NfUehkl}h~U~T  @O  Pb  Oc  d	
 t&666KK6s;Ls;RUe;e7fgj6kklmn% S -os   'O;?P  Pr  r      c                    UR                   nSnU(       a-  [        R                  XXU5      n[        R                  UX#5      n[        R	                  UUUUR
                  UR                  UR                  UR                  UUUUU	U
US9nU(       a  [        R                  U UU5      u  nnU$ [        R                  U UUWU5      u  nnU$ )zCGenerate random inputs and measure average latency of Onnx Runtime.N)r   r   r   )r   r   r   r   r   r   r   rZ   r   rr  r  )ro  r$  r   r   r_  r  r   r   r   r   r   r   r   r   r   r   r   r   r%  r   rf  s                        r   test_performanceGpt2Helper.test_performance  s    ( #\\&88/;M (::=&]N!22 &&NN+1!5 3 
" #99+|U_`JAw 	 $HH\>=*JAw r"   c                     [         R                  SSSUR                  UR                  UR                  UR
                  USUUS9R                  5       n[        R                  R                  X5      $ )zJIT trace for TorchScript.rB   F)r   r   r   r   r   rO   r   r   r   r   r   )
r   r   r   r   rZ   r   rz   rK   jittrace)r$  r   r   r   r   ry   s         r   torchscriptGpt2Helper.torchscript  sq      00!" & : :**nn((-1 1 
 ') 	 yyu11r"   rawfp32fp16int8c           
         Un[         R                  R                  U5      (       a  [        U5      R                  S   nOUR                  S5      S     US:w  a  USU-   -  nU(       a  US-  nU(       Ga  SSSS	S
.nS
 H  n[         R                  R                  XXx   -   5      n	[         R                  R                  U	5      (       d  MM  X;   a1   [        R                  " U	5        [        R                  SU	 35        M  [        R                  SU SU	 35        M     [         R                  R                  [         R                  R                  X5      US-   5      [         R                  R                  [         R                  R                  XS-   5      US-   5      [         R                  R                  [         R                  R                  XS-   5      US-   5      [         R                  R                  [         R                  R                  XS	-   5      US-   5      S
.$ [         R                  R                  XS-   5      [         R                  R                  XS-   5      [         R                  R                  XS-   5      [         R                  R                  XS-   5      S
.$ ! [         a1  n
[        R                  SU	 SU
R                   35         Sn
A
GMg  Sn
A
ff = f)z=Build a  path name for given model based on given attributes.r   /r   r   _past _fp32_fp16_int8r  zRemoved the existed directory: zFailed to remove the directory rF  NzDirectory for z
 existed: z.onnxz
_fp32.onnxz
_fp16.onnxz
_int8.onnx)r  r  isdirr   partssplitr   existsshutilrmtreer   r   OSErrorstrerror)
output_dirmodel_name_or_pathr   has_past
new_folderremove_existing
model_namesuffixr.  new_dires              r   get_onnx_pathsGpt2Helper.get_onnx_paths  sP    (
77==+,,0177;JS!"%++#++J'!J'7SF=
'',,z@R3RS77>>'**!4c"MM'2"KK*I'(ST nZL
7)$TU > ww||BGGLL$H*W^J^_GGLL'-AB- GGLL'-AB- GGLL'-AB- " 77<<
,@AGGLL,-FGGGLL,-FGGGLL,-FG	
 	
-  ' c"KK*I'RTUVU_U_T`(abbcs   .J00
K+:%K&&K+r*   N)r   )F)MbP?r  )r   )FFr   )r   )T)r   TF)TT))r,   r-   r.   r/   r0   r^   rK   int32r   r   boolr   rq   r   r   r   dictrG   r   r   r   r   r   r   r+  r;  r   r5  ri  rr  ru  rx  Tensorr  r  r  r  r  r  r  r1   r*   r"   r   r   r      s   O !%#'',{{*/++,1KK"&>I>I!>I >I !	>I
 >I >I >I >I >I >I !>I >I "KK>I $kk>I  >I  
!>I >I@  -  !    	 
   
c49n	   D 	 	   $ $  4 3
 3
j 
 ).!%#'',{{*/++,1KKu u 	u
 #'u u !u u "KKu $kku un  "'"! !F $
CCCyC CJ ( ( ( (2 ,: ,3 , ,> 
 
( 
 
  !,10,0, S%,,./0, CcN+	0,
 0, 0, &*0, 0,d K K D D
 
  % ;;"[["%E EN 
 % ;;"[[4 4l 2 2"  -7:
 :
 :
r"   r   )/loggingr  r  r   r  r  rc  pathlibr   r   r!  rK   benchmark_helperr   r   r   fusion_optionsr   io_binding_helperr   rB  r   	optimizerr	   torch_onnx_export_helperr
   transformersr   r   r   r   	getLoggerr,   r   PRETRAINED_GPT2_MODELSFLOAT32FLOAT16INT8DEFAULT_TOLERANCEr   r5   r=   r`   rg   r   rq   r   r*   r"   r   <module>r     s    	          & - ( -   $ 6 L L			8	$W  vsNNC N9 N8[ 8!E) !EHE E$E/ E" *8T:"=x!O|T2S S>_
 _
r"   