
    h(                       S r SSKrSSKrSSKrSSKrSSKrSSKJr  SSKJ	r	  SSK
Jr  SSKrSSKrSSKrSSKJrJr  SSKJr  SSKJrJrJr  SS	KJr  SS
KJrJrJrJrJrJ r J!r!J"r"  SSK#J$r$J%r%J&r&J'r'  SSK(J)r*  SSK+J,r,  SSK-J.r/  SSK0J1r1J2r2  \Rf                  " S5      r4 " S S\5      r5S_S\6\7   S-  S\Rp                  4S jjr9S\Rp                  4S jr:S\Rp                  4S jr;S`S\7S\<4S jjr=S`S\7S\<S\<4S jjr>S\7S\<S\<S\%4S  jr?S!\R*                  S"\4S# jr@S!\R*                  S"\4S$ jrAS!\R*                  S"\4S% jrB    SaS&\S'\S(\7S)\CS*\DS-  S+\DS-  4S, jjrES-\S.\4S/ jrF SbS!\S)\CS\6\   4S0 jjrGS1 rHS2 rIS3 rJS4\4S5 jrKS4\S6\<S7\<S\<4S8 jrLS4\4S9 jrMS:\S;\74S< jrNS/ 4S:\S=\CS>\6\C   4S? jjrOS:\4S@ jrPS:\S;\74SA jrQ   ScS:\SB\7SC\CSD\CSE\C4
SF jjrRS4\4SG jrSS4\4SH jrTSI\4SJ jrUS`SK\7S\<4SL jjrV S`SK\7SM\7S\<S\<4SN jjrWSO rX\5R                  4S\Rp                  SP\54SQ jjrZS\Rp                  S:\\!-  SR\R                  SS\R                  ST\CSU\CSV\6\6\C      S\D\7\4   4SW jr\SX r]  SdS\Rp                  SY\6\7   S-  SZ\<4S[ jjr^S_S\Rp                  SY\6\7   S-  4S\ jjr_SeS\6\7   S-  SY\6\7   S-  4S] jjr)\`S^:X  a  \)" 5         gg)fa  
This converts GPT2 or T5 model to onnx with beam search operator.

Example 1: convert gpt2 model with beam search:
    python convert_generation.py -m gpt2 --output gpt2_beam_search.onnx

Example 2: convert gpt2 model with beam search containing specific cuda optimizations:
    python convert_generation.py -m gpt2 --output gpt2_beam_search.onnx --use_gpu                       --past_present_share_buffer --use_decoder_masked_attention

Example 3: convert gpt2 model with beam search with mixed precision and enable SkipLayerNorm strict mode:
    python convert_generation.py -m gpt2 --output gpt2_beam_search.onnx --use_gpu -p fp16 --use_sln_strict_mode

Example 4: convert T5 model with beam search in two steps:
    python -m models.t5.convert_to_onnx -m t5-small
    python convert_generation.py -m t5-small --model_type t5                     --decoder_onnx ./onnx_models/t5-small_decoder.onnx                       --encoder_decoder_init_onnx ./onnx_models/t5-small_encoder.onnx          --output ./onnx_models/t5_small_beam_search.onnx

Example 5: convert T5 model with beam search. All in one step:
    python convert_generation.py -m t5-small --model_type t5 --output t5_small_beam_search.onnx

Example 6: convert T5 model with beam search containing specific cuda optimizations. All in one step:
    python convert_generation.py -m t5-small --model_type t5 --output t5_small_beam_search.onnx           --use_gpu --past_present_share_buffer --use_decoder_masked_attention

Example 7: convert MT5 model with external data file like mt5-base-beamsearch.onnx.data in below example.
    python convert_generation.py -m google/mt5-base --model_type mt5 --output mt5-base-beamsearch.onnx -e

Example 8: convert gpt2 model with greedy search:
    python convert_generation.py -m gpt2 --output gpt2_greedy_search.onnx --num_beams 1 --num_return_sequences 1

Example 9: convert gpt2 model with sampling:
    python convert_generation.py -m gpt2 --output gpt2_sampling.onnx --num_beams 1 --num_return_sequences 1 --top_p 0.6
    N)Enum)Path)Any)	Precisionsetup_logger)NumpyHelper)
GraphProto
ModelProtoTensorProto)	OnnxModel)
GPT2ConfigGPT2LMHeadModelGPT2Tokenizer	MT5ConfigMT5ForConditionalGenerationT5ConfigT5ForConditionalGenerationT5Tokenizer)GraphOptimizationLevelInferenceSessionSessionOptionsget_available_providers)main)PRETRAINED_GPT2_MODELS)export_onnx_models)PRETRAINED_MT5_MODELSPRETRAINED_T5_MODELS c                   &    \ rS rSrSrSrSrS rSrg)GenerationTypeZ   beam_searchgreedy_searchsamplingc                     U R                   $ N)value)selfs    e/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/convert_generation.py__str__GenerationType.__str___   s    zz     N)	__name__
__module____qualname____firstlineno__
BEAMSEARCHGREEDYSEARCHSAMPLINGr*   __static_attributes__r-   r,   r)   r    r    Z   s    J"LHr,   r    argvreturnc                    [         R                  " 5       nUR                  S5      nUR                  SSS[        SSR                  [        [        -   [        -   5      -   S9  UR                  SS	[        S
/ SQSSR                  / SQ5      -   S9  UR                  SS	[        [        R                  R                  SS5      SS9  UR                  SS	[        SSS9  UR                  SS	[        SSS9  UR                  SS	SSS9  UR                  S	S9  UR                  S5      nUR                  SS[        SS9  UR                  S S!S	[        [        R                  R                  [        R                  R                  [        R                  R                  /S"S9  UR                  S#S$S	S%S&/S'S(9  UR                  S)S*S	SS+S9  UR                  S	S,9  UR                  S-S.S	SS/S9  UR                  S	S09  UR                  S1S2S	SS3S9  UR                  S	S49  UR                  S5S6S	SS7S9  UR                  S	S89  UR                  S9S:S	SS;S9  UR                  S	S<9  UR                  S=S	SS>S9  UR                  S	S?9  UR                  S@5      nUR                  SAS	SSBS9  UR                  S	SC9  UR                  SDS	SSES9  UR                  S	SF9  UR                  SGS	SSH9  UR                  S	SI9  UR                  SJ[         S	SKSLSM9  UR                  SNS	SSOS9  UR                  S	SP9  UR                  SQS	SSRS9  UR                  S	SS9  UR                  STS	SSUS9  UR                  S	SV9  UR                  SWS	SSXS9  UR                  S	SY9  UR                  SZS	SS[S9  UR                  S	S\9  UR                  S]S	SS^S9  UR                  S	S_9  UR                  S`S	SSaS9  UR                  S	Sb9  UR                  Sc5      nUR                  Sd[         S	SeSfSM9  UR                  Sg[         S	ShSiSM9  UR                  Sj[         S	SkSlSM9  UR                  Sm[         S	SeSnSM9  UR                  So["        S	SeSpSM9  UR                  Sq["        S	SeSrSM9  UR                  Ss["        S	StSuSM9  UR                  Sv["        S	StSwSM9  UR                  Sx["        S	[#        Sy5      * SzSM9  UR                  S{[         S	SeS|SM9  UR                  S}["        S	S~SSM9  UR                  S[         S	SKSSM9  UR                  S[         S	SSSM9  UR                  S[         S	SSSM9  UR                  S[         S	SSSM9  UR                  S5      nUR                  SS	SSS9  UR                  S	S9  UR                  SS	SSS9  UR                  S	S9  UR                  SS	SSS9  UR                  S	S9  UR                  SS	SSS9  UR                  S	S9  UR                  SS	SSS9  UR                  S	S9  UR                  SS	[         SeSS9  UR                  SS	SSS9  UR                  S	S9  UR%                  U 5      nU$ )zParse arguments

Args:
    argv (Optional[List[str]], optional): _description_. Defaults to None.

Returns:
    argparse.Namespace: Parsed arguments.
zInput optionsz-m--model_name_or_pathTzEPytorch model checkpoint path, or pretrained model name in the list: , )requiredtypehelpz--model_typeFgpt2)r>   t5mt5z*Model type (default is gpt2) in the list: )r;   r<   defaultchoicesr=   --cache_dir.cache_modelsz%Directory to cache pre-trained models)r;   r<   rA   r=   z--decoder_onnxr   zLPath of onnx model for decoder. Specify it when you have exported the model.z--encoder_decoder_init_onnxzgPath of ONNX model for encoder and decoder initialization. Specify it when you have exported the model.z	--verbose
store_truezPrint more information)r;   actionr=   )verbosezOutput options--outputz,Output path for onnx model with beam search.z-p--precisionzTPrecision of model to run. fp32 for full precision, fp16 for half or mixed precisionz-b--op_block_list*autozDisable certain onnx operators when exporting model to onnx format. When using defaultvalue for gpt2 type of model fp16 precision, it will be set to ["Add", "LayerNormalization", "SkipLayerNormalization", "FastGelu"]. Other situation, it will be set to [])r;   nargsrA   r=   z-e--use_external_data_formatz!save external data for model > 2G)use_external_data_formatz-sz--run_shape_inferencezrun shape inference)run_shape_inferencez-dpvsz--disable_pad_vocab_sizezDo not pad logits MatMul weight to be a multiple of 8 along the dimension where dim value is the vocab size. The logits MatMul may hence be of poor performance for fp16 precision.)disable_pad_vocab_sizez-dsgdz,--disable_separate_gpt2_decoder_for_init_runzDo not create separate decoder subgraphs for initial and remaining runs. This does not allow for optimizations based on sequence lengths in each subgraph)*disable_separate_gpt2_decoder_for_init_runz-iz--disable_shared_initializerszdo not share initializers in encoder and decoder for T5 or in the init decoder and decoder for GPT2. It will increase memory usage of t5/mt5/gpt2 models.)disable_shared_initializersz--encoder_decoder_initzbAdd decoder initialization to encoder for T5 model. This is legacy format that will be deprecated.)encoder_decoder_initz6Beam search parameters that stored in the output modelz--output_sequences_scoreszoutput sequences scores)output_sequences_scoresz--output_token_scoreszoutput token scores)output_token_scoresz--early_stopping)r;   rG   )early_stoppingz--no_repeat_ngram_sizer   zNo repeat ngram size)r<   r;   rA   r=   z--vocab_maskz\Enable vocab_mask. This mask applies only to every generated token to filter some bad words.)
vocab_maskz--past_present_share_bufferzWUse shared buffer for past and present, currently work for gpt2 greedy/sampling search.)past_present_share_bufferz--use_decoder_masked_attentionzUses `DecoderMaskedSelfAttention` or `DecoderMaskedMultiHeadAttention` to optimize the decoding Attention computation. Must be used with `past_present_share_buffer`. Currently, only Attention head sizes of 32, 64 and 128 are supported.)use_decoder_masked_attentionz--prefix_vocab_maskzeEnable prefix_vocab_mask. This mask can be used to filter bad words in the first generated token only)prefix_vocab_maskz--custom_attention_maskz]Enable custom_attention_mask. This mask can be used to replace default encoder attention mask)custom_attention_maskz--presence_maskz!Presence mask for custom sampling)presence_maskz--seedzRandom seed for sampling op)seedzYBeam search parameters not stored in the output model, for testing parity and performancez--min_length   zMin sequence lengthz--max_length2   zMax sequence lengthz--num_beams   z	Beam sizez--num_return_sequencesz&Number of return sequence <= num_beamsz--length_penaltyz<Positive. >1 to penalize and <1 to encourage short sentence.z--repetition_penaltyz-Positive. >1 to penalize and <1 to encourage.z--temperature      ?z6The value used to module the next token probabilities.z--top_pzTop P for samplingz--filter_valueInfzFilter value for Top P samplingz--min_tokens_to_keepzAMinimum number of tokens we keep per batch example in the output.z--presence_penalty        z%presence penalty for custom sampling.z--customz&If 1 customized top P logic is appliedz--vocab_sizezIVocab_size of the underlying model used to decide the shape of vocab maskz--eos_token_idzKcustom eos_token_id for generating model with existing onnx encoder/decoderz--pad_token_idzKcustom pad_token_id for generating model with existing onnx encoder/decoderz0Other options for testing parity and performancez--use_sln_strict_modez_Enable strict mode for SLN in CUDA provider. This ensures a better accuracy but will be slower.)use_sln_strict_mode	--use_gpuz)use GPU for inference. Required for fp16.)use_gpuz--disable_parityzdo not run parity test)disable_parityz--disable_perf_testzdo not run perf test)disable_perf_testz--torch_performanceztest PyTorch performance)torch_performancez--total_runsz4Number of times of inference for latency measurementz--save_test_dataz-save test data for onnxruntime_perf_test tool)save_test_data)argparseArgumentParseradd_argument_groupadd_argumentstrjoinr   r   r   ospathset_defaultsr   FLOAT32r'   FLOAT16intfloat
parse_args)r6   parserinput_groupoutput_groupmodel_groupbeam_parameters_group
test_groupargss           r)   parse_argumentsr   c   s    $$&F++O<KT
))*-AADYY
Z[   %9DIIF[<\\   S.14   [   %v   %	   &,,-=>L;	   !!''""(()*;*;*A*ABc   X  	 $0   u="   %8"b   U;6G   O'E   %@ q	   59++,deK#&	   U;"	   7/%UE2 #   k	   .%f	   u=(	   %@t	   u5!l	   590	   51*	   %("55c &&~C%YZav&w&&~C%Y[bw&x&&}3XY`k&l&& 5 '  &&K '  &&< '  &&E '  &&! '  &&u. '  &&P '  &&4 '  &&5 '  &&X '  &&Z '  &&Z '  **+]^Jn	   68	   E*%	   51#	   e4'	   e4C   <	   51T"DKr,   r   c                    U R                   nSUSU R                  SSU R                  SSSSS	/nU R                  (       a  UR	                  S
U R                  /5        U R
                  (       a  UR                  S5        U R                  (       a  UR                  S5        [        U R                  5      (       a-  UR	                  S/5        UR	                  U R                  5        U R                  [        R                  R                  :X  a  U R
                  (       d   S5       eU R                  (       a  [        R                  SU 35        [!        US9  g)zeConvert GPT-2 model to onnx

Args:
    args (argparse.Namespace): arguments parsed from command line
r9   rI   z--optimize_onnxrJ   z--test_runs1z--test_cases10z--overwriterC   rh   rO   rK   zEfp16 or mixed precision model cannot run in CPU. Please add --use_gpuzarguments for convert_to_onnx:)r6   N)model_name_or_pathdecoder_onnx	precision	cache_dirextendri   appendrP   lenop_block_listr   rx   r'   rH   loggerinfoconvert_gpt2_to_onnx)r   
model_name	argumentss      r)   gpt2_to_onnxr     s    ((J 	I ~~-89||%$$56
4+,-++,~~**000||ddd|
 ||4YK@Ai(r,   c                     [        U R                  U R                  [        U R                  5      R
                  U R                  U R                  U R                  [        R                  R                  :g  U R                  SSSSSU R                  U R                  U R                  [        R                  R                  :H  S9n[        R                  SUS    35        [        R                  SUS    35        US   U l        US   U l        g)	zbConvert T5 model to onnx

Args:
    args (argparse.Namespace): arguments parsed from command line
FT)r   r   
output_dirri   rP   optimize_onnxr   rH   use_decoder_start_token	overwritedisable_auto_mixed_precisionuse_int32_inputs
model_typerU   force_fp16_iozonnx model for encoder: r   zonnx model for decoder: r`   N)export_t5_onnx_modelsr   r   r   outputparentri   rP   r   r   rx   r'   r   rU   r   debugencoder_decoder_init_onnxr   )r   pathss     r)   
t5_to_onnxr   $  s     "22..$++!%!>!>~~):):)@)@@.. %%*??!66~~):):)@)@@E$ LL+E!H:67
LL+E!H:67%*1XD"aDr,   	onnx_pathrP   c                     SSK Jn  [        R                  " U SS9nUR	                  USSS9nU(       a  [
        R                  " X@US9  g	[        R                  S5        g	)
zShape inference on an onnx file, which will be overwritten.

Args:
    onnx_path (str): Path of onnx model
    use_external_data_format(bool): output tensors to external data or not.
r   )SymbolicShapeInferenceTload_external_dataF)
auto_mergeguess_output_ranksave_as_external_dataz4Failed to run symbolic shape inference on the model.N)	&onnxruntime.tools.symbolic_shape_inferr   onnx
load_modelinfer_shapesr   saver   warning)r   rP   r   modelouts        r)   shape_inferencer   B  sO     NOOI$?E
 
-
-eX]
-
^C
s=UVMNr,   c                    [         R                  " U SS9nUR                  R                  S   R                  n[        U5      nUR                  5       nX5;   d   eXS   nUR                  S:w  a  gSnUR                  UR                  S   5      nUc;  UR                  USS5      n	U	c  gUR                  U	R                  S   5      nUc  gSnUR                  [        R                  R                  :w  a  g[        UR                   5      S:w  a  gUR                   S   n
U
S	-  S:X  a  g["        R$                  " U
S	-  5      S	-  nX-
  nUR&                  (       a  U(       al  [(        R*                  " UR                   S   U4[(        R,                  S
9n[(        R.                  " [0        R2                  " U5      U4SS9nXR                   S'   Oj[(        R*                  " XR                   S   4[(        R,                  S
9n[(        R.                  " [0        R2                  " U5      U4SS9nXR                   S'   UR5                  5       Ul        Og[
        R6                  " X US9  g)zPad the logits MatMul weight in the provided decoder model, which will be overwritten.

Args:
    onnx_path (str): Path of onnx model
    use_external_data_format(bool): output tensors to external data or not.
Tr   r   MatMulFr`   	Transpose      dtypeaxisr   )r   r   graphr   namer   output_name_to_nodeop_typeget_initializerinputmatch_parent	data_typer   DataTyperx   r   dimsmathceilraw_datanpzerosfloat16concatenater   to_arraytobytesr   )r   rP   decoder_model_protologits_output_namedecoder_modelr   matmul_nodepad_along_axis_1logits_weighttranspose_before_matmulactual_vocab_sizepadded_vocab_sizepaddingpadding_dataweight_with_paddings                  r)   pad_weights_of_logits_matmulr   T  s@    //)M,2299!<AA12M';;=444%9Kh&
 !11+2C2CA2FGM"/"<"<[+WX"Y"*%556M6S6STU6VW   +"6"6">">> =!# &**1-A!#		"3a"781<3G 88]%7%7%:G$DBJJWL"$..+2F2F}2UWc1dkl"m$5q!88W.@.@.C$DBJJWL"$..+2F2F}2UWc1dkl"m$5q!!4!<!<!> NN&Iabr,   
model_pathri   rg   c                 @   [        5       n[        R                  Ul        U(       a  SS/OS/nU(       aZ  S[	        5       ;  a  [        S5      e[        R                  S5        U(       a%  SS0nSU0nU Vs/ s H  owU;   a  XvU   4OUPM     nn[        XUS9nU$ s  snf )a`  Create OnnxRuntime session.

Args:
    model_path (str): onnx model path
    use_gpu (bool): use GPU or not
    use_sln_strict_mode (bool): use strict mode for skip layer normalization or not

Raises:
    RuntimeError: CUDAExecutionProvider is not available when --use_gpu is specified.

Returns:
    onnxruntime.InferenceSession: The created session.
CUDAExecutionProviderCPUExecutionProviderz5CUDAExecutionProvider is not available for --use_gpu!zuse CUDAExecutionProvider"enable_skip_layer_norm_strict_modeT)	providers)	r   r   ORT_DISABLE_ALLgraph_optimization_levelr   RuntimeErrorr   r   r   )	r   ri   rg   sess_optionsexecution_providerscuda_provider_optionsprovider_optionsr   ort_sessions	            r)   create_ort_sessionr     s     "#L,B,R,RL)OV24JK]s\t"*A*CCVWWKK34%I4$P! 79NOat#atY]:J2J-.PTTat   # #:GZ[K#s   6Br   r   c           
         U[         R                  R                  :H  n[        U R                  5      nUS-
  nUS:  d   e/ SQ[        U5       Vs/ s H  nSU 3PM
     sn-   n[        U R                  5      [        U5      :w  a-  [        S[        U5       S[        U R                  5       35      e[        U5       H  u  pWU R                  U   R                  U:w  a+  [        SU SU SU R                  U   R                   35      e[        R                  nUS:  a'  U(       a  [        R                  O[        R                  nU R                  U   R                  R                  R                  n	X:w  d  M  [        SU S	U SU	 35      e   [        R!                  S
5        S/[        U5       Vs/ s H  nSU 3PM
     sn-   n
[        U R"                  5      [        U
5      :w  a-  [        S[        U
5       S[        U R"                  5       35      e[        U
5       H  u  p[U R"                  U   R                  U:w  a+  [        SU SU SU R"                  U   R                   35      eU(       a  [        R                  O[        R                  nU R"                  U   R                  R                  R                  nX:w  d  M  [        SU S	U SU 35      e   [        R!                  S5        gs  snf s  snf )a  Verify GPT-2 subgraph

Args:
    graph (onnx.GraphProto): onnx graph of GPT-2
    precision (Precision): Precision (FLOAT16 or FLOAT32) of the model.

Raises:
    ValueError: Number of inputs not expected.
    ValueError: Input name is not expected.
    ValueError: Input data type is not expected.
    ValueError: Number of outputs not expected.
    ValueError: Output name is not expected.
    ValueError: Output data type is not expected.
   r`   )	input_idsposition_idsattention_maskpast_ Number of inputs expected to be . Got Input  is expected to be $ is expected to have onnx data type z:Verifying GPT-2 graph inputs: name and data type are good.logitspresent_!Number of outputs expected to be Output z;Verifying GPT-2 graph outputs: name and data type are good.N)r   rx   r'   r   r   range
ValueError	enumerater   r   INT32FLOATr<   tensor_type	elem_typer   r   r   )r   r   
is_float16input_countlayer_countiexpected_inputsexpected_inputexpected_type
input_typeexpected_outputsexpected_outputoutput_types                r)   verify_gpt2_subgraphr    s    i//555Jekk"K/K!E^cdo^pHq^pYZ5QRPS^pHqqO
5;;3//;C<P;QQWX[\a\g\gXhWijkk&7;;q>.0vaS(;N;K6RWR]R]^_R`ReReQfghh#))63=K//;CTCTM[[^((44>>
&vaS(L]O[ablamnoo 8 KKLM zU;=O$P=Oxs^=O$PP
5<<C 011<SAQ=R<SSYZ]^c^j^jZkYlmnn'(89<<??2wqc)<_<MVTYT`T`abTcThThSijkk/9++{?P?Pll1o**66@@'vaS(L]O[abmanopp : KKMN A Ir" %Qs   K86K=c           
      4   U[         R                  R                  :H  nU(       a  [        R                  O[        R                  n[        U R                  5      nUS-
  S-  nUS:  d   eSS/n[        U5       H+  nUR                  SU 35        UR                  SU 35        M-     [        U5       H+  nUR                  SU 35        UR                  S	U 35        M-     [        U R                  5      [        U5      :w  a-  [        S
[        U5       S[        U R                  5       35      e[        U5       H  u  pxU R                  U   R                  U:w  a+  [        SU SU SU R                  U   R                   35      eUS:  a  [        R                  OUn	U R                  U   R                  R                  R                  n
X:w  d  M  [        SU SU	 SU
 35      e   S/n[        U5       H+  nUR                  SU 35        UR                  SU 35        M-     [        U R                   5      [        U5      :w  a-  [        S[        U5       S[        U R                   5       35      e[        U5       H  u  p|U R                   U   R                  U:w  a+  [        SU SU SU R                   U   R                   35      eU R                   U   R                  R                  R                  nX:w  d  M  [        SU SU SU 35      e   g)  Verify T5 decoder subgraph

Args:
    graph (onnx.GraphProto): onnx graph of T5 decoder
    precision (Precision): Precision (FLOAT16 or FLOAT32) of the model.

Raises:
    ValueError: Number of inputs not expected.
    ValueError: Input name is not expected.
    ValueError: Input data type is not expected.
    ValueError: Number of outputs not expected.
    ValueError: Output name is not expected.
    ValueError: Output data type is not expected.
r   rb   r`   r   encoder_attention_maskpast_key_self_past_value_self_past_key_cross_past_value_cross_r   r   r   r   r   r   present_key_self_present_value_self_r   r   N)r   rx   r'   r   r  r   r   r   r   r   r   r   r  r<   r  r  r   )r   r   r  
float_typer  r  r	  r  r
  r  r  r  r  r  s                 r)   verify_t5_decoder_subgraphr    s	    i//555J(2$$8I8IJekk"K?q(K! #$<=O;s34!1!56   ;45!21#67   5;;3//;C<P;QQWX[\a\g\gXhWijkk&7;;q>.0vaS(;N;K6RWR]R]^_R`ReReQfghh-.U))
[[^((44>>
&vaS(L]O[ablamnoo 8 !z;"3A3 78"5aS 9:   5<<C 011<SAQ=R<SSYZ]^c^j^jZkYlmnn'(89<<??2wqc)<_<MVTYT`T`abTcThThSijkkll1o**66@@$wqc)Mj\Y_`k_lmnn :r,   c           
         U[         R                  R                  :H  nSU R                  S   R                  ;   n/ SQnU(       a  USS n[        U R                  5      [        U5      :w  a-  [        S[        U5       S[        U R                  5       35      e[        U5       H  u  pVU R                  U   R                  U:w  a+  [        SU S	U SU R                  U   R                   35      e[        R                  nU R                  U   R                  R                  R                  nX:w  d  M  [        SU S
U SU 35      e   U(       a{  [        U R                  5      S-  S:X  d   e[        U R                  5      S-  n	U	S:  d   e/ n
[        U	5       H+  nU
R                  SU 35        U
R                  SU 35        M-     O[         R#                  S5        [        U R                  5      S-
  S-  S:X  d   e[        U R                  5      S-
  S-  n	U	S:  d   eSS/n
[        U	5       H+  nU
R                  SU 35        U
R                  SU 35        M-     [        U	5       H+  nU
R                  SU 35        U
R                  SU 35        M-     [        U R                  5      [        U
5      :w  a-  [        S[        U
5       S[        U R                  5       35      e[        U
5       H  u  p[U R                  U   R                  U:w  a+  [        SU S	U SU R                  U   R                   35      eU(       a  [        R                  O[        R$                  nU R                  U   R                  R                  R                  nX:w  d  M  [        SU S
U SU 35      e   [         R'                  S5        g)r  crossr   )encoder_input_idsr  decoder_input_idsNr   r   r   r   r   r   r`   present_key_cross_present_value_cross_zZThis format is deprecated. Please export T5 encoder in new format with only cross outputs.rb   r   encoder_hidden_statesr  r  r   r   zMT5 encoder graph verified: name and data type of inputs and outputs are good.)r   rx   r'   r   r   r   r   r   r   r   r  r<   r  r  r   r   r   r   r  r   )r   r   r  
new_formatr	  r  r
  r  r  r  r  r  r  s                r)   'verify_t5_encoder_decoder_init_subgraphr$  G  s    i//555JELLO000JO
 )"1-
5;;3//;C<P;QQWX[\a\g\gXhWijkk&7;;q>.0vaS(;N;K6RWR]R]^_R`ReReQfghh#))[[^((44>>
&vaS(L]O[ablamnoo 8 5<< 1$)))%,,'1,a {#A##&8$<=##&:1#$>? $ 	stELL!A%*a///5<<(1,2a %&=>{#A##&7s$;<##&9!$=> $ {#A##&8$<=##&:1#$>? $ 5<<C 011<SAQ=R<SSYZ]^c^j^jZkYlmnn'(89<<??2wqc)<_<MVTYT`T`abTcThThSijkk/9++{?P?Pll1o**66@@'wqc)Mm_\bcnbopqq : KK_`r,   graph1graph2shared_prefixmin_elementssignature_cache1signature_cache2c                 	   0 n0 n/ n/ n	/ n
U R                    GH  nUR                  (       a  [        UR                  5      U:  d  M0  UR                    H  nUR                  (       a  [        UR                  5      U:  d  M/  [        R                  " XXE5      (       d  MM  X,R
                  -   XkR
                  '   UR                  U5        UR
                  U;  a>  X,R
                  -   nXUR
                  '   U	R                  U5        U
R                  U5          GM     GM     [        R                  SU
 35        U R                   HV  n[        [        UR                  5      5       H1  nUR                  U   U
;   d  M  [        SUR                  U    35      e   MX     UR                   HV  n[        [        UR                  5      5       H1  nUR                  U   U
;   d  M  [        SUR                  U    35      e   MX     U	 H  nUR                   R                  U5        M      UR                   H)  nUR
                  U;   d  M  UUR
                     Ul        M+     UR                   H  n[        [        UR                  5      5       Hp  nUR                  U   U;   d  M  X~R                  U      n[        R                  SUR
                   SU SUR                  U    SU 35        UUR                  U'   Mr     M     U H  nU R                   R                  U5        M      U R                   H)  nUR
                  U;   d  M  UUR
                     Ul        M+     U R                   H  n[        [        UR                  5      5       Hp  nUR                  U   U;   d  M  XnR                  U      n[        R                  SUR
                   SU SUR                  U    SU 35        UUR                  U'   Mr     M     U	 H  nUUR
                     Ul        M     U	 H  n[         R"                  R%                  U5      R&                  n[         R(                  R+                  UR
                  UR,                  U5      nU R                  R                  U5        UR                  R                  U5        M     U	$ )	ae  Remove initializers with same value from two graphs.

Args:
    graph1 (GraphProto): the first graph to process
    graph2 (GraphProto): the second graph to process
    shared_prefix (str): add prefix to the shared initializers among two graphs
    min_elements (int, optional): minimal number of elements for initializers to be considered. Defaults to 1024.
    signature_cache1 (dict): Optional dictionary to store data signatures of tensors in graph1 in order to speed up comparison
    signature_cache2 (dict): Optional dictionary to store data signatures of tensors in graph2 in order to speed up comparison
zshared initializers:zname is found in graph 1: zname is found in graph 2: zgraph 2 rename node z input z from z to zgraph 1 rename node )initializerr   sumr   has_same_valuer   r   r   r   noder   r   r   r   remove
value_infor   numpy_helperr   shapehelpermake_tensor_value_infor   )r%  r&  r'  r(  r)  r*  mapping_initializers_1mapping_initializers_2shared_initializers_1shared_initializers_2shared_initializers_namesinitializer1initializer2shared_namer/  jr,  r1  new_namer3  s                       r)   remove_shared_initializersr@    s   &   "**!!c,*;*;&<&L"..L %%#l.?.?*@L*P''DTgg<IL]L]<]&'8'89%,,\:$$,BB"/2C2C"CK@K<+<+<=)00>-44[A /	 +& LL'(A'BCD s4::'Azz!} 99"%?

1#OPP (  s4::'Azz!} 99"%?

1#OPP (  -!!+. - ''
??444Z__EJO (
 s4::'Azz!} 661**Q-@3DII;gaStzzZ[}o]abjaklm (

1	 (  -!!+. - ''
??444Z__EJO (
 s4::'Azz!} 661**Q-@3DII;gaStzzZ[}o]abjaklm (

1	 (  -1+2B2BC - -!!**;7==[[778H8H+J_J_afg
  ,  , - ! r,   encoder_modelr   c                 *   [        U 5      n[        U5      nUR                  S5        UR                  S5        0 0 pTUR                  U5        UR                  U5        [        UR                  R
                  UR                  R
                  SUUS9nU$ )Ne_d_s_)r'  r)  r*  )r   add_prefix_to_namesremove_duplicated_initializerr@  r   r   )rA  r   encoderdecoderr)  r*  initializerss          r)   get_shared_initializersrK    s    &G&G%%)+R&))*:;))*:;-))L r,   c                    / nU R                    H@  nUR                  (       a  [        UR                  5      U:  d  M/  UR                  U5        MB     U H  nU R                   R	                  U5        M      U H|  n[
        R                  R                  U5      R                  n[
        R                  R                  UR                  UR                  U5      nU R                  R                  U5        M~     U$ )aF  Remove initializers of a graph, when they have number of elements larger than a threshold.

Args:
    graph (GraphProto): the graph.
    min_elements (int, optional): minimal number of elements for initializers to be considered. Defaults to 1024.

Returns:
    List[TensorProto]: initializers that are removed from the graph.
)r,  r   r-  r   r0  r   r2  r   r3  r4  r5  r   r   r1  )r   r(  moved_initializerstensorr,  r3  r1  s          r)   move_initializersrO    s     ##FKK 0L @!!&) $
 *  - * *!!**;7==[[778H8H+J_J_afg

+ *
 r,   c                    U R                   S:X  a  [        SU R                   S35      eU R                   S:X  a  U R                  nGO,U R                   S:X  a  U R                  nGOU R                   S:X  a  U R
                  nOU R                   S:X  a  U R                  nOU R                   S:X  a  U R                  nOU R                   S	:X  a  U R                  nOU R                   S
:X  a  U R                  nO}U R                   S:X  a  U R                  nO`U R                   S:X  a  U R                  nOCU R                   S:X  a  U R                  nO&[        SU R                   SU R                    S35      eU R                  U4$ )z
Convert attribute to kwarg format for use with onnx.helper.make_node.
    :parameter attribute: attribute in AttributeProto format.
    :return: attribute in {key: value} format.
r   z
attribute z does not have type specified.r`   r   r   rb            r   	   
   z has unsupported type rD   )r<   r   r   fr  stgfloatsintsstringstensorsgraphs)	attributer'   s     r)   _attribute_to_pairr`  >  s;    ~~:inn%55STUU ~~	1		1		1		1		1	  	1		1	!!	1	!!	2	  :inn%55KINNK[[\]^^NNE""r,   c                     0 nU R                    H"  n[        U5      u  p4UR                  X405        M$     U R                  (       a  UR                  SU R                  05        U$ )Ndomain)r_  r`  updaterb  )r/  kwargsattrkeyr'   s        r)   	kwargs_ofrg  c  sR    F)$/sl#  {{x-.Mr,   c                     [        U R                  R                  R                  R                   Vs/ s H+  oR
                  (       a  UR
                  OUR                  PM-     sn5      $ s  snf r&   )tupler<   r  r3  dim	dim_param	dim_value)vids     r)   shape_ofro  m  sH    I\I\IbIbIfIfgIfA++!++AKK?Ifghhgs   2A+subgc                    SnSn/ n[        U R                  5       H  u  pEXA:  ac  [        U5      n[        R                  R                  UR                  UR                  R                  R                  US   US   US   SUS   /S9nUR                  U/5        M     UR                  [        R                  R                  S[        R                  R                  S/S	9/5        U R                  S
5        U R                  R                  U5        / n[        U R                  5       H  u  pEXB:  ac  [        U5      n[        R                  R                  UR                  UR                  R                  R                  US   US   US   SUS   /S9nUR                  U/5        M     U R                  S5        U R                  R                  U5        / nU R                   H  n	U	n
U	R                   S:X  a  [#        U	5      nUR%                  SS05        / nUR                  U	R                  5        ['        U5      S:  a#  UR                  S/5        ['        U5      S:  a  M#  ['        U5      S:  a  UR                  S/5        [        R                  R(                  " SXR                  4SU	R                  0UD6n
UR                  U
/5        M     U R                  S5        U R                  R                  U5        U $ )Nr   r`   r   r   max_seq_lenrb   r  r3  past_sequence_lengthr3  r   r   	AttentionrZ   rR  r   rS  r   r/  )r   r   ro  r   r4  r5  r   r<   r  r  r   r   r  
ClearFieldr   r/  r   rg  rc  r   	make_node)rp  input_past_0output_past_0
new_inputsr  rm  r3  new_outputs	new_nodesr/  new_noderd  niss                r)   1update_decoder_subgraph_past_present_share_bufferr  q  s   LMJ4::&RLE33''--77Qxq58]E!HM 4 B
 	2$ ' t{{99:PRVRbRbRhRhqrps9tuvOOGJJj!K4;;'RLE33''--77Qxq58]E!HM 4 B
 	B4  ( 	OOHKK{#I		<<;&t_FMM6:;CJJtzz"c(Q,

B4  c(Q,3x!|

234{{,,[#{{eQUQZQZe^deH($  	OOFIIYKr,   is_beam_searchswitch_attentionc                    U(       a  / n[        U R                  5       H  u  pEUR                  U/5        M     UR                  [        R                  R                  S[        R                  R                  S/S9/5        UR                  [        R                  R                  S[        R                  R                  / SQS9/5        U R                  S5        U R                  R                  U5        U(       Ga  / SQn/ nU R                   GHK  nUR                  S:X  Ga$  [        U5      n	U	R                  5        H4  n
U
S	:X  a      g
X;  d  M  U
S:w  a  [        R                  SU
 S35        X	 M6     / nUR                  UR                  5        U(       at  [        U5      S:  a#  UR                  S/5        [        U5      S:  a  M#  [        U5      S:  a  UR                  S/5        [        U5      S:  a  UR                  S/5        [        R                  R                   " SUUR"                  4SUR$                  0U	D6nUR                  U/5        GMN     U R                  S5        U R                  R                  U5        g)a?  Update the Attention nodes to DecoderMaskedSelfAttention.

Args:
    subg (GraphProto): GraphProto of the decoder subgraph
    is_beam_search (bool): Boolean specifying if the sampling algo is BeamSearch
    switch_attention (bool): Boolean specifying if `Attention` is to be switched with `DecoderMaskedSelfAttention`

beam_widthr`   ru  cache_indirection
batch_sizer  rr  r   rZ   	num_headsscalemask_filter_valuerb  rv  qkv_hidden_sizesFunidirectionalzRemoving attribute: zB from Attention node while switching to DecoderMaskedSelfAttentionrS  r   r   rT  DecoderMaskedSelfAttentionr   r/  T)r   r   r   r   r4  r5  r   r  rw  r/  r   rg  copyr   r   r   rx  r   r   )rp  r  r  r{  _irm  'decoder_masked_attention_supported_attrr}  r/  rd  kr  s               r)   4update_decoder_subgraph_use_decoder_masked_attentionr    s3    


+FBrd# , 	4;;==lDL\L\LbLbkljm=nop22'$$**E 3 	
 	 

*%3
/ 	IID||{*"4A ..$G  00"NN"6qc9{ | #I! '$ 

4::& "c(Q,

B4( c(Q,3x!|

L>23x!|

$7#89{{,,0KK 	
  dV$Q R 			#r,   c                    [        5       n/ n[        U R                  5       VVs0 s H  u  p4UR                  U_M     nnn0 n0 nU R                   H^  nUR                   H+  n	U	(       d  M  X;  a  U/Xi'   M  Xi   R                  U5        M-     UR                   H  n
U
(       d  M  XU
'   M     M`     U R                   GH  nUR                  S:X  d  M  UR                  S   (       a  UR                  S   (       d  M@  UR                  S   UR                  S   pSnSU;   aT  U R                   HC  nUR                  S:X  d  M  UR                  S   U:X  d  M*  UR                  S   R                  n  O*   O'U R                   H  nUR                  U:X  d  M  Un  O   Uc  M  [        R                  R                  U5      nUR                  S:X  d  GM  UR                  5       S;   d  GM.  UR                  S   U;   d  GMD  X{   nUR                  S:X  a  UR                  S   (       d  GMo  UR                  S   U;   a  UR                  S   R!                  S	5      (       d#  UR                  S   R!                  S
5      (       av  UR                  5       S:X  ab  UR#                  UR                  S   5        UR                  U5        [%        UUR                  S      5      S:X  a  UR                  U5        GM>  UR                  S   U;  a  GMT  UUR                  S      nUR                  S:X  a  UR                  S   (       d  GM  UUR                  S      nUR                  S:X  a  UR                  S   (       d  GM  UR                  S   U;   d  GM  UR                  S   R!                  S	5      (       d&  UR                  S   R!                  S
5      (       d  GM%  UR                  5       S:X  d  GM<  UR#                  UR                  S   5        UR'                  UUU/5        [%        UUR                  S      5      S:X  d  GM  UR                  U5        GM     X4$ s  snnf )a^  Correct graph which originally use dim of past_seq_len from input_ids's shape which is fixed to max_seq_len after
   shared past/present buffer

Args:
    subg (GraphProto): GraphProto of the decoder subgraph
return:
    tensor_names_to_rename : set of tensor names which is equal to past_sequence_length
    nodes_to_remove : list of node to remove
Gatherr`   r   N	Constant_Constant>   r`   r   Shaper  r  r   Reshaper   )setr   r   r   r/  r   r   r   r_  rX  r,  r   r2  r   sizeitem
startswithaddr   r   )rp  tensor_names_to_renamenodes_to_removeindexinpgraph_input_namesinput_name_to_nodesr   r/  
input_nameoutput_nameshape_tensor_nameshape_index_nameini_gather_indices
const_noderN  gather_indices_arr
shape_nodereshape_nodetranspose_nodes                       r)   find_past_seq_len_usager    s    !UO;DTZZ;PQ;PZU5;PQ		**Jz87;f'3'3::4@ %  ;;K{37K0 '  		 <<8#::a=

1 48::a=$**Q-/!%.."&))J!))Z7J<M<Ma<PTd<d-7-A-A!-D-F-F* #, #..F{{&66-3* / ")!%!2!2!;!;<N!O #''1,&++-7JJqM%880C
"**g5*:J:J1:M $$Q'+<<"((+667GHH%++A.99:LMM*//1Q6 +..t{{1~>#**40.z/@/@/CDEJ'..z: ##A&.AA2:3C3CA3FG$,,	9l>P>PQR>S!4\5G5G5J!K&..+=.BVBVWXBY #((+/@@&,,Q/::;KLL)//2==>PQQ*//1Q6 +..t{{1~>#**D*l+KL.~/D/DQ/GHIQN'..~> Q T "22s Rs   Q(r   past_seq_len_namec                 f   Sn[        [        S U R                  R                  R                  5      5      nU H  n[        UR                  5      S:  a6  UR                  R                  S5        [        UR                  5      S:  a  M6  UR                  R                  U5        UR                  R                  U5        M     U R                  R                  R                  R                  [        R                  R                  U[        R                  / SQS95        U R                  5         U $ )Nr  c                      U R                   S:H  $ NMultiHeadAttentionr   r/  s    r)   <lambda>.add_cache_indirection_to_mha.<locals>.<lambda>d      9M)Mr,   r   r   r  r  max_sequence_lengthru  )listfilterr   r   r/  r   r   r   r   r4  r5  r   r  topological_sort)r   r  cache_indirection_name	mha_nodesr/  s        r)   add_cache_indirection_to_mhar  a  s    0VMu{{O`O`OeOefgI $**o!JJb! $**o!

+,

01  
KK""**"K$5$5=p 	+ 	

 
Lr,   r   skip_node_idxsc                    Sn/ n[        [        S U R                  R                  R                  5      5      n[        U5       GH  u  pgXb;   a  M  SnUR                   H!  n	U	R                  S:X  d  M  U	R                  n  O   Un
U
S:X  aR  U R                  R                  R                   H.  nUR                  UR                  S   :X  d  M"  UR                  n
  O   SnU R                  R                  R                   HY  nUR                  UR                  S   :X  d  M"  UR                  R                  R                  R                  S   R                   n  O   [#        UR$                  5      S:  a6  UR$                  R'                  S	5        [#        UR$                  5      S:  a  M6  U S
US-   3nUR$                  R'                  U5        UR'                  [(        R*                  R-                  UU
SUSU/S95        GM     U R                  R                  R$                  R/                  U5        U R1                  5         U $ )Noutput_cross_qkc                      U R                   S:H  $ r  r  r  s    r)   r  &add_output_qk_to_mha.<locals>.<lambda>z  r  r,   r   r  r   target_sequence_lengthr`   r   r   _r  sequence_lengthru  )r  r  r   r   r/  r   r_  r   r  r,  r   r   r<   r  r3  rj  rl  r   r   r   r   r4  r5  r   r  )r   r   r  output_qk_basename
output_qksr  idxr/  r  attoutput_qk_dtyper  r  output_qk_names                 r)   add_output_qk_to_mhar  v  s   *JVMu{{O`O`OeOefgIy)	  	>>Cxx;&EE	 "  a[[&&2266TZZ]*&'kkO 3 ":""((AvvA&)*););)A)A)E)Ea)H)R)R& ) $++"KKr" $++" //q
;>*KK..#Y0ACYZ / 	
E *T 
KK##J/	Lr,   c                   ^^ SnSnSn[        [        S U R                  R                  R                  5      5      S   nU R                  U/ SQ/ SQ5      nU R                  USS	/SS
/5      nUb  UnOUb  UnO[        R                  S5        g US   nUR                  S:X  Gat  US   n	U R                  U	SS/SS/5      mTc  [        R                  S5        g U R                  U	/ SQ/ SQ5      n
U
c  [        R                  S5        g U
S   nTU
S
S  :w  a  [        R                  S5        g [        [        U4S jU R                  R                  R                  5      5      S   nU R                  R                  R                  R                  U5        U R                  R                  R                  R                  TS   5        U R                  R                  R                  R                  TS
   5        X9R                  S'   X;R                  S'   GOBU R                  U/ SQ/ SQ5      nUc  [        R                  S5        g US
   nU R                  U/ SQ/ SQ5      mTc  [        R                  S5        g TS   nUSS  TS
S  :w  a  [        R                  S5        g [        [        U4S jU R                  R                  R                  5      5      S   nU R                  R                  R                  R                  U5        [        [        U4S jU R                  R                  R                  5      5      S   nU R                  R                  R                  R                  U5        U R                  R                  R                  R                  TS
   5        U R                  R                  R                  R                  TS   5        U R                  R                  R                  R                  TS    5        U R                  R                  R                  R                  TS!   5        X>R                  S'   X;R                  S'   U R                  R                  R                  R                  [        R                  R                  U[        R                   S
/S"95        [        R                  R#                  S#U/U/U R%                  S#5      S$9n[        R                  R                  U[        R                   / S"9n[        R                  R#                  S%U/U/U R%                  S%5      [        R&                  S&9n[        R                  R                  U[        R&                  / S"9nU R                  R                  R                  R)                  UU/5        U R                  R                  R*                  R)                  UU/5        U R-                  5         X4$ )'Nrt  past_seq_len_int32past_seq_len_int64c                      U R                   S:H  $ )NLayerNormalizationr  )ns    r)   r  *fix_past_sequence_length.<locals>.<lambda>  s    .B!Br,   r   )Addr  TileExpand	UnsqueezeRange)r   r`   r`   r   r   r   r  Slicer`   zBCannot identify base path for fixing past_sequence_length subgraphrf   r  r  r  zDCannot identify gather path for fixing past_sequence_length subgraph)r  r  r  r`   r   r   zACannot identify add path for fixing past_sequence_length subgraphz]Gather path and add path do not share the same nodes for calculating the past_sequence_lengthc                 H   > U R                   S   TS   R                  S   :H  $ Nr   r`   r   r   )r  gather_paths    r)   r  r    s"    188A;+a.BVBVWXBY3Yr,   )r  r  r  r  r  r   )r   r   r   r   r   r   zGCannot identify input_ids path for fixing past_sequence_length subgraph)r  r  r  r  r   )r`   r   r   r   r   zFCannot identify past_key path for fixing past_sequence_length subgraphr   ziThe input_ids path and past_key path do not share the same nodes for calculating the past_sequence_lengthc                 H   > U R                   S   TS   R                  S   :H  $ r  r  r  past_key_paths    r)   r  r  '  s$    188A;-PQBRBXBXYZB[3[r,   c                 H   > U R                   S   TS   R                  S   :H  $ )Nr   r`   r  r  s    r)   r  r  )  s$    AHHQK=QSCTCZCZ[\C]4]r,   r   rb   ru  Squeezeinputsoutputsr   Castr  r  r   to)r  r  r   r   r/  match_parent_pathr   r   r   r0  r   r   r   r4  r5  r   r  rx  create_node_nameINT64r   r1  r  )r   r  r  r  r/  base_path_hfbase_path_oai	base_path	base_node
range_nodeadd_pathadd_nodeconstant_in_gatherinput_ids_pathunsqueeze_nodeconstant_in_reshapesqueeze_nodesqueeze_output	cast_nodecast_outputr  r  s                       @@r)   fix_past_sequence_lengthr    sL   D /--BEKKDUDUDZDZ[\]^_D**AL
 ++		
AM
  			"!	XY"IG#r]
--wF

 KK^_**&

 KK[\A;(12,&KKwx "&)Y[`[f[f[l[l[q[q"rstuv%%&89%%k!n5%%k!n5 1.q 00K

 !KKab!!$//D

  KK`a&q)!"qr!22KK{  "&)[]b]h]h]n]n]s]s"tuvwx%%&89"6*]_d_j_j_p_p_u_u#vw
 	%%&9:%%mA&67%%mA&67%%mA&67%%mA&67 #5Q.q 
KK""**+<k>O>OXYWZ*[
 ;;((!"#$##I.	 ) L [[778JKL]L]eg7hN%%"##$##F+ & I ++445GIZIZbd4eK 
KK!!<";<	KK  ''(EF	##r,   c                    SnSnU R                   R                  R                  R                  [        R
                  R                  U[        R                  S/S9[        R
                  R                  U[        R                  / SQS9/5        [        [        S U R                   R                  R                  5      5      n[        U5       GHi  u  pVSnUR                   H!  nUR                  S:X  d  M  UR                  n  O   S	US
-   3n	[        R
                  R                  U	[        R                   SUSS/S9n
US
-  S:X  a/  U R                   R                  R"                  R%                  U
5        [        R
                  R'                  SUR                  S   UR                  S   UR                  S
   SS[)        UR                  5      S:  a  UR                  S   OS[)        UR                  5      S:  a  UR                  S   OSUUUUR                  S   /UR"                  S   [)        UR                  5      S:  a  UR"                  S   OS[)        UR                  5      S:  a  UR"                  S
   OSUS
-  S:X  a  U	OS/UR                  R+                  SS5      SUUS
-  SS9nUS
-  S:X  a  UR"                  R-                  S5        U R                   R                  R                  R-                  U5        U R                   R                  R                  R                  U/5        GMl     U R/                  5         U $ )Nr  r  r`   ru  r  c                      U R                   S:H  $ r  r  r  s    r)   r  (replace_mha_with_dmmha.<locals>.<lambda>b  r  r,   r   r  output_cross_qk_r   r  zencode_sequence_length / 2DecoderMaskedMultiHeadAttentionr   rb   rR  rS  r   r  com.microsoft)r  r  r   rb  r  	output_qkrZ   )r   r   r   r   r   r4  r5  r   r  r  r  r/  r   r_  r   r  r  r   r   rx  r   replacer0  r  )r   r  r  r  r  r  r/  r  r  qk_output_name	qk_output
dmmha_nodes               r)   replace_mha_with_dmmhar  S  s   J+	KK""KK..z;;L;LUVTW.XKK..!;#4#4<o / 	
 VMu{{O`O`OeOefgIy)		>>Cxx;&EE	 " ,C1H:6KK66K--lIqRn5o 7 
	 7a<KK$$++I6 [[**-

1

1

1!$TZZ1!4

1"!$TZZ1!4

1"!!

1 A"%djj/A"5A2"%djj/A"5A2"%'Q,B	 ""#79Z["Qw&'3 + 

6 7a<$$R(%%d+%%zl3c *f 
Lr,   	attn_maskkv_num_heads
world_sizewindow_sizec                    U R                  [        R                  R                  S[        R
                  S/S/S95        [        R                  R                  SUS/US-   /U R                  S5      S9n[        R                  R                  SUS-   S/S/U R                  S5      S9n[        R                  R                  S	S/S
/U R                  S	5      [        R                  S9n[        R                  R                  SU/US-   /U R                  S5      S9n[        R                  R                  SUS-   S/S/U R                  S5      SS9n	[        R                  R                  S	S/S/U R                  S	5      [        R                  S9n
U R                  R                  R                  R                  UUUUU	U
/5        [        [        S U R                  R                  R                  5      5      n[        U5       GH  u  pU R!                  U/ SQ/ SQ5      nU R!                  USS/SS/5      nSu  nnnUb  Uu  nnnOUb  Uu  nnU R!                  U/ SQ/ SQ5      nU R!                  USS/SS/5      nSu  nnnUb  Uu  nnnOUb  Uu  nnU R!                  USS/SS/5      nU R!                  US/S/5      nSu  nnUb  Uu  nnOUb  US   nSnUb4  Ub1  UR"                   H!  nUR$                  S:X  d  M  UR&                  nM#     SnUR"                   H!  nUR$                  S:X  d  M  UR&                  nM#     UR(                  S   UR(                  S   :H  =(       a    UR(                  S   UR(                  S   :H  nUS L=(       a    US L=(       a    US Ln US L =(       a    US L =(       a    US L n!Su  n"n#n$U(       GaO  U (       d  U!(       Ga@  [*        R,                  " U R/                  UR(                  S   5      5      n%[*        R,                  " U R/                  UR(                  S   5      5      n&[*        R,                  " U R/                  UR(                  S   5      5      n'U%R0                  S    n([2        R4                  " U%U&U'4SS!9R7                  U(S"U(-  5      n)[        R8                  R;                  U)S#U 3S$9n)U R                  U)5        [        R                  R                  SUR(                  S   U)R$                  /U)R$                   S%3/U R                  S5      S9n*U R                  R                  R                  R                  U*/5        U R                  R                  R                  R=                  U5        U R                  R                  R                  R=                  U5        U R                  R                  R                  R=                  U5        U*R>                  S   n"U (       Ga  [*        R,                  " U R/                  UR(                  S   5      5      n+[*        R,                  " U R/                  UR(                  S   5      5      n,[*        R,                  " U R/                  UR(                  S   5      5      n-U+R0                  S    n([2        R4                  " U+U,U-4SS!9R7                  S"U(-  5      n.[        R8                  R;                  U.S&U 3S$9n.U R                  U.5        [        R                  R                  SU*R>                  S   U.R$                  /U.R$                   S%3/S'9n/U R                  R                  R                  R                  U//5        U R                  R                  R                  R=                  U5        U R                  R                  R                  R=                  U5        U R                  R                  R                  R=                  U5        U/R>                  S   n"O-UR>                  S   n"UR>                  S   n#UR>                  S   n$[        R                  R                  S(U"U#U$UR(                  S)   UR(                  S*   UR>                  S   U
R>                  S   Ub  UR(                  S   OS+Ub  UR(                  S"   OS+/	UR>                  UR$                  RA                  S,S(5      S-UU-  US:X  a  UU-  OX#-  U[C        US L=(       a    US L5      US.9
n0U R                  R                  R                  R=                  U5        U R                  R                  R                  R                  U0/5        Ub/  U R                  R                  R                  R=                  U5        Uc  GM  U R                  R                  R                  R=                  U5        GM     U $ )/Noner`   r   r   r   vals	ReduceSum	_row_sumsr  Subseqlens_k_int64r  	seqlens_kr  r  _shaper  total_seq_len_int64r   )r  r  r   r   total_seq_lenc                      U R                   S:H  $ r  r  r  s    r)   r  &replace_mha_with_gqa.<locals>.<lambda>   r  r,   )RotaryEmbeddingr  r   )r   r   r   r   r   )NNNr  r  r   NNinterleavedr  )r   r   r   rf   r   r   QKV_Weight_r   _output	QKV_Bias_)r  r  GroupQueryAttentionrR  rS  r   r  r  )	r  r  r   rb  r  r  local_window_size	do_rotaryrotary_interleaved)"add_initializerr   r4  make_tensorr   r  rx  r  r  r   r   r/  r   r  r  r   r  r_  r   r  r   r   r   r   r3  r   stackreshaper2  
from_arrayr0  r   r	  ry   )1r   r  r  r  r  reduce_sum_nodesub_nodeseqlen_k_cast_noder  gather_nodetotal_seqlen_cast_noder  r  r/  q_path_1q_path_2q_rotaryq_addq_matmulk_path_1k_path_2k_rotaryk_addk_matmulv_path_1v_path_2v_addv_matmulr"  r  r  root_input_is_sameall_paths_have_biasall_paths_have_no_biasq_input_to_attentionk_input_to_attentionv_input_to_attentionqwkwvwrj  
qkv_weightpacked_matmul_nodeqbkbvbqkv_biaspacked_add_nodegqa_nodes1                                                    r)   replace_mha_with_gqarT    sG	   & 
!''	 	  	
 kk++5![()##K0	 , O {{$$K'/"###E*	 % H ..!"##F+ /  &&{X%&##G,	 ' J ++''H$e,&'##H- ( K "[[22%& !##F+ 3  
KK!!"	
	H VMu{{O`O`OeOefgIy)	**41UW`a**42CX1NQRTUPVW$4!%(0%HeX!!)Hh **41UW`a**42CX1NQRTUPVW$4!%(0%HeX!!)Hh **4%1BQFK**4(aSA$x&OE8!{H H$8))88},"%%%K *
 	>>Cxx;&EE	 "
 &^^A.(..2CCnWXHY]e]k]klm]nHn $4/[E4E[%W[J[!&$!R5D=!RUd] LVH24H#6:P%%e&;&;HNN1<M&NOB%%e&;&;HNN1<M&NOB%%e&;&;HNN1<M&NOB((2,C2r2,Q7??QWMJ**55jUXTYGZ5[J!!*-!%!6!6 q):??;&OO,G45++H5	 "7 " KK""))+=*>?KK""))(3KK""))(3KK""))(3#5#<#<Q#?  # ))%*?*?A*OP ))%*?*?A*OP ))%*?*?A*OPhhrl88RRLq9AA!c'J,,77SVRWGX7Y%%h/"&++"7"7.55a8(--H (g67 #8 #
 !!&&--.?@!!&&--e4!!&&--e4!!&&--e4'6'='=a'@$ $,??1#5 #+??1#5 #+??1#5  ;;((!$$$

1

1"))!,&--a0&.&:"&.&:"
 KK""#79NO":-5AQ5F)z1LLf)($.G843GH*) ) 
, 	%%d+%%xj1KK""))(3KK""))(3E *H Lr,   c           	         SnU R                    Vs/ s H  o"R                  PM     nnUS:  a=  X1   R                  S5      (       d%  US-  nUS:  a  X1   R                  S5      (       d  M%  Sn[        U R                  5      U-
  S-  nSU-  U-   n[        U5       Vs0 s H"  opR                   US-  U-      R                  U_M$     nn[        SU 35        [        U R                   U   5      n	[        SU	 35        U	S   n
U	S   nU	S   nSnU R                   GH;  nUR                  S:X  d  M  UR                   S   U;   d  M+  [        S	UR                   S
UR                   35        US-  nXR                   S      nSU 3nS/S[        UR                  5      -
  -  nUR                  U5        UR                  R                  U5        UR                  R                  [        R                  R                  SS5      /5        [        R                  R!                  U["        R$                  XSU/5      nU R                  R                  U/5        GM>     X:w  a  ['        SU SU 35      eg s  snf s  snf )Nr`   r   pastr   z    -- past_key_cross_inputs = zpast_key_cross_0_shape is r   r  z'    -- add cross QK output from: node: z with output: r  r   r  z#Did not add cross QK for all layersz vs )r   r   r  r   r   r   printro  r/  r   r   r   r_  r   r4  make_attributer5  r   r  r   )rp  input_self_past_0gir  output_self_present_0
num_layersinput_cross_past_0layerpast_key_cross_inputsinput_past_key_cross_0_shapebatch_size_dimnum_heads_dimcross_seq_len_dimnum_layer_output_qkr/  cross_attention_out_nameappended_namescross_attentions                     r)   .update_decoder_subgraph_output_cross_attentionrh    s}   +/::6:R:6
a
(9(L(W(WX^(_(_Q a
(9(L(W(WX^(_(_dkk"%::q@JZ*;;afgqarsarX]ZZ	4F(FGLLeSars	+,A+B
CD#+DJJ7I,J#K 	&'C&D
EF1!4N03M4Q7		LL==DJJqMUjDj;DII;nUYU`U`Tabc1$)**Q-8E)9%'A$ TQT[[)9%9:N!!":;KK~.NN!!4;;#=#=k1#M"NO"kk@@(!!3DEO
 KK01! " (>zl$ObNcdee )E 7 ts   I0)I5c           
      f   SnU R                    Vs/ s H  o"R                  PM     nnUS:  a=  X1   R                  S5      (       d%  US-  nUS:  a  X1   R                  S5      (       d  M%  Sn[        [	        U R                   5      U-
  S-  5      nSU-  U-   n/ n/ nU R
                   H'  n	U	R                  S:X  d  M  UR                  U	/5        M)     [	        U5      U:  a  gS n
U R
                   H  n	U	R                  S:X  d  M  U	n
  O   / S	QnS
n[        U 5      u  p[	        U5      S:  a  U H  n[        SU SU S35        M     U H(  n[        SUR                   SUR                   35        M*     [        R                  R                  SS/S/SS9n[        R                  R                  SS/U/S[        R                  S9nUR                  UU/5        U R
                   GH  n	[	        U	R                  5      S:  a  U
b  U	R                  S   U
R                   S   :X  a_  [        R                  R                  SS/S/S[        R                  S9nUR                  S   U	R                   S'   UR                  U/5        U	R                  S:X  Ga  [!        U	5      nUR#                  5        H  nUU;  d  M  UU	 M     U	R                   S   U	R                   S   U	R                   S   /nUR                  [	        U	R                   5      S:  a  U	R                   S   OS/5        UR                  [	        U	R                   5      S:  a  U	R                   S   OS/5        UR                  [	        U	R                   5      S:  a  U	R                   S   OS/5        UR                  [	        U	R                   5      S:  a  U	R                   S   OS/5        UR                  S/5        UR                  S/5        UR                  S /5        UR                  [	        U	R                   5      S:  a  U	R                   S   OS/5        SUS!'   [        R                  R                  " S"UU	R                  4S#U	R                  0UD6n	X;  d  GM  [%        U	R                   5       H  u  nnUU;   d  M  XR                   U'   M     UR                  U	/5        GM     U R'                  S$5        U R
                  R                  U5        U R                    Vs/ s H  nUR                  PM     nn/ n[%        U R                   5       H  u  nnUU:  ae  UU:  a_  [)        U5      n[        R                  R+                  UR                  UR,                  R.                  R0                  US   US   S%US   /S&9nUR                  U/5        M     SU;  aH  UR                  [        R                  R+                  S[        R                  R2                  S/S'9/5        SU;  aH  UR                  [        R                  R+                  S[        R                  R2                  S/S'9/5        S U;  aI  UR                  [        R                  R+                  S [        R                  R2                  / S(QS'9/5        U R'                  S)5        U R                   R                  U5        / n[%        U R                  5       H}  u  nnUU:  a_  [)        U5      n[        R                  R+                  UR                  UR,                  R.                  R0                  US   US   S%US   /S&9nUR                  U/5        M     U R'                  S*5        U R                  R                  U5        g+s  snf s  snf ),Nr`   r   rV  rb   r   r  FRelativePositionBiasr  #past_sequence_length_squeezed_int64r   zFound tensor name `z` to be renamed to ``zFound node to remove: type = z	, name = r  rt  past_sequence_length_squeezed!node_past_sequence_length_squeezer$  r  &node_past_sequence_length_squeeze_cast)r   r  past_sequence_length_int64past_sequence_length_castr   rQ  rR  rS  r  r  rZ   r  r   r/  rr  rs  ru  r  r   r   T)r   r   r  ry   r   r/  r   r   r  rW  r   r4  rx  r   r  r   rg  r  r   rw  ro  r5  r<   r  r  r  )rp  rY  rZ  r  output_self_past_0r\  r]  r}  	old_nodesr/  rel_pos_bias_noder  target_squeezed_past_seq_namer  r  name_to_renamenrr  r  rd  r  r  r  r   r  orig_input_namesr{  r  rm  r3  r|  s                                  r)   ?update_decoder_subgraph_share_buffer_and_use_decoder_masked_mhary    s   +/::6:R:6
a
(9(L(W(WX^(_(_Q a
(9(L(W(WX^(_(_c$**o(99Q>?JZ*;;II		<<//dV$ 
 9~
" 		<<11 $ 
/+ %J!.Ed.K+
!"Q&4N''77KLiKjjklm 5!B1"**YrwwiPQ " {{,,#$,-4	 - 
 KK)),-*+9   * 
	 	,	23		t{{a$5$AdkkRSnXiXoXopqXrFr--'(-.0$$ . I &,,Q/DJJqMi[)<<//t_F[[]CCq	 # 

1

1

1C JJTZZ1)<

1"EFJJTZZ1)<

1"EFJJTZZ1)<

1"EFJJTZZ1)<

1"EFJJ./0JJ~&JJ+,-JJTZZ1)<

1"EF23F./;;((1 YY	
 D &(4t11(EJJu%  5 dV$a d 	OOFIIY,0JJ7JSJ7J4::&2!!a*<&<RLE33''--77Qxq=%(C 4 B
 	2$ ' %55[[//0FHXHXH^H^ghfi/jk	
 ++4;;==lDL\L\LbLbkljm=nop"2222'$$**E 3 	
 	OOGJJj!K4;;'2""RLE33''--77Qxq=%(C 4 B
 	B4  ( 	OOHKK{#s 7Z 8s   ^)^.model_protoc                    [        U 5      nUR                  5       n/ n/ nUR                  5        GH  nUR                  S:X  d  M  SUR                  S   ;   a  SUR                  S   ;   a  M>  X%R                  S      nX%R                  S      nX%R                  S      nUR                  UR                  S   5      n	UR                  UR                  S   5      n
UR                  UR                  S   5      nU	(       a  U
(       a  U(       d    g[        R                  " U	5      n[        R                  " U
5      n[        R                  " U5      n[        R                  " XU/SS9nUR                  S	S
S9n[        R                  R                  US-   U	R                  S:X  a  [        R                   O[        R"                  UR$                  S   UR$                  S   /UR'                  5       R)                  5       S9nU R*                  R,                  R/                  U/5        [        R                  R1                  S	UR                  S   US-   /US-   /US9nUR2                  S   UR                  S'   SUR                  S'   SUR                  S'   UR/                  U/5        UR/                  XgU/5        GM     UR5                  U5        UR7                  U5        UR9                  5         UR;                  5         g)Nr  past_key_crossr`   past_value_crossr   r   Fr   r   
MatMul_QKV)name_prefix_weightr  _outr  r   T)r   r   nodesr   r   r   r   r   r   r   r  r   r4  r,  r   r   r  rx   r3  flattentolistr   r,  r   rx  r   	add_nodesremove_nodesupdate_graphr  )rz  
onnx_modelr   nodes_to_addr  r/  r9  r>  rB  q_weightk_weightv_weightrI  rJ  rK  rL  matmul_node_nameweightr   s                      r)   pack_qkv_for_decoder_masked_mhar  P  s   ;'J$88:LO  "<<<<4::a=05G4::VW=5X*::a=9H*::a=9H*::a=9H!11(..2CDH!11(..2CDH!11(..2CDHh%%h/B%%h/B%%h/B1=J)::8Q]:^[[,,%	1080B0Ba0G;,,[M`M` &&q):+;+;A+>?'')002	 - F ))00&:++// q)+;i+GH)F23%	 0 K (..q1DJJqMDJJqMDJJqM.""H#ABU #X &O,!r,   decoder_onnx_pathc                 :   [         R                  " U SS9n[        [        UR                  R
                  5      5       H  nUR                  R
                  U   R                  S:X  d)  UR                  R
                  U   R                  S:X  d  MS  UR                  R
                  U   R                  R                  R                  R                  S   nUR                  S5      (       a  UR                  5         SUl        M     [        R                  " UU US9  g)a=  Update the input shapes for the inputs "input_ids" and "position_ids" and make the sequence length dim value 1 for each of them.
   The decoder model will be over-written.

Args:
    decoder_onnx_path (str): Path of GPT-2 decoder onnx model
    use_external_data_format(bool): output tensors to external data or not.
Tr   r   r   r`   rk  r   )r   r   r   r   r   r   r   r<   r  r3  rj  HasFieldClearrl  r   r   )r  rP   r   r  shape_dim_protos        r)   *update_input_shapes_for_gpt2_decoder_modelr    s     //*;PTU3*006678%%++A.33{B"((..q166.H177==a@EEQQWW[[\]^O ''44%%' )*O% 9 NN6
 r,   init_decoder_onnx_pathc           	         [         R                  " U SS9nUR                  R                  S   R                  n[        U5      nUR                  5       nXF;   d   eXd   nUR                  S:w  a  gUR                  U/ SQ/ SQ5      nUc  UR                  U/ SQ/ S	Q5      nUc1  UR                  U/ S
Q/ SQ5      nUc  UR                  U/ SQ/ SQ5      nUc  gUS   n	U	R                  S:H  n
U
(       dt  SnUR                  U	/ SQUSSS/5      nUc  SnUR                  U	/ SQUSSS/5      nUc  SnUR                  U	/ SQUSS/5      nUc  SnUR                  U	/ SQUSS/5      nOoSnUR                  U	/ SQUSS/5      nUc  SnUR                  U	/ SQUSS/5      nUc  SnUR                  U	SS/US/5      nUc  SnUR                  U	SS/US/5      nUc  gUS:X  a  SOSnU
(       d  UR                  U	SU5      nOUR                  U	SU5      nUc  gUS   nUS   n[         R                  R                  S[        R                  S/S/S9n[         R                  R                  S[        R                  S/S/S9n[         R                  R                  S[        R                  S/S/S9n[         R                  R                  S[        R                  S/S/S9nUR                  U5        UR                  U5        UR                  U5        UR                  U5        SUR                  S   -   n[         R                  R                  SUR                  S   SSSS/U/UR!                  SS5      S9nU
(       d  UR                  S   OUR                  S    nSUR                  S   -   n[         R                  R                  SUSSSS/U/UR!                  SS!5      S9nUR#                  U5        UR#                  U5        UR%                  UUR                  S   U5        UR%                  U	UU5        UR'                  5         [
        R(                  " UUUS"9  g)#al  Generates the initial decoder GPT2 subgraph and saves it for downstream use.
   The initial decoder model will be saved to init_decoder_onnx_path.

Args:
    decoder_onnx_path (str): Path of GPT-2 decoder onnx model
    init_decoder_onnx_path (str): Path of GPT-2 init decoder onnx model
    use_external_data_format(bool): output tensors to external data or not.
Tr   r   r   F)r  r  r  r  r  r   r  FastGelur  r   r  r  r  )r   r   r   r`   r   r   r   r   r   r   r   r   r   )
r  SkipLayerNormalizationr  r   r  r  r  r   r  r  )
r   r   r`   r   r   r   r   r   r   r   )r  r  r  r   r  r   r  r  )r   r   r`   r   r   r   r   r   )r  r   r  r   r  )r   r`   r   r   r   rf   r  )r  r  r   rv  r`   )r  r   rv  )r  r   rv  rv  r  r  SliceLastTokenStartsr  SliceLastTokenEndsSliceLastTokenAxesSliceLastTokenStepsedge_modified_r  GatherLastToken_0_r  r   GatherLastToken_1_r   )r   r   r   r   r   r   r   r   r  r   r4  r,  r   r  r+  rx  r  r  replace_node_inputr  r   )r  r  rP   init_decoder_model_protor   gpt2_init_decoder_modelr   logits_matmul_node"logits_matmul_to_residual_add_pathresidual_add_nodeis_skiplayernorm_path&residual_add_to_attention_parent_indexresidual_add_to_attention_path residual_add_to_add_parent_indexadd_before_residual_add	attentionmatmul_after_attentionslice_starts
slice_ends
slice_axesslice_stepsslice_0_output_nameslice_node_0add_before_residual_add_outputslice_1_output_nameslice_node_1s                             r)   generate_gpt2_init_decoderr    s     $/@UYZ177>>qAFF'(@A1EEG444,@ !!X- *A)R)R	
 	0#*&* *1-D-V-V +.
*$ *1-D-V-V	 %.
*  .51H1Z1Z"  
2. *1:2> .559QQ !12.)@)R)R23Q1=*
& *1562-D-V-V!67AqA.* *1562-D-V-V!.7A>.* *1562-D-V-V!.7A>.* 23.)@)R)R+3Q:*
& *1562-D-V-V!/7A>.* *1562-D-V-V!;'7;.* *1562-D-V-V!;'7;.* &-,RVW,Wq]^$ !"9"F"Fu&F#
 #:"F"F$,#
 &.r2I;B?;;**###ST	 + L ((!##ST	 ) J ((!##SS	 ) J ++))"##ST	 * K ++L9++J7++J7++K8 +Y-=-=a-@@;;((Q"  !
 %%$55g?ST ) L" 2G&&q)LcLjLjklLm # +-D-K-KA-NN;;((*"  !
 %%$55g?ST ) L $$\2$$\2 ../EyGWGWXYGZ\op../@B`buv ,,. NN 6
 r,   c                 4   [        S5      n[        UR                  5      n[        UR                  5      n[        UR                  5      nU R                  R
                   H  nUR                  R                  R                  R                   H\  nUR                  S5      (       d  M  UR                  UUUU4;   d  M1  [        UR                  5      nUR                  5         Xl        M^     M     U R                  R                   H  nUR                  R                  R                  R                   H\  nUR                  S5      (       d  M  UR                  UUUU4;   d  M1  [        UR                  5      nUR                  5         Xl        M^     M     g)z_Make dim_proto numeric.

Args:
    model: T5 encoder and decoder model.
    config: T5 config.
r`   rk  N)rr   r  d_modeld_kvr   r   r<   r  r3  rj  r  rk  ry   r  rl  r   )	r   configr  r  hidden_size	head_sizerN  	dim_protorl  s	            r)   make_dim_proto_numeric_t5r  	  sG    !fOF$$%Ifnn%KFKK I++$$0066::I!!+..93F3F	K 4  	 3 34	!&/# ; % ++##0066::I!!+..93F3F	K 4  	 3 34	!&/# ; $r,   generation_typec                 /   U R                   S:H  nU[        R                  :H  nU[        R                  :H  nU[        R                  :H  nU R
                  n[        R                  SU 35        [        U R                  5      S:X  a  U R                  S   S:X  aw  U(       ai  U R                  [        R                  R                  :X  aA  / SQU l	        [        R                  SU R                   35        [        R                  S5        O/ U l	        U(       d  U(       aJ  U(       d  [        S	5      eU R                  (       a  [        S
5      eU R                   (       a  [        S5      eU(       a#  U(       a  U R"                  (       d  [%        S5      eU R"                  (       a  U(       d  [%        S5      eU R"                  (       a  U R&                  (       d  [%        S5      eU(       Ga  U R(                  (       aR  [*        R,                  R/                  U R(                  5      (       a$  [        R                  SU R(                   35        GO%U R(                  (       dX  U R0                   SU R                   S3n[3        [3        U R4                  5      R6                  U5      R9                  5       U l        [        R                  SU R0                   SU R(                   S35        [;        U 5        OU R(                  (       aA  U R<                  (       a0  [        R                  SU R(                   SU R<                   35        O.[        R                  SU R0                   S35        [?        U 5        SnU R@                  (       d  U R                  [        R                  R                  :X  a{  U(       at  U(       d  U(       d  U(       a_  [        R                  SU R(                   S35        [C        U R(                  U RD                  5      nU(       d  [        RG                  S5        Sn	Sn
U RH                  (       d  U(       a  U(       d  U(       d  U(       a  [        R                  SU R(                   S35        S U R                   S3n[3        [3        U R4                  5      R6                  U5      R9                  5       n
[K        U R(                  U
U RD                  5      n	U	(       d  [        RG                  S!5        U	(       a0  [M        U R(                  U RD                  5      (       d  [%        S"5      eU(       d  U RN                  (       d  U	(       ax  [        R                  S#U R(                   S35        [Q        U R(                  U RD                  5        U	(       a.  [        R                  S#U
 S35        [Q        XRD                  5        U(       a*  [R        RT                  " U R0                  U RV                  S$9nOcU R                   S%:X  a*  [X        RT                  " U R0                  U RV                  S$9nO)[Z        RT                  " U R0                  U RV                  S$9nU R\                  (       a  [        R                  S&U 35        UR^                  nU(       a  UR^                  OUR`                  nURb                  nU Rb                  S':w  a  U Rb                  nU R^                  S':w  a  U R^                  nU R`                  S':w  a  U R`                  n[d        Rf                  " U R(                  S(S)9nU R                    S*3URh                  l5        SnU R                   S:X  a{  [m        URh                  U R                  5        U	(       aS  [d        Rf                  " U
S(S)9nU R                    S+3URh                  l5        [m        URh                  U R                  5        O [o        URh                  U R                  5        SnU(       a  / S,QnOU(       d  U(       a  / S-QnU Rp                  (       a  URs                  S.5        OURs                  S/5        U Rt                  (       a  URs                  S05        OURs                  S/5        U Rv                  (       a  URs                  S15        OURs                  S/5        U(       ag  U Rx                  (       a#  U Rz                  (       a  URs                  S25        OURs                  S/5        U R|                  (       a  URs                  S35        S4/nU R                  (       a  URs                  S55        U R                   (       a)  U R                  (       d   S65       eURs                  S75        SnU(       a.  [d        R~                  R                  S8UUS9U R                    3S:9nOiU(       a.  [d        R~                  R                  S;UUS<U R                    3S:9nO4U(       a-  [d        R~                  R                  S=UUS>U R                    3S:9nS?UlA        SnU(       a  [d        R~                  R                  S@U5      [d        R~                  R                  SAU5      [d        R~                  R                  SBU R                  5      [d        R~                  R                  SCU R                  (       a  SOS5      [d        R~                  R                  SDU R                   S:X  a  SOS5      /nGO;U(       a  [d        R~                  R                  S@U5      [d        R~                  R                  SAU5      [d        R~                  R                  SDU R                   S:X  a  SOS5      [d        R~                  R                  SBU R                  5      /nGOU(       Ga  [d        R~                  R                  S@U5      [d        R~                  R                  SAU5      [d        R~                  R                  SDU R                   S:X  a  SOS5      [d        R~                  R                  SBU R                  5      [d        R~                  R                  SEU R                  5      [d        R~                  R                  SFU R                  5      [d        R~                  R                  SGU R                  5      [d        R~                  R                  SHU R                  5      [d        R~                  R                  SIU Rx                  5      [d        R~                  R                  SJU R                  5      /
nU(       a0  UR                  [d        R~                  R                  SKU5      /5        UR                  R                  U5        / nU R                   SL;   Ga  U RN                  (       aC  [        R                  SMU R<                   S35        [Q        U R<                  U RD                  5        [d        Rf                  " U R<                  S(S)9n[        URh                  R                  5      SN:X  a  SOOSPnU R                    SQU 3URh                  l5        [        URh                  U R                  5        [        UU5        [        UU5        U(       a  U R"                  (       d  [%        SR5      e[        R                  SS5        [        URh                  5      (       a  [        R                  ST5        O[        R                  SU5        [        U5      (       a  [        R                  SV5        O[        R                  SW5        U R                  (       dK  [        UU5      n[        R                  [        U5       SXU Vs/ s H  nURj                  PM     sn SY35        UR                  S:  d   SZ5       eUR                  R                  [d        R~                  R                  SOURh                  5      [d        R~                  R                  S[URh                  5      [d        R~                  R                  S\UR                  5      /5        GOU	(       Ga	  U R                  (       dK  [        UU5      n[        R                  [        U5       SXU Vs/ s H  nURj                  PM     sn S]35        U(       a*  [        R                  S^5        [        URh                  5        U R"                  (       a'  [        URh                  US5      (       d  [%        S_5      eUR                  Rs                  [d        R~                  R                  S`URh                  5      5        O6[        URh                  5      n[        R                  [        U5       Sa35        U(       a*  [        R                  Sb5        [        URh                  5        U R"                  (       a'  [        URh                  US(5      (       d  [%        Sc5      eUR                  Rs                  [d        R~                  R                  S[URh                  5      5        [d        R~                  R                  Sd[        R                  SeSf/5      n[d        R~                  R                  Sg[        R                  S/5      n[d        R~                  R                  Sh[        R                  S/5      n[d        R~                  R                  Si[        R                  S/5      n[d        R~                  R                  Sj[        R                  S/5      n[d        R~                  R                  Sk[        R                  S/5      n[d        R~                  R                  Sl[        R                  S/5      n Sn!U(       a
  UUUUUUU /n!OU(       d  U(       a  UUUU /n!U Rp                  (       aA  [d        R~                  R                  S.[        R                  U/5      n"U!Rs                  U"5        U Rt                  (       aB  [d        R~                  R                  S0[        R                  SeU/5      n#U!Rs                  U#5        U Rv                  (       aB  [d        R~                  R                  S1[        R                  SeSf/5      n$U!Rs                  U$5        U Rx                  (       aS  U Rz                  (       aB  [d        R~                  R                  S2[        R                  SeU/5      n%U!Rs                  U%5        U(       aR  U R|                  (       aA  [d        R~                  R                  S3[        R                  S/5      n&U!Rs                  U&5        Sn'U(       a2  [d        R~                  R                  S4[        R                  / SmQ5      n'O?U(       d  U(       a1  [d        R~                  R                  S4[        R                  SeSg/5      n'U'/n(U R                  (       aB  [d        R~                  R                  S5[        R                  SeSj/5      n)U(Rs                  U)5        U R                   (       aD  [d        R~                  R                  S7[        R                  SnSeSiU/5      n*U(Rs                  U*5        [d        R~                  R                  U/U(       d  U R                    So3OU R                    Sp3U!U(U5      n+[d        R~                  R                  U+SqUR                  Sr9n,U RD                  (       ao  SSsK^J_n-  U-R                  [d        R                  5      U-R                  St5      :  a  [        RG                  Su5        [        R                  " U,U R4                  S(S(Sv9  O![d        R                  " U,U R4                  5        [        R                  SwU R4                   35        gs  snf s  snf )xz|Convert model according to command line arguments.

Args:
    args (argparse.Namespace): arguments parsed from command line
r>   z**** past_present_share_buffer=r`   r   rM   )r  r  r  r  z**** Setting op_block_list to zI**** use --op_block_list if you want to override the block operator list.z<Currently only gpt2 with greedy search/sampling is supportedzLoutput_sequences_scores currently is not supported in greedy search/samplingzHoutput_token_scores currently is not supported in greedy search/samplingzi`use_decoder_masked_attention` MUST be turned on to use `past_present_share_buffer` in case of BeamSearchzS`past_present_share_buffer` MUST be turned on to use `use_decoder_masked_attention`z?`use_decoder_masked_attention` option is only supported on GPUsz)skip convert_to_onnx since path existed: _past_z.onnxzConvert GPT model z	 to onnx z ...z,skip convert_to_onnx since paths specified: z and zConvert model z to onnx ...Fz=Pad logits MatMul weights for optimal MatMul perf in fp16 on z. The file will be overwritten.z]Tried and failed to pad logits MatMul weights. Performance may be sub-optimal for this MatMulNz*Creating an initial run GPT2 decoder from z. gpt2_init_past_zuTried and failed to generate the init decoder GPT2 model. Performance may be sub-optimal for the initial decoding runzGCould not update the input shapes for the non-initial decoder subgraph.z Run symbolic shape inference on r   r?   zConfig=rf   Tr   z decoderz init decoderr   
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyr   r  r  r  rY   r   r\   r   r^   r_   	sequencessequences_scoresz8--output_token_scores requires --output_sequences_scoresscores
BeamSearchBeamSearch_r  GreedySearchGreedySearch_Sampling	Sampling_r  eos_token_idpad_token_idno_repeat_ngram_sizerX   r   temperaturetop_pfilter_valuemin_tokens_to_keepcustompresence_penalty
vocab_sizer?   r@   zSymbolic shape inference on r   rH  zencoder and decoder init zMpast_present_share_buffer is only supported with use_decoder_masked_attentionzl*****update t5 decoder subgraph to share past/present buffer and use decoder_masked_multihead_attention*****z4*****update t5 decoder subgraph successfully!!!*****zF*****DecoderMaskedMultiHeadAttention is not applied to T5 decoder*****z9*****pack qkv for decoder masked mha successfully!!!*****z3*****pack qkv for decoder masked mha failed!!!*****z shared initializers (z>) in encoder and decoder subgraphs are moved to the main graphz%decoder_start_token_id should be >= 0rI  decoder_start_token_idzC) in decoder and init decoder subgraphs are moved to the main graphzY*****update init decoder subgraph to make past and present share buffer******************zLCould not update the init decoder subgraph to use DecoderMaskedSelfAttentioninit_decoderz: initializers from the decoder are moved to the main graphzT*****update decoder subgraph to make past and present share buffer******************zGCould not update the decoder subgraph to use DecoderMaskedSelfAttentionr   r  r  r  r  r  r  r  r  )r  r  r  zmax_length - sequence_lengthz beam searchz greedy searchzonnxruntime.transformers)producer_nameopset_imports)versionz1.12.0z0Require onnx >= 1.12 to save large (>2GB) model!)r   all_tensors_to_one_filezmodel save to )dr   r    r2   r3   r4   rZ   r   r   r   r   r   r   rx   r'   NotImplementedErrorrV   rW   r[   r   ri   r   rt   ru   existsr   r   r   r   as_posixr   r   r   rR   r   rP   r   rS   r  r  rQ   r   r   from_pretrainedr   r   r   rH   r  r  r  r   r   r   r   r  r  rY   r   r\   r]   r  r^   r_   r4  rx  rb  rX  r  rX   r  r  r  r  r  r   r_  r   r$  r  ry  r  rT   rK  r  r  r  rO  r5  r   r  r  
make_graph
make_modelopset_import	packagingr  parse__version__r   r   ).r   r  is_gpt2is_beamsearchis_greedysearchis_samplingrZ   onnx_filenamelogits_matmul_weight_paddedgpt2_init_decoder_generatedgpt2_init_decoder_onnx_pathgpt2_init_decoder_onnx_filenamer  r  r  r  r   r  r  r  r/  attr_to_extendrJ  rA  suffixr  r   r  r  r  r  r  r  graph_inputsrY   r\   r   r^   r_   r  graph_outputsr  r  	new_graph	new_modelr  s.                                                 r)   convert_generation_modelr  	  s
    OOv-G)^-F-FFM+~/J/JJO'>+B+BBK&*&D&D
KK12K1LMN
4!#(:(:1(=(Gt~~):):)@)@@"D KK89K9K8LMNKKcd!#D+%&dee''%&tuu##%&pqq !]4;\;\w
 	
 ((1Jnoo ((Z[[0A0A!B!BKKCDDUDUCVWX$$#'#:#:";6$..AQQV W$(dkk):)A)A=$Q$Z$Z$\!KK,T-D-D,EYtO`O`Naaefg!?!?KK>t?P?P>QQVW[WuWuVvw KK.)@)@(ANOt #(''NNi//555oKDL]L]K^ _, ,	
 'C4CTCTVZVsVs&t#*NNo #("&;;o@ARAR@SSUVW,;DNN;K5*Q'&*4+<+C+CEd&e&n&n&p#&@'))'
# +NNN '/Yt<<0
 0
 fgg
 #d&>&>B]6t7H7H6IIhij))4+H+HI&KK:;V:WWvwx79V9VW++D,C,Ct~~^	D	 ))$*A*AT^^\**4+B+Bdnn]||gfX&'&&L*16&&v7J7JL""J "__
B((B((OOD$5$5$OM"&//!2(;M"& ]00$..A '&*oo6Qfj&k#48OO3DM1R#)). !8!>!>O"=#6#6GF
 
K
 l#b)*b!!&'b;;4--MM/*MM"99MM&!mG##)*++g-gg+x D{{$$t/0	 % 
 
{{$$  12	 % 
 
{{$$T__-.	 % 
 "DKNKK&&~|DKK&&~|DKK&&'=t?X?XYKK&&'7d>Q>QWXYKK&&|$//V:SQYZ[
 
KK&&~|DKK&&~|DKK&&|$//V:SQYZ[KK&&'=t?X?XY	
 
KK&&~|DKK&&~|DKK&&|$//V:SQYZ[KK&&'=t?X?XYKK&&}d6F6FGKK&&w

;KK&&~t7H7HIKK&&';T=T=TUKK&&x=KK&&'94;P;PQ
 #t{{99,
STUNN.)L-'##KK6t7U7U6VVuvwD::D<Y<YZ(F(F[_`!-"5"5";";<AGa&*oo%6ax#@ /0C0CT^^T!-8!-8 %44 !pqqKK~ O}ObObccRSde.}==WXQR//2=-PLKK|$%%;\<Z\QVV\<Z;[  \Z  [ ,,1Z3ZZ1**9m6I6IJ**9m6I6IJ**+CVEbEbc	
 ' 3367NP]^<())?Q]@^Q]AQ]@^?_  `c  d
 )wxABYB_B_` 009m'--}e: : !!oppNN!!$++"<"<^MdMjMj"kl -]-@-@ALKK3|,--ghi %KKno=m>Q>QR ,,5i6
 6
 fggdkk88MDWDWXY 22;@Q@QT`bsStuI33L+BSBSVWUXYJ33L+BSBSVWUXYJ22;@Q@QTUSVWI;;==>TVaVgVgjkilm[[778H+J[J[^_]`aN;;<PR]RcRcfgehiL 
 
K	
 [[77kFWFWZdYef
J' KK>>!2!2\:4N
 	-.!!;;k//,@Q1R
 	N+{{t))::[..z0J
 	M*tyy{{11&+:K:KaSQD! IKK66@
	
 
KKK66<(
	 KM##;;==12

 	-.33+\;
S

 	V$&&	1@DOOL	)HYYgFhI &&0#00 ' I $$%==))*W]]8-DDNNMNKK"&$(		
 			)T[[)
KK../k =[8 A_s   A_B A_r   r   r  r  bad_words_idsc                    U R                   (       a.  [        R                  R                  5       (       d  [	        S5      eU R
                  [        R                  R                  :X  a  UR                  5         [        R                  " U R                   (       a  SOS5      nUR                  U5        [        R                  " S5        UR                  U5      nUR                  U5      n/ n[        U R                  5       H  n	[        R                  " 5       n
UR!                  UUU R"                  U R$                  U R&                  U R(                  U R*                  UUU R,                  U R.                  U R0                  U(       a  UOSSU R2                  =(       d    U R4                  S9n	UR7                  [        R                  " 5       U
-
  5        M     UR8                  S   nSS	KJn  U" X5      $ )
ah  Test PyTorch performance of text generation.

Args:
    args (argparse.Namespace): arguments parsed from command line
    model (Union[GPT2LMHeadModel, T5ForConditionalGeneration]): PyTorch model
    input_ids (torch.Tensor): input_ids
    attention_mask (torch.Tensor): Attention mask
    eos_token_id (int): EOS token ID
    pad_token_id (int): Padding token ID
    bad_words_ids (List[List[int]]): Words shall not be generated.

Raises:
    RuntimeError: PyTorch with CUDA is not available for --use_gpu

Returns:
    Dict[str, Any]: A dictionary with string with metric name, and value can be integer or string.
z=Please install PyTorch with Cuda for testing gpu performance.zcuda:0cpuFNTr   r   r  r  r  rX   r  r  r  r  r  r  r  return_dict_in_generateoutput_scoresr   get_latency_result)ri   torchcudais_availabler   r   r   rx   r'   halfdevicer  set_grad_enabledr   
total_runstimegenerater  r  r  rX   r  r  r  r  rV   rW   r   r3  benchmark_helperr	  )r   r   r   r   r  r  r  r  torch_latencyr  startr  r	  s                r)   test_torch_performancer    ss   4 ||EJJ3355Z[[~~**000

\\dll(>F	HHV	5!V$I#&&v.NM4??#		NN)nn..!%!:!:%%!%!:!:..#66+8-d$(66R$:R:R  
" 	TYY[501' $( #J3m88r,   c                    [         R                  " U R                  [         R                  S9n[	        U R                  S   5       HC  nSn[	        U R                  S   5       H"  nX   U   U:X  a  US:X  a	  SX#   U'   M  US-  nM$     ME     U$ )Nr   r   r`   )r   onesr3  int32r   )r   r  r   r  abs_posr>  s         r)   create_attention_maskr  9  s    WWY__BHH=N9??1%&yq)*A|A,.7a<'(!!$1	 + ' r,   	sentences	is_greedyc                 t   U R                   S:X  d   e[        R                  " U R                  U R                  S9nSUl        UR                  Ul        [        R                  " U R                  U R                  UR                  S9nUc  / SQnU" USSS	9nUS
   nUS   nSnUR                  USS9n	U	 V
s/ s H  o/PM     n	n
U R                  (       a  [        R                  SU	5        O/ n	UR                  nUR                  nUR                  nUR                  n/ nSnU R                   (       Gdt  [#        S5        [#        S5        UR%                  UUU R&                  U R(                  U R*                  U R,                  U R.                  UUU R0                  U R2                  U R4                  U	(       a  U	OSSU R6                  =(       d    U R8                  S9n[#        S
U5        [#        S5        [#        SUR:                  5        U R6                  (       a  [#        SUR<                  5        U R8                  (       a  [#        SUR>                  5        [A        UR:                  5       H7  u  nnURC                  USS9nURE                  U5        [#        U SU 35        M9     [#        S5        [#        S5        U(       a  URG                  5       RI                  5       RK                  [L        RN                  5      [L        RP                  " U R&                  /[L        RN                  S9[L        RP                  " U R(                  /[L        RN                  S9[L        RP                  " U R4                  /[L        RR                  S9S.nGOKURG                  5       RI                  5       RK                  [L        RN                  5      [L        RP                  " U R&                  /[L        RN                  S9[L        RP                  " U R(                  /[L        RN                  S9[L        RP                  " U R*                  /[L        RN                  S9[L        RP                  " U R0                  /[L        RN                  S9[L        RP                  " U R2                  /[L        RR                  S9[L        RP                  " U R4                  /[L        RR                  S9S.nU R                  (       aG  [L        RT                  " U[L        RN                  S9nU R                  (       a  U	 H  nSUU'   M
     UUS'   U RV                  (       a  [Y        Xm5      US'   URZ                  S   nU R\                  (       a?  [        R_                  S5        [L        RT                  " UU4[L        RN                  S9nUUS'   U R`                  (       a  [c        U Rd                  5      Rf                  Ri                  5       n[        R                  S U5        SS!K5J6n  [        R_                  S"U S#35        U/n[A        U5       H;  u  nn[n        Rp                  Rs                  US$[u        U5      -   5      nU" UU5        M=     [        R                  S%U5        U Rv                  (       a  g[        R                  S&5        [y        U Rd                  U Rz                  U R|                  5      n[        R                  S'5        UR                  SU5      n/ n[        U R                  5       HQ  n[        R                  " 5       n UR                  SU5      nURE                  [        R                  " 5       U -
  5        MS     SS(KCJDn!  URZ                  S   nU!" UU5      n"[#        S)5        US   n#[#        SU#5        U R6                  (       a  [#        SUS*   5        U R8                  (       a  [#        SUS+   5        U(       aY  U#RZ                  u  nn$/ n%[        U5       H8  nURC                  U#U   SS9nU%RE                  U5        [#        S,U S-U 35        M:     OqU#RZ                  u  nn&n$/ n%[        U5       HP  n[        U&5       H>  n'URC                  U#U   U'   SS9nU%RE                  U5        [#        S,U S.U' SU 35        M@     MR     U(       a  UR:                  R                  UU R0                  S/5      n([        R                  " U#5      n)[#        S5        [#        S05        [#        U(5        [#        U5        [#        S5        [#        S15        [#        U)5        [#        U%5        [#        S5        UU%:H  n*[#        S2U*(       a  S3OS45        U*U"S5'   U R                  (       a  [        U UUUUUU	5      n+[#        S6U+5        [#        S7U"5        U"$ s  sn
f )8a!  Test GPT-2 model

Args:
    args (argparse.Namespace): arguments parsed from command line
    sentences (Optional[List[str]], optional): input text. Defaults to None.

Returns:
    Union[Dict[str, Any], None]: A dictionary with string with metric name, and value can be integer or string.
r>   r  left)r   r  N)zThe product is releasedzI enjoy walking in the parkzTest best way to investptTreturn_tensorsr   r   r   walk in park)add_prefix_spacer  2--------------------------------------------------CTest PyTorch model and beam search with huggingface transformers...r  !huggingface transformers outputs:r  r  r  skip_special_tokens: 'Testing beam search with onnxruntime...r   r  r  r   rY   zYUse prefix vocab mask with all ones in ORT, but no corresponding setting for Torch model.r\   test_data_diroutput_test_datazSaving test_data to z/test_data_set_* ...test_data_set_
ORT inputszCreating ort session......zRun ort session......r  ORT outputs:r`   r   batch z sequence: 
 sequence rf   Torch Sequences:ORT Sequences:zTorch and ORT result issame	differentparityTorch LatencyORT)Jr   r   r  r   r   padding_side	eos_token	pad_tokenr   r  encoderY   r   r   r  r  rj   rW  r  r  r  r  rX   r  r  r  r  rV   rW   r  r  r  r   decoder   r  numpyastyper   r  arrayfloat32r  r]   r  r3  r\   r   rm   r   r   r   r  bert_test_datar.  rt   ru   rs   rr   rk   r   ri   rg   runr   r  r  r  r	  r.  r
  
LongTensorrl   r  ),r   r  r  	tokenizerr   r  r   r   	bad_wordsr  word_idr  r  r  r  torch_decoded_sequencesbeam_outputsr  sequencedecoded_sequencerY   bad_word_idr  r\   r,  r.  
all_inputsdirr   resultlatencyr  r  r	  r   r  r  ort_decoded_sequencesnum_sequencesr>  torch_sequencesort_sequencesis_sametorch_latency_outputs,                                               r)   test_gpt_modelrY  E  s    ??f$$$--d.E.EQUQ_Q_`I#I#--I++..++E 
	 ytDF{#I,-NI$$Y$FM.;<m7YmM<_m4\\F&&L&&L""J LhST~~)nn..!%!:!:%%!%!:!:..#66+8-d$(66R$:R:R & 
" 	k9%12k<112''$l&C&CD##(L//0$\%;%;<KAx(//d/S#**+;<QCr*+,- =
 
(O	
34"..077A((DOO#4BHHE((DOO#4BHHE"$((D,C,C+DBJJ"W	
 #..077A((DOO#4BHHE((DOO#4BHHE4>>"2"((C$&HHd.G.G-HPRPXPX$Y hh(;(;'<BJJO"$((D,C,C+DBJJ"W
 WWj:
??,*+
;'  -)|!!#8#Q #JopGGZ$<BHHM&7"#T[[)0099;_m43*=/9MNOX
":.IAv'',,}.>Q.GHCS&) / LLv&
LL-.$T[[$,,@X@XYK
LL()__T6*F G4??#		OOD&)tyy{U*+ $
 4#J4F	.q	I	+y!## &),hq	"#,?? Z "z"A(//	!RV/W!(()9:F1#[)9(:;< #
 3<///]J "z"A=)#,#3#3IaLOY]#3#^ %,,-=>qcA3b1A0BCD * # &0088TE^E^`bc((3h !o%&hm#$h)-BB'7L"x5 
 	o34	%MY =s   7f5c                 l   U R                   S;   d   eU R                  (       a  [        R                  S5        g[        R
                  " U R                  U R                  S9nSUl        U R                   S:X  a*  [        R
                  " U R                  U R                  S9nO)[        R
                  " U R                  U R                  S9nUc  SS/nU" US	S
S9nUS   nUS   nSnUR                  U5      SS nU V	s/ s H  o/PM     nn	U R                  (       a  [        R                  SU5        O/ nUR                  n
U
R                  nU
R                  nU
R                   n[        R                  SU SU SU 35        / nU R"                  (       Gdt  [%        S5        [%        S5        UR'                  UUU R(                  U R*                  U R,                  U R.                  U R0                  UUU R2                  U R4                  U R6                  U(       a  UOSS
U R8                  =(       d    U R:                  S9n[%        SU5        [%        S5        [%        SUR<                  5        U R8                  (       a  [%        SUR>                  5        U R:                  (       a  [%        SUR@                  5        [C        UR<                  5       H7  u  nnURE                  US
S9nURG                  U5        [%        U SU 35        M9     [%        S5        [%        S5        [H        RJ                  " U[H        RL                  S9nU R                  (       a  U H  nSUU'   M
     URO                  5       RQ                  5       RS                  [H        RL                  5      [H        RT                  " U R(                  /[H        RL                  S9[H        RT                  " U R*                  /[H        RL                  S9[H        RT                  " U R,                  /[H        RL                  S9[H        RT                  " U R2                  /[H        RL                  S9[H        RT                  " U R4                  /[H        RV                  S9[H        RT                  " U R6                  /[H        RV                  S9S .nU R                  (       a  UUS!'   U RX                  (       a  [[        X\5      US'   U R\                  (       a  [_        U R`                  5      Rb                  Re                  5       n[        R                  S"U5        SS#K3J4n  U/n[C        U5       H;  u  nn[j        Rl                  Ro                  US$[q        U5      -   5      nU" UU5        M=     [        R                  S%U5        [s        U R`                  U Rt                  U Rv                  5      n/ n[y        U Rz                  5       HQ  n[|        R|                  " 5       nUR                  SU5      nURG                  [|        R|                  " 5       U-
  5        MS     UR                  S   nSS&KAJBn  U" UU5      n [%        S'5        WS   n![%        SU!5        U R8                  (       a  [%        SUS(   5        U R:                  (       a  [%        SUS)   5        U!R                  u  nn"n#/ n$[y        U5       HP  n[y        U"5       H>  n%URE                  U!U   U%   S
S9nU$RG                  U5        [%        S*U S+U% SU 35        M@     MR     U R"                  (       d  WR<                  R                  UU R2                  S5      n&[        R                  " U!5      n'[%        S5        [%        S,5        [%        U&5        [%        U5        [%        S5        [%        S-5        [%        U'5        [%        U$5        [%        S5        UU$:H  n([%        S.U((       a  S/OS05        U(U S1'   U R                  (       a  [        U UUUUUU5      n)[%        S2U)5        [%        S3U 5        U $ s  sn	f )4a%  Test T5 or MT5 model

Args:
    args (argparse.Namespace): arguments parsed from command line
    sentences (Optional[List[str]], optional): input text. Defaults to None.

Returns:
    Union[Dict[str, Any], None]: A dictionary with string with metric name, and value can be integer or string.
r  zLSkipping parity test as prefix vocab mask is not implemented by Hugging FaceNr  r  r?   z4translate English to French: The product is releasedzsummarize: research continues to show that pets bring real health benefits to their owners. Having a dog around can lead to lower levels of stress for both adults and kids.r   Tr!  r   r   r#  rf   r  zeos_token_id:z, pad_token_id:z, vocab_size:r%  r&  r  r'  r  r  r  r(  r*  r+  r   r   r  rY   r,  r-  r/  r0  r  r1  r`   r   r2  r3  r4  r5  zTorch and ORT result is r6  r7  r8  r9  r:  )Hr   r\   r   r   r   r  r   r   r;  r   r   r>  rY   r  r  r  r  rj   rW  r  r  r  r  rX   r  r  r  r  rV   rW   r  r  r  r   r?  r   r   r  r  r  r@  rA  rB  rC  r]   r  rm   r   r   r   r  rD  r.  rt   ru   rs   rr   r   ri   rg   r   r  r  rE  r3  r  r	  r.  r
  rF  rl   r  )*r   r  rG  r   r  r   r   rH  r  rI  r  r  r  r  rJ  rK  r  rL  rM  rY   rN  r,  r.  rO  rP  r   rR  r  r  rQ  r  r	  r   r  rT  r  rS  r>  rU  rV  rW  rX  s*                                             r)   test_t5_modelr[    s    ??m+++cd++D,C,Ct~~^I#I$*::##nn

 ,;;##nn
 B {
	 ytDF{#I,-NI$$Y/4M.;<m7YmM<_m4\\F&&L&&L""J
LL=ol^=YcXdef hST~~)nn..!%!:!:%%!%!:!:..#66+8-d$(66R$:R:R & 
$ 	k9%12k<112''$l&C&CD##(L//0$\%;%;<KAx(//d/S#**+;<QCr*+,- =
 
(O	
34*RXX6J(K&'J{# ) ]]_**,33BHH=hh0Ahh0AXXt~~.bhh? "$*C*C)DBHH U((D$7$7#8

K hh(?(?'@

SF )|!!#8#Q T[[)0099;_m43X
":.IAv'',,}.>Q.GHCS&) / LLv&$T[[$,,@X@XYK G4??#		v.tyy{U*+ $ #J34F	.q	I	+y!## &),hq	".7oo+Z
:}%A(//	!QUY/Z!(()9:F1#Zs"-=,>?@ &  &0088TE^E^`bc((3h !o%&hm#$h)-BB(G&M"x5 
 	o34	%M[ =s    `1c                 t   [        U 5      n[        UR                  5        UR                  S;   a  UR                  (       aF  [
        R                  R                  UR                  5      (       d  [        SUR                   35      eUR                  (       aF  [
        R                  R                  UR                  5      (       d  [        SUR                   35      eUR                  (       a  UR                  (       a"  UR                  (       a  UR                  (       d  [        S5      eUR                  S:H  =(       a    UR                  S:H  nUR                  S:X  a  U(       a  UR                  S:  as  UR                  S:  ac  [        U[        R                  5        [         R#                  S	5        UR                  S
:  d"  UR$                  (       d  UR&                  (       a  gO&[        U[        R(                  5        O[        U5        [         R#                  S5        UR                  S;   a
  [+        X!S9nO
[-        X!US9nU(       ae  UR.                  (       a2  [         R#                  SUR0                   SUR0                   S35        U$ [         R#                  SUR0                   35        U$ )a  Main entry function

Args:
    argv (Optional[List[str]], optional): _description_. Defaults to None.
    sentences (Optional[List[str]], optional): input text. Defaults to None.

Raises:
    ValueError: Path does not exist: --encoder_decoder_init_onnx
    ValueError: Path does not exist: --decoder_onnx
    ValueError: --decoder_onnx and --encoder_decoder_init_onnx are not used together for T5

Returns:
    Union[Dict[str, Any], None]: A dictionary with string with metric name, and value can be integer or string.
r  z1Path does not exist: --encoder_decoder_init_onnx z$Path does not exist: --decoder_onnx zB--decoder_onnx shall use together with --encoder_decoder_init_onnxr`   r>   re   rc   zThe test for gpt2_sampling onnx model is limited to non-custom model with small top_p(e.g <=0.01) value. The result should be the same as gpt2 greedy search.g{Gz?Nzstart testing model...)r  )r  r  zOutput files: r:   z.datazOutput file: )r   r   rH   r   r   rt   ru   r  r   r   r  r  r  r  r    r4   r   r   r  r_   r3   r[  rY  rP   r   )r6   r  r   r  rQ  s        r)   r   r     s     4 D-'))"''..A_A_2`2`PQUQoQoPpqrrRWW^^D4E4E%F%FCDDUDUCVWXX**43D3Dd&D&Dabb!#F(A(AQ(FI& Y::

S 0$T>+B+BCKK p zzD DKK499 4= %T>+F+FG &
KK()-'t9YO((KK.R}EJK M KK-}56Mr,   __main__r&   )T)shared_   NN)r_  )r   r`   rf   )NFr!  )a__doc__rn   loggingr   rt   r  enumr   pathlibr   typingr   r@  r   r   r
  r  r   r   fusion_utilsr   r	   r
   r   r  r   transformersr   r   r   r   r   r   r   r   onnxruntimer   r   r   r   4onnxruntime.transformers.models.gpt2.convert_to_onnxr   r   0onnxruntime.transformers.models.gpt2.gpt2_helperr   2onnxruntime.transformers.models.t5.convert_to_onnxr   r   ,onnxruntime.transformers.models.t5.t5_helperr   r   	getLoggerr   r    r  rr   	Namespacer   r   r   boolr   r   r   r  r  r$  ry   dictr@  rK  rO  r`  rg  ro  r  r  r  r  r  r  r  rT  rh  ry  r  r  r  r  r2   r  Tensorr  r  rY  r[  r.   r-   r,   r)   <module>rq     s  
#J    	        4 $ 4 4  	 	 	  T
 
		2	T Q$s)d* Qh6H6H Qh*)x)) *)Z!X'' !<Os Od O$KC K4 K[_ K\3  D Ue B5 5I 5pIodoo Io) IoXYa4?? Yay Ya~ #$($(g!g!g! g! 	g!
 Tkg! Tkg!T: j (  
+>"#Ji.J .bS
S&*S>BS	Slf3* f3R	 c * 9:WY 1	 1# 14PS9 1hf$I f$RD) D DT kkk k 	k
 k\&f &fR\* \~8 8v# ae F &*hhh #h 
	hV	"0N '5&?&?y0


y0#y0x?9


?977?9 ||?9 LL	?9
 ?9 ?9 S	??9 
#s(^?9D	 #'T


TCy4T Tnz** ztCy47G zz8tCy4 849t3C 8v zF r,   