
    h/                         S SK r S SKrS SKJr  S SKrS SKJr  S SKJr  S SK	J
r
  S SKJrJr  S SKJrJr  S SKJrJr  S S	KJr  \ R,                  " \5      r/ S
Qr/ SQr " S S5      rg)    N)Path)float_to_float16_max_diff)	OnnxModel)optimize_model)	T5DecoderT5DecoderHelper)T5EncoderDecoderInitT5EncoderDecoderInitHelper)MT5ForConditionalGenerationT5ForConditionalGeneration)InferenceSession)zt5-smallzt5-basezt5-largezt5-3bzt5-11b)zgoogle/mt5-smallzgoogle/mt5-basezgoogle/mt5-largezgoogle/mt5-xlzgoogle/mt5-xxlc                      \ rS rSr\  S&S\S\S\S\S\4
S jj5       r\   S'S\S\S	\R                  S
\S\S\S\
\\\-  4   4S jj5       r\    S(S\\-  S	\R                  S\S\S\S\S\4S jj5       r\   S)S\S\\   S-  S\S\4S jj5       r\    S*S\S\S\S\S\S\S\S \S!\4S" jj5       r\S\\-  S#\S	\R                  S\4S$ j5       rS%rg)+T5Helper    
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 D   Un[         R                  R                  U5      (       a  [        U5      R                  S   nOUR                  S5      S     XB-  nU(       a  [         R                  R                  X5      OU n[         R                  R                  XTS-   5      $ )a  Build onnx path

Args:
    output_dir (str): output directory
    model_name_or_path (str): pretrained model name, or path to the model checkpoint
    suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
    new_folder (bool, optional): create a new directory for the model. Defaults to False.

Returns:
    str: path of onnx model
/z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   
model_name	directorys         f/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/models/t5/t5_helper.pyget_onnx_pathT5Helper.get_onnx_path!   s|    $ (
77==+,,0177;JS!"%
<FBGGLL8J	ww||IG';<<    	cache_dirdevice
model_typestate_dict_pathencoder_decoder_initc           	         US:X  a  [         R                  " XS9nO&US:X  a  [        R                  " XS9nO[        S5      eU(       a%  UR	                  [
        R                  " U5      5        [        UR                  UR                  UR                  5      nUR                  5       R                  U5        [        UR                  UR                  UR                  UR                  SU(       + S9nU(       a  SOSn	XS	U0$ )
a+  Load model given a pretrained name or path, then build models for ONNX conversion.

Args:
    model_name_or_path (str): pretrained model name or path
    cache_dir (str): cache directory
    device (torch.device): device to run the model
    model_type (str, optional): model type "t5" or "mt5"
    state_dict_path(str, optional): state dictionary path
    encoder_decoder_init (bool, optional): combine encoder and decoder kv cache initialization into one model.
Returns:
    Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
t5)r%   mt5z only support mode_type=t5 or mt5N)decoder_start_token_idoutput_cross_onlyr)   encoderdecoder)r   from_pretrainedr   
ValueErrorload_state_dicttorchloadr   r0   lm_headconfigevaltor	   r/   )
r   r%   r&   r'   r(   r)   modelr0   r/   encoder_names
             r!   
load_modelT5Helper.load_model>   s    * .>>?QgE5 /??@RhE?@@!!%**_"=>EMM5==%,,G&!&MMMMMMLL#'"66
 2F-9y'::r$   r:   onnx_model_pathverboseuse_external_data_formatuse_decoder_input_idsuse_int32_inputsc           	          [        U [        5      (       a  [        R                  " U UUUUUU5        g [        R                  " U UUUUU5        g )N)
isinstancer	   r
   export_onnxr   )r:   r&   r>   r?   r@   rA   rB   s          r!   rE   T5Helper.export_onnxl   sW     e122&22%(  ''( r$   N
onnx_modelop_block_listforce_fp16_logitsuse_symbolic_shape_inferc                    Uc  / SQnU R                  5        Vs1 s H  oDR                  iM     nn[        U5      nUR                  U5      n[        R                  SU SU 35        U R                  5       R                  S   R                  nSn	U R                  5       n
X;   d   eX   nSnUR                  S:X  a  Un[        R                  SUR                   35        SnUR                   H  nU R                  U5      nUc  M    O   [        U5      n[        R                  S	UR                   S
U 35        US:  n	O/[        R                  SUR                   SUR                   35        / n/ nU	(       d  Ub  U(       d  U/nUR                  /nSU;  Ga1  U R                  5       nSnSnU R!                  S5      nU(       a  SnU H  nUR                  U;  d  M  U R#                  XJ5      nU R%                  UU5      nU Vs/ s H  nUR                  U;   d  UU;   d  M  UPM     nnU Vs/ s H  nUR                  U;   d  UU;   d  M  UPM     nn['        U5      ['        U5      -   S:  d  M  UR)                  UR                  5        US-  nSnM     U(       a  M  ['        U5      U-
  n[        R                  SU SU 35        [        R                  SU 35        UUUU	S.n[        R                  SU 35        U(       a  U R*                  " SSS0UD6  U$ SSKJn  U" U R.                  4SS0UD6  U$ s  snf s  snf s  snf )aZ  Convert model to mixed precision.
   It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
Args:
    onnx_model (OnnxModel): optimized ONNX model
    op_block_list (List[str], optional): operators need to run in fp32.
    force_fp16_logits (bool, optional): force logits and last MatMul node to be in float16. Defaults to False.
    use_symbolic_shape_infer (bool, optional): use symbolic shape inference to convert float to float16. Defaults to True.
Returns:
    parameters(dict): a dictionary of parameters used in float16 conversion
N)SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationReluAddz	fp32 op: z
 fp16 op: r   FMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node rO   T   z#node counter of Add operator: fp32=z fp16=znode_block_list: )keep_io_typesrH   node_block_listforce_fp16_initializersz!auto_mixed_precision parameters: rJ   )convert_float_to_float16disable_shape_infer )nodesop_typeset
differenceloggerinfographoutputnameoutput_name_to_nodeinputget_initializerr   debugwarninginput_name_to_nodesget_nodes_by_op_typeget_parentsget_childrenlenappendrU   float16r:   )rG   rH   rI   rJ   nodeop_full_setfp32_op_setfp16_op_setlogits_output_nameis_weight_fp16_precisionra   last_matmul_nodeinitializerrb   max_diffrR   rS   rf   fp32_addchanged	add_nodesparentschildrenchildblocked_childrenparentblocked_parentsfp16_add
parametersrU   s                                 r!   auto_mixed_precisionT5Helper.auto_mixed_precision   s}   "  M 1;0@0@0BC0B||0BC-(!,,[9i}J{mDE (--/66q9>> $) (<<>!888"6<<8##KK=dii[IJK(88?* $ 1=HLLNtyykY[\d[efg'/$$NNJ4<<.Xabfbkbkalmn(/?/KUf/0M/445O%","@"@"BHG"77>I%Dyy7","8"8"S#-#:#:4AT#U/7,/7e5==M;Y]bfu]uEx ) , 29+18vFNNm<[_eix_xF ( +   01C4HHAM+22499=$MH&*G & '" 9~0HKK=hZvhZXY''89: +*.'?	

 	7
|DE#//\\Q[\  9$  $(  m Db,+s   M 
M(M4M
M
optimized_model_path
is_float16num_attention_headshidden_sizer   use_gpuforce_fp16_ioc	           
          SSK Jn	  Sn
U(       a  U	" S5      n
U(       + U
l        [        U SUUSU
US9nU(       a*  U(       a  [        R                  XS9  OUR                  US9  UR                  XSS	9  g)
zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsNr+   )r'   	num_headsr   	opt_leveloptimization_optionsr   )rI   )cast_input_outputT)all_tensors_to_one_file)fusion_optionsr   enable_skip_layer_normr   r   r    convert_model_float32_to_float16save_model_to_file)r>   r   r   r   r   r@   r   r   r   r   r   ms               r!   optimize_onnxT5Helper.optimize_onnx   s     	1##0#6 >R:R 7)#!5
 #--a-Q22]2S	1eijr$   ort_sessionc                     [        U [        5      (       a  [        R                  " XX#5      $ [        R                  " XX#5      $ )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.)rD   r	   r
   verify_onnxr   )r:   r   r&   rB   s       r!   r   T5Helper.verify_onnx#  s8     e122-99%fgg**5vXXr$   rW   ) F)r+   r   F)TFTF)NFT)FTFF)__name__
__module____qualname____firstlineno__staticmethodstrboolr"   r4   r&   dictr	   r   r<   rE   r   listr   intr   r   r   __static_attributes__rW   r$   r!   r   r       sM     	=== = 	=
 
= =8 
 !%*+;+;+; +; 	+;
 +; #+; 
c')33	4+; +;Z 
 ).&*!&//  	
 #'  $  :  +/"')-	nnCy4'n  n #'	n n`  */%)#%k%k!%k %k !	%k
 %k #'%k #%k %k %k %kN 
Y//
Y%
Y 
Y 	
Y 
Yr$   r   )loggingr   pathlibr   r4   rl   r   rG   r   	optimizerr   
t5_decoderr   r   t5_encoder_decoder_initr	   r
   transformersr   r   onnxruntimer   	getLoggerr   r\   PRETRAINED_T5_MODELSPRETRAINED_MT5_MODELSr   rW   r$   r!   <module>r      sT     	   -   $ 1 T P (			8	$M  NY NYr$   