U
    hV                     @   sp  d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z-m.Z. d dl/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6m7Z7m8Z8m9Z9 d dl:m;Z; ee<Z=G dd de;Z>dS )    )	getLogger)ListOptional)PackingMode)AttentionMaskFusionAttention)FusionBartAttention)FusionBiasGelu)FusionEmbedLayerNormalization)FusionFastGelu)
FusionGelu)FusionGeluApproximation)FusionGemmFastGelu)FusionLayerNormalizationFusionLayerNormalizationTF)AttentionMaskFormatFusionOptions)FusionQOrderedAttention)FusionQOrderedGelu) FusionQOrderedLayerNormalization)FusionQOrderedMatMul)FusionReshape)FusionRotaryEmbeddings)FusionShape)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)FusionUtils)
GraphProto
ModelProtoTensorProtoValueInfoProtohelper)	OnnxModelc                       sF  e Zd ZdFeeed fddZdd Zdd Zd	d
 Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zeee ed#d$d%Zed&d'd(Zejfeeed)d*d+Zd,d- ZdGd0d1Z d2d3 Z!d4d5 Z"d6d7 Z#d8d9 Z$dHe%e& ed<d=d>Z'd?d@ Z(dIdAdBZ)dJedCdDdEZ*  Z+S )KBertOnnxModelr   )model	num_headshidden_sizec                    s   |dkr|dks(|dkr$|| dks(t t | || _|| _t| | _t| | j| j| j| _t	| | j| j| j| _
t| | _dS )aG  Initialize BERT ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   N)AssertionErrorsuper__init__r'   r(   r   attention_maskr   attention_fusionr   qordered_attention_fusionr   utils)selfr&   r'   r(   	__class__ L/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/onnx_model_bert.pyr+   %   s    (
   zBertOnnxModel.__init__c                 C   s   | j   | j  d S N)r-   applyr.   r0   r3   r3   r4   fuse_attention:   s    
zBertOnnxModel.fuse_attentionc                 C   s4   t | }|  t| }|  t| }|  d S r5   )r   r6   r   r   r0   fusionr3   r3   r4   	fuse_gelu?   s    zBertOnnxModel.fuse_geluc                 C   s   t | |}|  d S r5   )r	   r6   )r0   is_fastgelur:   r3   r3   r4   fuse_bias_geluH   s    
zBertOnnxModel.fuse_bias_geluc                 C   s   t | }|  d S r5   )r   r6   r9   r3   r3   r4   gelu_approximationL   s    z BertOnnxModel.gelu_approximationc                 C   s   t | }|  d S r5   )r   r6   r9   r3   r3   r4   fuse_gemm_fast_geluP   s    z!BertOnnxModel.fuse_gemm_fast_geluc                 C   s   t | }|  d S r5   )r   r6   r9   r3   r3   r4   fuse_add_bias_skip_layer_normT   s    z+BertOnnxModel.fuse_add_bias_skip_layer_normc                 C   s   t | }|  d S r5   )r   r6   r9   r3   r3   r4   fuse_reshapeX   s    zBertOnnxModel.fuse_reshapec                 C   s   t | }|  d S r5   )r   r6   r9   r3   r3   r4   
fuse_shape\   s    zBertOnnxModel.fuse_shapec                 C   s   t | |}|  d S r5   )r
   r6   )r0   use_mask_indexr:   r3   r3   r4   fuse_embed_layer`   s    
zBertOnnxModel.fuse_embed_layerc                 C   s4   t | }|  t| }|  t| }|  d S r5   )r   r6   r   r   r9   r3   r3   r4   fuse_layer_normd   s    zBertOnnxModel.fuse_layer_normc                 C   s   t | }|  d S r5   )r   r6   r9   r3   r3   r4   fuse_simplified_layer_normo   s    z(BertOnnxModel.fuse_simplified_layer_normc                 C   s   t | }|  d S r5   )r   r6   r9   r3   r3   r4   fuse_skip_layer_norms   s    z"BertOnnxModel.fuse_skip_layer_normc                 C   s   t | }|  d S r5   )r   r6   r9   r3   r3   r4   fuse_skip_simplified_layer_normw   s    z-BertOnnxModel.fuse_skip_simplified_layer_normc                 C   s   t | }|  ttdd | jjj}ttdd |}d}|t	| jj
k r| jj
| }d|jkr~|j|kr~| jj
| q>|d7 }q>d S )Nc                 S   s   | j dko| jdkS )NRotaryEmbeddingcom.microsoft)op_typedomainnoder3   r3   r4   <lambda>       z6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>c                 S   s   | j S r5   )rL   rM   r3   r3   r4   rO      rP   r   rI      )r   r6   listfilterr&   graphrN   setmaplenZ	functionsnamerL   remove)r0   r:   Zrot_emb_nodesZnon_ms_domains_to_keepifnr3   r3   r4   fuse_rotary_embeddings{   s     z$BertOnnxModel.fuse_rotary_embeddingsc                 C   s   t | }|  d S r5   )r   r6   r9   r3   r3   r4   fuse_qordered_mamtul   s    z"BertOnnxModel.fuse_qordered_mamtul)rK   input_indicescastedc           
         s   g }|   }| |}|D ]|  fdd|D }|D ]`}| |rR|s|| q4||kr4|| }	|	jdkr4| |	jd dk	r4|r4||	jd  q4q|S )z
        Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention).
        Returns a list of the graph input names based on the filter whether it is casted or not.
        c                    s$   g | ]}|t  jk r j| qS r3   )rW   input).0rZ   rM   r3   r4   
<listcomp>   s      zABertOnnxModel.get_graph_inputs_from_node_type.<locals>.<listcomp>Castr   N)output_name_to_nodeget_nodes_by_op_typefind_graph_inputappendrK   r`   )
r0   rK   r^   r_   Zgraph_inputsrd   nodesZbert_inputsZ
bert_inputparentr3   rM   r4   get_graph_inputs_from_node_type   s    

z-BertOnnxModel.get_graph_inputs_from_node_typer_   c                 C   s,   |  ddddg|}||  ddg|7 }|S )NEmbedLayerNormalizationr   rQ      	Attention   )rj   )r0   r_   inputsr3   r3   r4   !get_graph_inputs_from_fused_nodes   s    z/BertOnnxModel.get_graph_inputs_from_fused_nodes)rT   graph_inputnew_typec                 C   s  t |tstt |tst| |js,t|jjjt	|krFdg fS d}g }| 
 }|j|krl||j }dd |D }|r| d}	|	d |j }
|j }|| |
|_tjd|jg|
gt	|jjj|	d}|j|g |D ]}t||j|
 qdd |D }|D ]L}t|dt	|kr<| |jd	 |j | |jd	 s|| q|rl| | t	||jj_||fS )
a  Change graph input type, and add Cast node if needed.

        Args:
            graph (GraphProto): graph
            graph_input (TensorProto): input of the graph
            new_type (int, optional): new data type. Defaults to TensorProto.INT32.

        Returns:
            NodeProto: a new Cast node that added. None if Cast node is not added.
            List[NodeProto]: Cast nodes that have been removed.
        Nc                 S   s   g | ]}|j d kr|qS rc   rK   ra   rN   r3   r3   r4   rb      s     
 z9BertOnnxModel.change_graph_input_type.<locals>.<listcomp>rc   _)torX   c                 S   s   g | ]}|j d kr|qS rt   ru   rv   r3   r3   r4   rb      s     
 rx   r   )
isinstancer   r)   r"   rf   rX   typetensor_typeZ	elem_typeintinput_name_to_nodesZcreate_node_nameZ
value_infoaddZCopyFromr#   	make_noderN   extendr$   Zreplace_node_inputZget_node_attributereplace_input_of_all_nodesoutputZfind_graph_outputrg   remove_nodes)r0   rT   rr   rs   Znew_cast_nodenodes_to_remover}   rh   Znodes_not_castZ	node_nameZoutput_nameZnew_value_inforN   Z
nodes_castr3   r3   r4   change_graph_input_type   sJ    




z%BertOnnxModel.change_graph_input_typec                 C   sd   |   }d}d}|jD ]0}| ||tj\}}|r:|d7 }|t|7 }qtd| d| d dS )zPChange data type of all graph inputs to int32 type, and add Cast node if needed.r   rQ   z)Graph inputs are changed to int32. Added z Cast nodes, and removed z Cast nodes.N)rT   r`   r   r!   INT32rW   loggerinfo)r0   rT   Zadd_cast_countZremove_cast_countrr   Znew_nodeZremoved_nodesr3   r3   r4   change_graph_inputs_to_int32   s    
z*BertOnnxModel.change_graph_inputs_to_int32
batch_sizemax_seq_lenc                 C   s   | j dd| j dd }| jjjD ]B}|j|kr"|jjjjd }||_	|dk	r"|jjjjd }||_	q"| jjj
D ]}|jjjjd }||_	qpdS )zD
        Update input and output shape to use dynamic axes.
        Trk   Fr   NrQ   )rq   r&   rT   r`   rX   rz   r{   shapeZdimZ	dim_paramr   )r0   Zdynamic_batch_dimZdynamic_seq_lenZbert_graph_inputsr`   Z	dim_protor   r3   r3   r4   use_dynamic_axes   s    

zBertOnnxModel.use_dynamic_axesc                 C   s   |    d S r5   )adjust_reshape_and_expandr7   r3   r3   r4   
preprocess  s    zBertOnnxModel.preprocessc           
   	   C   s:  g }|   D ]}|jdkr| |jd }|d k	rd|jdkrd||g | |jd |jd  q| |ddddgddddg| 	 }|d k	r|d }| |jd }|d }| |jd }|d }	|d k	r|d k	rt
|d	krt
|dkr|d |d kr|	jd |jd< q|r6| | td
t
|  d S )NZReshaperQ   r   ZExpandZSlice   z"Removed Reshape and Expand count: )rh   rK   Zget_constant_valuer`   sizer   r   r   match_parent_pathrd   rW   r   r   r   )
r0   r   rN   Zreshape_shapeZreshape_pathZexpand_nodeZexpand_shape_valueZreshape_before_expandZshape_valueZ
slice_noder3   r3   r4   r     sD    





z'BertOnnxModel.adjust_reshape_and_expandc                 C   sv  |   }g }|  D ]P}dddd}|j|kr||j }| |ddddd	d
g|dddddg|}|d k	r|\}}}	}
}}|jd |  jd jkr|jd |jd< |   }|jdkr| |dddd
gddddg|}|d k	r|d jd |  jd jkrtj	d|jdt
|jd  |j|jd d}d|_|jtd| jg | || |j || q| | d S )NrQ   r   ro   )rl   	ReduceSumrn   rc   ZConstantOfShapeZConcatZ	UnsqueezeZGatherZShapern   r   r   Z_remove_mask)rp   outputsrX   rJ   r'   )rd   rh   rK   r   r`   rT   rX   r   r#   r   rW   rL   	attributer   Zmake_attributer'   add_nodeZget_graph_by_noderg   r   )r0   rd   r   rN   Zop_input_idrZ   Zparent_nodescastZconstantOfShapeconcatZ	unsqueezeZgatherr   Zattention_noder3   r3   r4   clean_graph=  sd    	




zBertOnnxModel.clean_graphc                 C   s   |    |   d S r5   )r   Zprune_graphr7   r3   r3   r4   postprocess  s    zBertOnnxModel.postprocessNF)optionsadd_dynamic_axesc                 C   s  |d k	r|j s|   | j  | j  |d ks8|jrH|   |   |d ksV|jr^| 	  | 
  |   |d ks||jr|   |   |d ks|jr|   |d k	r| j|j |jrt| jtst| | j| j| j|j| _|d ks|jr|   |d ks|jr|   |   |d ks.|j rD|jt!j"k}| #| | j$  | %  |d ksh|j&r| j'dd | j'dd |d ks|j(r| )  |d k	r|j*r| +  |d k	r|j,r| -  | .  |r| /  t01d| 2   d S )NT)r<   Fzopset version: )3Zenable_shape_inferenceZdisable_shape_inferencer/   Zremove_identity_nodesZremove_useless_cast_nodesZenable_layer_normrE   rF   Zenable_gelur;   r   rA   Zenable_skip_layer_normrG   rH   Zenable_rotary_embeddingsr\   r,   Zset_mask_formatZattention_mask_formatZuse_multi_head_attentionry   r-   r   r   r(   r'   Zenable_attentionr8   Zenable_qordered_matmulr]   rB   Zenable_embed_layer_normr   ZMaskIndexEndrD   Zremove_useless_reshape_nodesr   Zenable_bias_gelur=   Zenable_bias_skip_layer_normr@   Zenable_gelu_approximationr>   Zenable_gemm_fast_gelur?   Zremove_unused_constantr   r   r   Zget_opset_version)r0   r   r   rC   r3   r3   r4   optimize  sb    

    

zBertOnnxModel.optimizec                 C   sd   i }ddddddddd	d
ddg}ddddg}|| D ]}|  |}t|||< q4td|  |S )z8
        Returns node count of fused operators.
        rl   rn   MultiHeadAttentionGeluFastGeluBiasGeluZGemmFastGeluLayerNormalizationSimplifiedLayerNormalizationSkipLayerNormalization SkipSimplifiedLayerNormalizationrI   QOrderedAttentionZQOrderedGeluZQOrderedLayerNormalizationZQOrderedMatMulzOptimized operators: )re   rW   r   r   )r0   op_countopsZq_opsoprh   r3   r3   r4   get_fused_operator_statistics  s(    
z+BertOnnxModel.get_fused_operator_statisticsc           	         s
   dkr|    td fdd}|d}|d|d |d }|d	|d
 |d }|d|d }|d|d }|dko|dko||ko|d| kp|d| k}|dkrtd |dkrtd |dkrtd |dkrtd |dkrtd |S )zA
        Returns True when the model is fully optimized.
        NZop_namec                    s     | pdS )Nr   )getr   fused_op_countr3   r4   r     s    z2BertOnnxModel.is_fully_optimized.<locals>.op_countrl   rn   r   r   r   r   r   r   r   r   r   r   r   zLayer Normalization not fusedz$Simple Layer Normalization not fusedzGelu (or FastGelu) not fusedz!EmbedLayerNormalization not fusedz+Attention (or MultiHeadAttention) not fused)r   strr   debugwarning)	r0   r   r   embedZ	attentionZgeluZ
layer_normZsimple_layer_normZ
is_perfectr3   r   r4   is_fully_optimized  s4    





z BertOnnxModel.is_fully_optimized)use_symbolic_shape_inferc                 C   s   t | }|| d S r5   )r   convert)r0   r   Zpacking_moder3   r3   r4   convert_to_packing_mode  s    z%BertOnnxModel.convert_to_packing_mode)r   r   )r   r   )NF)N)F),__name__
__module____qualname__r    r|   r+   r8   r;   r=   r>   r?   r@   rA   rB   rD   rE   rF   rG   rH   r\   r]   r   r   boolrj   rq   r!   r   r   r"   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r3   r3   r1   r4   r%   $   sD   		A
)BM
(r%   N)?loggingr   typingr   r   r   r   Zfusion_attentionr   r   Zfusion_bart_attentionr   Zfusion_biasgelur	   Zfusion_embedlayerr
   Zfusion_fastgelur   Zfusion_gelur   Zfusion_gelu_approximationr   Zfusion_gemmfastgelur   Zfusion_layernormr   r   Zfusion_optionsr   r   Zfusion_qordered_attentionr   Zfusion_qordered_gelur   Zfusion_qordered_layernormr   Zfusion_qordered_matmulr   Zfusion_reshaper   Zfusion_rotary_attentionr   Zfusion_shaper   Zfusion_simplified_layernormr   r   Zfusion_skiplayernormr   r   Zfusion_utilsr   Zonnxr   r    r!   r"   r#   Z
onnx_modelr$   r   r   r%   r3   r3   r3   r4   <module>   s4   