U
    “±Ëh…  ã                   @   s¼   d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZ eeƒZG dd„ deƒZdS )é    )Ú	getLogger)ÚOptional)ÚFusionAttentionUnet)ÚFusionBiasAdd)ÚFusionBiasSplitGelu)ÚFusionGroupNorm)ÚFusionNhwcConv)ÚFusionOptions)ÚFusionSkipGroupNorm)ÚFusionInsertTransposeÚFusionTranspose)Ú
ModelProto)Ú	OnnxModel)ÚBertOnnxModelc                       sˆ   e Zd Zdeeedœ‡ fdd„Zdd„ Zdd„ Zd	d
„ Zdd„ Z	dd„ Z
dee dœdd„Zdd„ Zdee dœdd„Zdd„ Z‡  ZS )ÚUnetOnnxModelr   )ÚmodelÚ	num_headsÚhidden_sizec                    s>   |dkr|dks(|dkr$|| dks(t ‚tƒ j|||d dS )aG  Initialize UNet ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   )r   r   N)ÚAssertionErrorÚsuperÚ__init__)Úselfr   r   r   ©Ú	__class__© úL/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/onnx_model_unet.pyr      s    (zUnetOnnxModel.__init__c                 C   s   |   ¡  d S ©N)Úremove_useless_div©r   r   r   r   Ú
preprocess%   s    zUnetOnnxModel.preprocessc                 C   s   |   ¡  |  ¡  d S r   )Zprune_graphZremove_unused_constantr   r   r   r   Úpostprocess(   s    zUnetOnnxModel.postprocessc                 C   s~   dd„ |   ¡ D ƒ}g }|D ]}|  |d¡dkr| |¡ q|D ]}|  |jd |jd ¡ q>|rz|  |¡ t dt	|ƒ¡ dS )zRemove Div by 1c                 S   s   g | ]}|j d kr|‘qS )ZDiv)Zop_type)Ú.0Únoder   r   r   Ú
<listcomp>.   s     
 z4UnetOnnxModel.remove_useless_div.<locals>.<listcomp>g      ð?é   r   zRemoved %d Div nodesN)
ÚnodesZfind_constant_inputÚappendÚreplace_input_of_all_nodesÚoutputÚinputZremove_nodesÚloggerÚinfoÚlen)r   Z	div_nodesÚnodes_to_removeÚdivr"   r   r   r   r   ,   s    
z UnetOnnxModel.remove_useless_divc                 C   s   t | dd}| ¡  d S )NT)Zupdate_weight)r   Úapply)r   Zconv_to_nhwc_convr   r   r   Úconvert_conv_to_nhwc<   s    z"UnetOnnxModel.convert_conv_to_nhwcc                 C   sØ   t | ƒ}| ¡  d}|  d¡}|D ]’}t |d¡}t|tƒs@t‚|ttt	|ƒƒƒkrVq"|  
|jd ¡s†|  |jd ¡s†|  
|jd ¡rŠt‚|  |jd |jd ¡ |  |¡ |d7 }q"t	|jƒ| }|rÔt d|¡ d S )Nr   Z	TransposeÚpermr$   zRemoved %d Transpose nodes)r   r/   Úget_nodes_by_op_typer   Zget_node_attributeÚ
isinstanceÚlistr   Úranger,   Zfind_graph_outputr(   Zfind_graph_inputr)   r'   Úremove_noder-   r*   r+   )r   Úfusion_transposeZremove_countr%   r"   ZpermutationÚtotalr   r   r   Úmerge_adjacent_transposeA   s*    
ÿþý

z&UnetOnnxModel.merge_adjacent_transposeN)Úoptionsc                 C   s\   |d kp|j }t| | j| jd|dƒ}| ¡  |d kp8|j}t| | j| jdd|ƒ}| ¡  d S )NFT)Úenable_packed_qkvr   r   r   r/   Úenable_packed_kv)r   r:   r;   Zself_attention_fusionr<   Zcross_attention_fusionr   r   r   Úfuse_multi_head_attention]   s(         ÿ     ÿz'UnetOnnxModel.fuse_multi_head_attentionc                 C   s   t | ƒ}| ¡  d S r   )r   r/   )r   Zfusionr   r   r   Úfuse_bias_addl   s    zUnetOnnxModel.fuse_bias_addc                 C   s´  |d k	r|j s|  ¡  | j ¡  | j ¡  |d ks8|jr@|  ¡  |d ksN|jrV|  ¡  |  	¡  |  
¡  |d kst|jr¤|d kp€|j}t| |ƒ}| ¡  t| ƒ}| ¡  |d ks²|jrÂt| ƒ}| ¡  |d ksÐ|jrÚ|  |¡ |d ksè|jrð|  ¡  |  ¡  | j ¡  |d ks|jr$t| ƒ}| ¡  |d ks6|jr>|  ¡  |d k	rX|jrX|  ¡  |d ksj|jrz|  ¡  |   ¡  |d k	r”|j!r”|  "¡  |  #¡  t$ %d|  &¡ › ¡ d S )Nzopset version: )'Zenable_shape_inferenceZdisable_shape_inferenceÚutilsZremove_identity_nodesZremove_useless_cast_nodesZenable_layer_normZfuse_layer_normZenable_geluZ	fuse_gelur   Zfuse_reshapeZenable_group_normZgroup_norm_channels_lastr   r/   r   Zenable_bias_splitgelur   Zenable_attentionr=   Zenable_skip_layer_normZfuse_skip_layer_normZ
fuse_shapeZremove_useless_reshape_nodesZenable_skip_group_normr
   Zenable_bias_skip_layer_normZfuse_add_bias_skip_layer_normZenable_gelu_approximationZgelu_approximationZenable_nhwc_convr0   r9   Zenable_bias_addr>   r    r*   r+   Zget_opset_version)r   r:   Zchannels_lastZgroup_norm_fusionZinsert_transpose_fusionZbias_split_gelu_fusionZskip_group_norm_fusionr   r   r   Úoptimizep   sN    




zUnetOnnxModel.optimizec              	   C   sN   i }ddddddddd	g	}|D ]}|   |¡}t|ƒ||< qt d
|› ¡ |S )z8
        Returns node count of fused operators.
        Z	AttentionZMultiHeadAttentionZLayerNormalizationZSkipLayerNormalizationZBiasSplitGeluZ	GroupNormZSkipGroupNormZNhwcConvZBiasAddzOptimized operators:)r2   r,   r*   r+   )r   Zop_countÚopsÚopr%   r   r   r   Úget_fused_operator_statistics±   s     ÷
z+UnetOnnxModel.get_fused_operator_statistics)r   r   )N)N)Ú__name__Ú
__module__Ú__qualname__r   Úintr   r   r    r   r0   r9   r   r	   r=   r>   r@   rC   Ú__classcell__r   r   r   r   r      s   Ar   N)Úloggingr   Útypingr   Zfusion_attention_unetr   Zfusion_bias_addr   Zfusion_biassplitgelur   Zfusion_group_normr   Zfusion_nhwc_convr   Zfusion_optionsr	   Zfusion_skip_group_normr
   r7   r   r   Zonnxr   Z
onnx_modelr   Zonnx_model_bertr   rD   r*   r   r   r   r   r   Ú<module>   s   