U
    h-                     @   s,   d dl mZ G dd dZG dd dZdS )    )ArgumentParserc                   @   s   e Zd ZdZdZdZdZdS )AttentionMaskFormatr            N)__name__
__module____qualname__MaskIndexEndZMaskIndexEndAndStartAttentionMaskNoMask r   r   K/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/fusion_options.pyr      s   r   c                   @   sH   e Zd ZdZdd ZdddZdd Zed	d
 Zee	dddZ
dS )FusionOptionsz'Options of fusion in graph optimizationc                 C   s   d| _ d| _d| _d| _d| _d| _d| _d| _d| _d| _	d| _
d| _d| _d| _d| _|dkrhd| _tj| _|dkrtj| _n|dkrtj| _|dkrd| _d| _d| _d| _d| _d| _d| _d S )NTFclipZbertZvitZunetZvaer   )enable_geluenable_layer_normenable_attentionenable_rotary_embeddingsuse_multi_head_attentionZ!disable_multi_head_attention_biasenable_skip_layer_normenable_embed_layer_normenable_bias_skip_layer_normenable_bias_geluenable_gelu_approximationZenable_qordered_matmulenable_shape_inferenceenable_gemm_fast_gelugroup_norm_channels_lastr   r   attention_mask_formatr
   r   enable_nhwc_convenable_group_normenable_skip_group_normenable_bias_splitgeluenable_packed_qkvenable_packed_kvenable_bias_add)self
model_typer   r   r   __init__   s<    
zFusionOptions.__init__Tc                 C   s   |rt j| _nt j| _d S N)r   r   r   r
   )r'   Zuse_raw_maskr   r   r   use_raw_attention_maskF   s    
z$FusionOptions.use_raw_attention_maskc                 C   s   t j| _d S r*   )r   r   r   )r'   r   r   r   disable_attention_maskL   s    z$FusionOptions.disable_attention_maskc                 C   sB  t | j}| jrd|_| jr"d|_| jr.d|_| jr:d|_	| j
rFd|_
| jrRd|_| jr^d|_| jrjd|_| jrvd|_| jrd|_| jrd|_| jrd|_| jr|d | jr|d | jr|  | jdkr>| jrd|_| jrd|_| jrd|_ | j!rd|_"| j#rd|_$| j%r"d|_&| j'r0d|_(| j)r>d|_*|S )NFTr   )+r   r(   disable_gelur   disable_layer_normr   Zdisable_rotary_embeddingsr   disable_attentionr   r   disable_skip_layer_normr   disable_embed_layer_normr   disable_bias_skip_layer_normr   disable_bias_gelur   r   disable_shape_inferencer   r   use_mask_indexr+   no_attention_maskr,   use_group_norm_channels_firstr   disable_nhwc_convr    disable_group_normr!   disable_skip_group_normr"   disable_bias_splitgelur#   disable_packed_qkvr$   disable_packed_kvr%   disable_bias_addr&   )argsoptionsr   r   r   parseO   sb    


zFusionOptions.parseparserc                 C   s  | j ddddd | jdd | j ddddd | jdd	 | j d
dddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j dddd d | jdd! | j d"ddd#d | jdd$ | j d%ddd&d | jdd' | j d(ddd)d | jdd* | j d+ddd,d | jdd- | j d.ddd/d | jdd0 | j d1ddd2d | jdd3 | j d4ddd5d | jdd6 | j d7ddd8d | jdd9 | j d:ddd;d | jdd< | j d=ddd>d | jdd? | j d@dddAd | jddB | j dCdddDd | jddE | j dFdddGd d S )HNz--disable_attentionF
store_truezdisable Attention fusion)requiredactionhelp)r/   z--disable_skip_layer_normz%disable SkipLayerNormalization fusion)r0   z--disable_embed_layer_normz&disable EmbedLayerNormalization fusion)r1   z--disable_bias_skip_layer_normz2disable Add Bias and SkipLayerNormalization fusion)r2   z--disable_bias_geluz)disable Add Bias and Gelu/FastGelu fusion)r3   z--disable_layer_normz!disable LayerNormalization fusion)r.   z--disable_geluzdisable Gelu fusion)r-   z--enable_gelu_approximationz+enable Gelu/BiasGelu to FastGelu conversion)r   z--disable_shape_inferencez disable symbolic shape inference)r4   z--enable_gemm_fast_geluzenable GemmfastGelu fusion)r   z--use_mask_indexzWuse mask index to activate fused attention to speed up. It requires right-side padding!)r5   z--use_raw_attention_maskzuse raw attention mask. Use this option if your input is not right-side padding. This might deactivate fused attention and get worse performance.)r+   z--no_attention_maskz1no attention mask. Only works for model_type=bert)r6   z--use_multi_head_attentionzUse MultiHeadAttention instead of Attention operator for testing purpose. Note that MultiHeadAttention might be slower than Attention when qkv are not packed. )r   z--disable_group_normz9not fuse GroupNorm. Only works for model_type=unet or vae)r9   z--disable_skip_group_normzPnot fuse Add + GroupNorm to SkipGroupNorm. Only works for model_type=unet or vae)r:   z--disable_packed_kvz[not use packed kv for cross attention in MultiHeadAttention. Only works for model_type=unet)r=   z--disable_packed_qkvz[not use packed qkv for self attention in MultiHeadAttention. Only works for model_type=unet)r<   z--disable_bias_addz0not fuse BiasAdd. Only works for model_type=unet)r>   z--disable_bias_splitgeluz6not fuse BiasSplitGelu. Only works for model_type=unet)r;   z--disable_nhwc_convz:Do not use NhwcConv. Only works for model_type=unet or vae)r8   z--use_group_norm_channels_firstznUse channels_first (NCHW) instead of channels_last (NHWC) for GroupNorm. Only works for model_type=unet or vae)r7   z--disable_rotary_embeddingsz5Do not fuse rotary embeddings into RotaryEmbedding op)add_argumentset_defaultsrB   r   r   r   add_arguments   s@   zFusionOptions.add_argumentsN)T)r   r   r	   __doc__r)   r+   r,   staticmethodrA   r   rJ   r   r   r   r   r      s   -

5r   N)argparser   r   r   r   r   r   r   <module>   s   