U
    hI                     @   sZ   d dl Z d dlZd dlmZmZ d dlmZmZ d dl	m
Z
 e eZG dd deZdS )    N)AttentionMaskFusionAttention)TensorProtohelper)	OnnxModelc                       s<   e Zd ZdZeeeed fddZdd Zdd Z	  Z
S )	FusionBartAttentionz?
    Fuse Bart Attention subgraph into one Attention node.
    )modelhidden_size	num_headsattention_maskc                    s   t  |||| d S )N)super__init__)selfr   r	   r
   r   	__class__ R/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/fusion_bart_attention.pyr      s    zFusionBartAttention.__init__c                 C   s  | j |dgdg}|d kr dS |d }| j |dddgdddg}	| j |dddgdddg}
|	d ksp|
d krtdS |	\}}}|
\}}}|jd |ks|jd |krdS | j |dddgdddg}| j |dddgdddg}|d ks|d krdS |d	 j|jks|d	 j|jkrdS | j |ddd
gdddg}| j |ddd
gdddg}| j |ddd
gdddg}|d ks|d ks|d krdS |d	 }|d	 }|d	 }|jd }|jd |ks|jd |ks|jd |krdS dS )NConcat   Fr   	UnsqueezeZGatherZShape   MulT)r   match_parent_pathinputnameoutput)r   reshape_qkv_2reshape_qkv_1reshape_q_2reshape_k_2reshape_v_2
root_inputZconcat_qkv_2_pathZconcat_qkv_2Zreshape_qkv_2_path_1Zreshape_qkv_2_path_2_Zgather_1Zshape_1Zgather_2Zshape_2Zreshape_qkv_1_path_1Zreshape_qkv_1_path_2Zreshape_q_2_pathZreshape_k_2_pathZreshape_v_2_pathmul_qZmul_kZmul_vZgather_1_outr   r   r   check_runtime_shape_path   s<    	

$
0z,FusionBartAttention.check_runtime_shape_pathc           O      C   s	  | j |ddddddgddddddg}|d k	rB|\}}}}}	}
nd S g }|jD ],}||kr^qP||d jd krrqP|| qPt|dkrd S |d }|| }|jdkr| j |d }|jD ]6}|sq|| }dd |D }|ddkr|} qqt	d	d | j 
 jD }t	d
d | j 
 jD }| j |
dddddgddddd g}| j |
ddddddgdddddd g}| j |
dgdg}d\}}d\}}|d k	r|\}}}}}|jd }n|d k	r|\}}}}}}|}|jd }|jd }n|d k	rx|d jd |krx|}|d jd }|d jd }||krttdd | j  | }t|dkrr|d jd nd}ntd d S ||kr|nd}||kr|nd}| j |
ddgddg} | j |
dddddgdddddg}!| d k	r| \}"}#| }$n"|!d k	r|!\}"}"}%}"}#|!}$nd S | j |#ddddddgddddddg}&|&d k	rd|&\}'}(})}*}+},nd S | j |#ddddddgddddddg}-| j |#dddddgdddddg}.| j |#ddddddgddddddg}/| j |#ddgddg}0d\}1}2d\}3}4}5|-d k	r&|-\}"}3}6}4}7}5|-}8n|.d k	rN|.\}"}3}6}4}5|.}8|6jd }2n|/d k	r|/\}"}3}9}"}4}5|/}8|9jd }1|9jd }2n|0d k	r|0d jd |kr|0}8|8d jd }1|8d jd }2|2|krttdd | j  |1 }:t|:dkr|:d jd nd}2nd S |1|kr|1nd}1|2|kr,|2nd}2|8|.|/fkr| j |jd jd };d}<| j |<}=|=d kr| j|<tj|;gtjdg|; tjdd | j d}>td|<|5jd g|4jg|>}7|1s| ||	|'|3||sd S |1o |o |5d ko dt k}?|? o6|5jd |ko6|,jd |ko6|jd |k}@|? ox|,jd |kox|5jd |jd kox|5jd |,jd k}A|@o|$| k}B|@o|$|!k}C|Bo|1o|}D|Ao|$| k}E|?o|$| k}Fd }G|Cr2| j |%dgdg}H| j |%ddddgddddg}I|Id k	r|Id jd }Gn|Hd k	r2|Hd jd }G|BsP|CsP|DsP|EsP|F	r|}J| |)\}K}L|Kdks|Ldks|L|K dkrtd d S d }M|Ds|Es|Fr6| jr0| j |,|Es|Dr|5n|1|Es|Dr|n||+|Es|Dr|7nd |Es|Dr|nd |K|L|Jjd |Dr|1nd|Dr"|nd|2||Dd nd }MnL| j}Nd!| _| j!d |,|5||+|7||K|L||Jjd |Crl|Gnd |1||2|d"}M|N| _|Md krd S | j"|M | j#| j$|Mj< | j%&|J||
g | j%&|$ |Ds|Es|F	r~|&d jdkr|&'  |8d jdk	r
|8'  |d jdk	r"|'  | j(	r~|E	s6|F	r~|&d jdk	rN|&'  |8d jdk	rf|8'  |d jdk	r~|'  | j%&|& | j%&|8 | j%&| d#| _)d S )$NZAddZMatMulZReshapeZ	Transposer   r   c                 S   s   g | ]
}|j qS r   op_type).0childr   r   r   
<listcomp>   s     z,FusionBartAttention.fuse.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   r   r(   noder   r   r   r*      s     c                 S   s   g | ]
}|j qS r   r+   r,   r   r   r   r*      s     r   ) r.   )NNr   c                 S   s
   | j dkS NZIdentityr&   r-   r   r   r   <lambda>       z*FusionBartAttention.fuse.<locals>.<lambda>r.   z&fuse_attention: failed to match v pathZSoftmaxr   )NNNc                 S   s
   | j dkS r/   r&   r0   r   r   r   r1      r2   Z
empty_biasg        )Zdtype)dimsvalsmatmul_vZWhereZExpandr   z9fuse_attention: failed to detect num_heads or hidden_size)past_kpast_v	present_k	present_vZ
packed_qkvF)Z
add_qk_strr6   r7   r8   r9   T)*r   r   r   r   appendlenr'   Zget_childrencountsetgraphlistfilterinput_name_to_nodesloggerdebugZget_initializerr3   Zadd_initializerr   FLOATnparrayZfloat32Zcreate_node_namer   Z	make_noder   r%   localsZget_num_heads_and_hidden_sizeZuse_multi_head_attentionZcreate_multihead_attention_nodeZcreate_attention_nodeZnodes_to_addZthis_graph_nameZnode_name_to_graph_nameZnodes_to_removeextendpopZ!disable_multi_head_attention_biasZprune_graph)Or   Znormalize_noderA   Zoutput_name_to_nodeZ	qkv_nodesZadd_outZ
matmul_outr   Ztranspose_qkvr   Z
matmul_qkvZother_inputsr   r"   Zskip_layernormr   childrenZchildren_typesZgraph_input_namesZgraph_output_namesZv_nodesZv_nodes_with_past_self_attnZv_nodes_with_past_cross_attnr7   r9   r!   Zadd_vZtranspose_vZreshape_v_1r5   Zconcat_vZidentity_node_vZ
qk_nodes_1Z
qk_nodes_2r#   Z	matmul_qkZqk_nodesZadd_qkZq_nodesr   Ztranspose_qZreshape_q_1r$   Zadd_qZmatmul_qZk_nodes_with_biasZk_nodes_no_biasZ#k_nodes_no_bias_with_past_self_attnZ$k_nodes_no_bias_with_past_cross_attnr6   r8   r    Zreshape_k_1Zmatmul_kZtranspose_k_1Zadd_kZk_nodesZconcat_kZidentity_node_kZbias_dimZempty_bias_nameZempty_tensorZadd_nameZthree_root_inputsZone_root_inputZtwo_root_inputsZencoder_attentionZdecoder_attentionZdecoder_attention_with_pastZdecoder_cross_attentionZ!decoder_cross_attention_with_pastZ
mask_indexZmask_nodes_bartZmask_nodes_whisperZattention_last_noder
   r	   Znew_nodeZ%use_multi_head_attention_ground_truthr   r   r   fuseM   sJ   	






"
  








"

 



"

zFusionBartAttention.fuse)__name__
__module____qualname____doc__r   intr   r   r%   rK   __classcell__r   r   r   r   r      s   	0r   )loggingZnumpyrE   Zfusion_attentionr   r   Zonnxr   r   Z
onnx_modelr   	getLoggerrL   rB   r   r   r   r   r   <module>   s   
