
    h                         S SK r S SKJrJr  S SKJr  S SKJrJr  S SK	J
r
  S SKJr  \ R                  " \5      r " S S\5      r " S	 S
\5      rg)    N)AttentionMaskFusionAttention)NumpyHelper)	NodeProtohelper)	OnnxModel)BertOnnxModelc                   z   ^  \ rS rSrSrS\S\S\S\4U 4S jjrS\	S	\
S
\
S\S\S\	S\	S\	S\
S-  4S jrS rSrU =r$ )FusionTnlrAttention   z
Fuse TNLR Attention subgraph into one Attention node.
TNLR Attention has extra addition after qk nodes and adopts [S, B, NH] as I/O shape.
modelhidden_size	num_headsattention_maskc                 &   > [         TU ]  XX45        g N)super__init__)selfr   r   r   r   	__class__s        b/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/onnx_model_tnlr.pyr   FusionTnlrAttention.__init__   s     	YG    
mask_indexmatmuladdinputoutput
add_qk_strreturnNc	                    US:  d   eUS:  a$  XT-  S:w  a  [         R                  SU SU 35        g U R                  R                  UR                  S   5      n	U R                  R                  UR                  S   5      =(       d(    U R                  R                  UR                  S   5      n
U	b  U
c  g [
        R                  " U	5      n[
        R                  " U
5      nU R                  R                  S5      nU	R                  n[        R                  " U5      n[        R                  " US-   UUSU-  /UR                  U5      R                  5       SS	9n	U R                  R                  XR                  5        [        R                  " US
-   USU-  /UR                  U5      R                  5       SS	9n
U R                  R                  XR                  5        UUS-   US
-   /nUb  UR!                  U5        OUR!                  S5        Ub"  UR!                  S5        UR!                  U5        [        R"                  " SUU/US9nSUl        UR&                  R)                  [        R*                  " SU5      /5        U$ )Nr   zinput hidden size z# is not a multiple of num of heads    	Attention_qkv_weight   T)name	data_typedimsvalsraw	_qkv_bias )inputsoutputsr&   zcom.microsoftr   )loggerdebugr   get_initializerr   r   to_arraycreate_node_namer'   r   tensor_dtype_to_np_dtypemake_tensorastypetobytesadd_initializerthis_graph_nameappend	make_nodedomain	attributeextendmake_attribute)r   r   r   r   r   r   r   r   r   weightbias
qkv_weightqkv_biasattention_node_nametensor_dtypenp_typeattention_inputsattention_nodes                     r   create_attention_node)FusionTnlrAttention.create_attention_node   s]    1}}? 7A=LL-k]:]^g]hij++FLLO<zz))#))A,7c4::;U;UVYV_V_`aVb;c>T\ ))&1
''-"jj99+F''11,?##$}4"q;/""7+335
 	

""6+?+?@!!${2"k/")113
 	

""4)=)=> -/+-

 !##J/##B'!##B'##J/))#H$	
 !0  '')>)>{I)V(WXr   c                    UnUR                   S:w  a  g U R                  R                  U/ SQ/ SQ5      nUb  Uu    pgpn
Og / n[        UR                  5       H4  u  pX;  a  M  XS   R
                  S   :X  a  M#  UR                  U5        M6     [        U5      S:w  a  g US   nU R                  R                  U
/ SQ/ SQ5      nUc  g Uu      nnnU R                  R                  US/S/5      nUS   nU R                  R                  U
/ S	Q/ S
Q5      nUc  g Uu  nnnU R                  R                  U/ SQ/ SQ5      nUc  g US   nUS   nU R                  R                  U/ SQ/ SQ5      nUc  g US   nUS   nU R                  R                  USS/SS/5      nUc  g UR                  S   U:X  Ga  S nUnU R                  UUUU R                  U R                  UUR
                  S   US   R                  S   5      nUc  g U R                  R                  U5        U R                  U R                  UR                  '   [        R                   " SSUR                  -   /UR
                  S   /SUR                  -   / SQS9nU R                  R#                  UU R                  5        UR                  S   UR                  S'   SUR                  -   UR
                  S'   U R$                  R'                  UX/5        U R$                  R'                  U5        U R$                  R'                  U5        U R$                  R'                  U5        U R$                  R'                  U5        SU l        g g )NSkipLayerNormalization)WhereAddMatMulReshape	TransposerO   )r"   r"   r"   r   r   r   r   r"   )rQ   rP   SlicerN   rO   )r"   r   r   r   r"   rQ   )SoftmaxrN   rO   )r   r   r   )MulrQ   rP   rR   rN   rO   )r   r   r   r   r   r"   rP   rM   back_transpose_in_back_transpose_)r"   r      )permT)op_typer   match_parent_path	enumerater   r   r:   lenrI   r   r   nodes_to_addr9   node_name_to_graph_namer&   r   r;   add_nodenodes_to_remover>   prune_graph)r   normalize_nodeinput_name_to_nodesoutput_name_to_node
start_node	qkv_nodes_matmul_belowreshape_qkvtranspose_qkv
matmul_qkvother_inputs_ir   
root_inputv_nodesr   r   upper_nodes	transposeqk_nodesadd_qk	matmul_qkq_nodesk_nodesrelative_position_bias_nodesr   attention_last_nodenew_nodeback_transposes                                 r   fuseFusionTnlrAttention.fuseg   s    $
!!%== JJ00H
	
  KTHQZ":#3#34IB/!++A..& 5 |!!!_
**..>

 ?!(Aq#vjj226K=1#NN	:://
<XZcd!)FI**..E

 ?bk**..>

 ?bk'+zz'C'CFYX_L`cdfgbh'i$'/<<?j(J"- 11  #**1-,Q/55a8	H $$X.:>:N:ND((7 $--%56#$!HMM1N JJ0D0DE ) 2HNN1!5!EHOOA  '')<m(XY  ''1  ''0  ''0  ''0  $DS )r   )rc   )__name__
__module____qualname____firstlineno____doc__r   intr   r   strr   rI   r}   __static_attributes____classcell__r   s   @r   r   r      s    
HH H 	H
 &HFF F 	F
 F F F F F 
T	FPq$ q$r   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )TnlrOnnxModel   c                    > [         TU ]  XU5        [        U 5      U l        [	        X R
                  U R                  U R                  5      U l        g r   )r   r   r   r   r   r   r   attention_fusion)r   r   r   r   r   s       r   r   TnlrOnnxModel.__init__   sA    ;7+D1 3D:J:JDNN\`\o\o pr   c                 8    U R                   R                  5         g r   )r   apply)r   s    r   fuse_attentionTnlrOnnxModel.fuse_attention   s    ##%r   )r   r   )r   r   r   r   r   r   r   r   r   s   @r   r   r      s    q
& &r   r   )loggingfusion_attentionr   r   fusion_utilsr   onnxr   r   
onnx_modelr   onnx_model_bertr	   	getLoggerr   r/   r   r    r   r   <module>r      sD   
  ; $ "   )			8	$H$/ H$V&M &r   