
    h                         S SK Jr  S SKrS SKJr  S SKJr  S SKJ	r	J
r
  S SKJrJrJrJr  S SKJr  \" \5      r " S S	5      r " S
 S\5      rg)    )	getLoggerN)Fusion)AttentionMaskFormat)FusionUtilsNumpyHelper)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                   X    \ rS rSrSrS\4S jrS\4S jrS r	S r
S	\S
\S-  4S jrSrg)AttentionMask   2
Fuse Attention subgraph into one Attention node.
modelc                     Xl         0 U l        0 U l        [        U5      U l        [
        R                  U l        UR                  5       U l	        g N)
r   mask_indicemask_castedr   utilsr   MaskIndexEndmask_formatget_opset_versionopset_version)selfr   s     c/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_attention.py__init__AttentionMask.__init__   sB    
 '
.;;"446    r   c                     Xl         g r   )r   )r   r   s     r   set_mask_formatAttentionMask.set_mask_format    s    &r   c                 f    XR                   ;   a  X R                   U   :X  d   eX R                   U'   g r   )r   )r   mask
mask_indexs      r   set_mask_indiceAttentionMask.set_mask_indice#   s3    ###!1!1$!7777!+r   c                 t    [        U R                  5      S:  d   e[        [        U R                  5      5      $ Nr   )lenr   nextiter)r   s    r   get_first_maskAttentionMask.get_first_mask(   s1    4##$q(((D))*++r   mask_2dreturnNc           
         U R                   [        R                  :X  a  g XR                  ;   a  U R                  U   $ U R                  R                  U5      (       a  U R                  R                  U5      u  p#OU R                  R                  U5      u  p4SnU(       a  X0R                  U'   U R                   [        R                  :X  a  X0R                  U'   U$ U R                  R                  S5      nU R                  S:  a|  [        R                  " SU/U/U R                  R                  SS5      S9nUR                  R!                  [        R"                  " SS/5      [        R"                  " S	S
5      /5        OSnU R                  R%                  U5      cA  U R                  R'                  [        R(                  " U[*        R,                  S/S/SS95        [        R                  " SX7/U/U R                  R                  SS5      S9nUR                  R!                  [        R"                  " S	S
5      /5        U R                  R/                  U5        XPR                  U'   U$ )NTr%      	ReduceSumMaskReduceSuminputsoutputsnameaxes   keepdimsr   ort_const_1_reduce_sum_axesFr8   	data_typedimsvalsraw)r   r   NoMaskr   r   find_graph_inputr   cast_graph_input_to_int32cast_input_to_int32r   r   create_node_namer   r
   	make_node	attributeextendmake_attributeget_initializeradd_initializermake_tensorr	   INT64add_node)r   r/   casted
input_name
_cast_nodeoutput_namemask_index_node	axes_names           r   process_maskAttentionMask.process_mask,   s$   2999&&&##G,, ::&&w//!%!E!Eg!NFJ%)ZZ%C%CG%L"JF(2W% 2@@@(2W% jj11,?"$.."|$ZZ00oN	O %%,,f.C.CFQC.PRXRgRghrtuRv-wx 6Izz)))4<

**&&&"-"3"3SS! %..".$ZZ00oN	O %%,,f.C.CJPQ.R-ST

O,$/!r   )r   r   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r   r   r   r!   r&   r-   strrV   __static_attributes__ r   r   r   r      sA    7i 7'+> ',
,8C 8C$J 8r   r   c            (       f  ^  \ rS rSrSrSSSSS/4S\S\S	\S
\S-  S\S\S\	\
   4U 4S jjjrS\S\\\4   4S jrS\S\\\4   4S jrS\4S jrS\
4S jrS\
S\
S\
4S jrS\
S\
S\
4S jrS\S\S-  S \S-  S!\
S\S-  4
S" jrS#\S$\S%\S\S\S-  S \S-  S\\\\4   4S& jr        S6S#\S$\\
-  S-  S%\\
-  S-  S\S\S-  S \S-  S	\S\S'\
S(\
S\
S)\S\
S\
S*\
S+\
S,\S\S-  4$S- jjr       S7S.\
S-  S#\S$\S%\S\S\S \S	\S\S/\
S'\
S0\
S1\S\
S\
S*\
S+\
S2\S-  S\S-  4&S3 jjrS4 rS5rU =r$ )8FusionAttentiong   r   NFSkipLayerNormalizationLayerNormalizationr   hidden_size	num_headsattention_maskuse_multi_head_attention!disable_multi_head_attention_biassearch_op_typesc                    > U(       a  SOSn[         T	U ]  XU5        X l        X0l        U(       a  UO
[	        U5      U l        XPl        X`l        S U l        SU l	        SU l
        S U l        SU l        g )NMultiHeadAttention	AttentionT)superr   re   rf   r   rg   rh   ri   mask_filter_valuenum_heads_warninghidden_size_warningshape_infershape_infer_done)
r   r   re   rf   rg   rh   ri   rj   attention_op_name	__class__s
            r   r   FusionAttention.__init__l   st     5M0R]?C&"0>nMRWDX(@%1R.!% "&#'  $r   concatr0   c                    [        UR                  5      S:X  a  U R                  R                  UR                  S   5      nU R                  R                  UR                  S   5      n[	        U[
        R                  5      (       aO  UR                  S:X  a?  [	        U[
        R                  5      (       a   UR                  S:X  a  US   US   US   -  4$ U R                  U R                  4$ )a  
Detect num_heads and hidden_size from Concat node in the following subgraph:

SkipLayerNormalization or EmbedLayerNormalization
                /        |
             MatMul    Shape
                |        |
               Add     Gather(indices=0)
                |        |
                |      Unsqueeze
                |        |
                |     Concat (*, -1, 12, 64)
                |     /
               Reshape
                  |
               Transpose
         r:   r   )
r*   inputr   get_constant_value
isinstancenpndarraysizerf   re   )r   rw   rf   	head_sizes       r   )get_num_heads_and_hidden_size_from_concat9FusionAttention.get_num_heads_and_hidden_size_from_concat   s    $ v||!

55fll1oFI

55fll1oFI9bjj11NNa'y"**55NNa' |Yq\IaL%@@@~~t////r   	reshape_qc                    U R                   R                  UR                  S   5      nUc{  U R                   R                  US5      nUb!  UR                  S:X  a  U R                  U5      $ [        R                  SUR                  S   5        U R                  U R                  4$ [        U[        R                  5      (       a!  [        U5      S:w  d  US   S::  d	  US   S::  a.  [        R                  SU5        U R                  U R                  4$ US   nUS   nXE-  nU R                  S:  aH  X@R                  :w  a9  U R                  (       a(  [        R                  S	U R                  U5        S
U l        U R                  S:  aH  X`R                  :w  a9  U R                   (       a(  [        R                  SU R                  U5        S
U l        XF4$ )zDetect num_heads and hidden_size from a reshape node.

Args:
    reshape_q (NodeProto): reshape node for Q

Returns:
    Tuple[int, int]: num_heads and hidden_size
r:   Concatz%s is not initializer.ry   rz   r   r{   zGq_shape_value=%s. Expected value are like [0, 0, num_heads, head_size].z>--num_heads is %d. Detected value is %d. Using detected value.Fz@--hidden_size is %d. Detected value is %d. Using detected value.)r   r}   r|   
get_parentop_typer   loggerdebugrf   re   r~   r   r   r*   rp   warningrq   )r   r   q_shape_valuerw   rf   r   re   s          r   get_num_heads_and_hidden_size-FusionAttention.get_num_heads_and_hidden_size   s    

55iooa6HI ZZ**9a8F!fnn&@EEfMMLL19??13EF>>4#3#333 M2::66=!Q&a A%q)9Q)>LLbdqr>>4#3#333!!$	!!$	+>>A)~~"=%%TVZVdVdfo */&aK3C3C$C''VX\XhXhju ,1(%%r   add_qkc                    U R                   (       d%  U R                  R                  SS9U l        SU l         U R                  c  g U R                  R	                  UR
                  S   5      nU R                  R	                  UR
                  S   5      nUb  Uc  [        R                  SU5        g X#:w  a  [        R                  SU5        g UR
                  S   $ )NT)updater   r:   zone of the inputs of %s is Nonez)the shape of two inputs of %s is not same)rs   r   infer_runtime_shaperr   get_edge_shaper|   r   r   )r   r   input_0_shapeinput_1_shapes       r   get_add_qk_strFusionAttention.get_add_qk_str   s    $$#zz==T=JD$(D!#((77QH((77QH M$9LL:FC)LLDfM||Ar   c                   ^ US-   m[        [        U4S jU R                  5      5      n[        U5      S:X  a  T$ [        U5      S:X  d   eU R                  R                  S5      n[        R                  " S[        U R                  5       Vs/ s H  oAPM     snT/USS9nU R                  R                  U5        U R                  U R                  U'   T$ s  snf )N_maskc                 (   > U R                   S   T:H  $ r)   )output)nodemask_output_names    r   <lambda>0FusionAttention.reshape_add_qk.<locals>.<lambda>   s    t{{1~AQ/Qr   r:   r   r   r6   r7   r8   axis)listfilternodes_to_addr*   r   rF   r
   rG   rangerf   appendthis_graph_namenode_name_to_graph_name)r   r   concat_nodeconcat_node_name_concat_add_qk_fp32r   s         @r   reshape_add_qkFusionAttention.reshape_add_qk   s     "G+ 6"QSWSdSdef{q ##;1$$$::66x@#--$)$..$9:$9qF$9:%&!
 	  !349=9M9M$$%56 ;s   C
past_kpast_vc                 8   U R                   R                  S5      nU R                   R                  S5      nUS-   R                  SS5      nUS-   R                  SS5      n[        R                  " SU/U/US/S9n[        R                  " SU/U/US/S9nU R
                  R                  U5        U R
                  R                  U5        U R                  U R                  U'   U R                  U R                  U'   U R                   R                  S5      n	UR                  SS	5      R                  SS5      R                  S
S5      n
[        R                  " SXV/U
/U	SS9nU R
                  R                  U5        U R                  U R                  U	'   U
$ )zConcatenate past_k and past_v inputs to create past_kv input.

Args:
    past_k (str): name of past K value
    past_v (str): name of past V value

Returns:
    kv_output_name (str): name of past KV value
	Unsqueeze_5d.r   r   )r6   r7   r8   r9   r   z.valuez.kv_value_kvr   )	r   rF   replacer
   rG   r   r   r   r   )r   r   r   unsqueeze_k_nameunsqueeze_v_name	k_5d_name	v_5d_namek_5dv_5dr   kv_output_name	concat_kvs               r   r   FusionAttention.concat_kv  s     ::66{C::66{Ce^,,S#6	e^,,S#6	8K!
 8K!
 	  &  &9=9M9M$$%569=9M9M$$%56  ::66x@%8@@cJRRS[]bc$$)#$!
	 	  +9=9M9M$$%56r   present_k_namepresent_v_namekv_nodec                 \   Su  pEU R                   R                  U5      nU R                   R                  U5      nUcM  [        R                  " [        R
                  " SSS9US9nU R                   R                  X`R                  5        UcM  [        R                  " [        R
                  " SSS9US9nU R                   R                  XpR                  5        U R                   R                  S5      nU R                   R                  S5      n	[        R                  " SX4/U/USS	9n
[        R                  " SX5/U/U	SS	9nU R                  R                  U
5        U R                  R                  U5        U R                  U R                  U'   U R                  U R                  U	'   g)
a  Split kv_node containing present KV values into separate present K and present V values.

Args:
    present_k_name (str): name of output to store present K value in
    present_v_name (str): name of output to store present V value in
    kv_node (str): name of present KV values
)index_0index_1Nr   int64)dtype)r8   r:   Gatherr   )r   rK   r   
from_arrayr   arrayrL   r   rF   r
   rG   r   r   r   )r   r   r   r   k_indexv_indexk_dimv_dimgather_k_namegather_v_name	present_k	present_vs               r   split_kvFusionAttention.split_kv8  st    0

**73

**73= ++BHHQg,FWUEJJ&&u.B.BC= ++BHHQg,FWUEJJ&&u.B.BC 

33H=

33H=$$%#$
	 $$%#$
	 	  +  +6:6J6J$$]36:6J6J$$]3r   q_addk_addv_addname_prefixc                    U R                   R                  UR                  S   5      =(       d(    U R                   R                  UR                  S   5      n[        R                  " U5      n[
        R                  " U5      n[
        R                  " U5      nUbm  U R                   R                  UR                  S   5      =(       d(    U R                   R                  UR                  S   5      n	[        R                  " U	5      nUbm  U R                   R                  UR                  S   5      =(       d(    U R                   R                  UR                  S   5      n
[        R                  " U
5      n[
        R                  " XgU4SS9nS[
        R                  " UR                  5      -  nUS-   nU R                  UUR                  U/US9  U$ )Nr:   r   r   r{   	_qkv_biasr8   r>   r?   r@   )r   rK   r|   r   to_arrayr   
zeros_likestackprodshaperL   r>   )r   r   r   r   r   q_biasqbkbvbk_biasv_biasqkv_biasqkv_bias_dim	bias_names                 r   create_combined_qkv_bias(FusionAttention.create_combined_qkv_biase  sw    ++EKKN;itzz?Y?YZ_ZeZefgZh?i!!&)]]2]]2ZZ//A?m4::C]C]^c^i^ijk^lCmF%%f-BZZ//A?m4::C]C]^c^i^ijk^lCmF%%f-B88RRLq1277288,,+-	&&	 	 	
 r   q_matmulk_matmulv_matmulc                 b   U R                   R                  S5      nUR                  S   UR                  S   :X  a   UR                  S   UR                  S   :X  d   eU R                   R                  UR                  S   5      nU R                   R                  UR                  S   5      n	U R                   R                  UR                  S   5      n
[        R
                  " U5      n[        R
                  " U	5      n[        R
                  " U
5      nUR                  UR                  :X  a  UR                  UR                  :X  d   eUR                  S   n[        R                  " XU4SS9R                  USU-  45      nUS-   nU R                  UUR                  UR                  S   UR                  S   /US9  US-   n[        R                  " SUR                  S   U/U/US	9nU R                  U R                  U'   U/nUS
-   nU R                  U[         R"                  S/S/SS9  US-   nU R                  U[         R"                  S/U/SS9  US-   nU R                  U[         R"                  S/SU-  /SS9  US-   nU R                  U[         R"                  S/SU-  /SS9  US-   nU R                  U[         R"                  S/S/SS9  US-   n[        R                  " SUUUU/U/U R                   R                  S5      S	9nU R                  U R                  UR$                  '   US-   n[        R                  " SUUUU/U/U R                   R                  S5      S	9nU R                  U R                  UR$                  '   US-   n[        R                  " SUUUU/U/U R                   R                  S5      S	9nU R                  U R                  UR$                  '   UnUn Un!UR'                  UUU/5        U R(                  (       Gas  Ub  U R                   R                  UR                  S   5      (       a  SOSn"[        R*                  " [        R
                  " U R                   R                  UR                  U"   5      5      5      (       aH  UUR                  SU"-
  '   UnUR-                  U5        U R                  U R                  UR$                  '   Ub  U R                   R                  UR                  S   5      (       a  SOSn"[        R*                  " [        R
                  " U R                   R                  UR                  U"   5      5      5      (       aH  UUR                  SU"-
  '   Un UR-                  U5        U R                  U R                  UR$                  '   Ub  U R                   R                  UR                  S   5      (       a  SOSn"[        R*                  " [        R
                  " U R                   R                  UR                  U"   5      5      5      (       aH  UUR                  SU"-
  '   Un!UR-                  U5        U R                  U R                  UR$                  '   U R.                  R'                  U5        UU U!4$ )a9  Create packed QKV MatMul node before MultiHeadAttention node.
   This is for the scenario where an Attention node should be created but cannot be created
   because past_key and past_value are separate inputs and not one concatenated input.

Args:
    q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
    k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size)
    v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size)
    q_add (NodeProto): name of Add from Q path
    k_add (NodeProto): name of Add from K path
    v_add (NodeProto): name of Add from V path

Returns:
     q_output (NodeProto): Slice node for Q
     k_output (NodeProto): Slice node for K
     v_output (NodeProto): Slice node for V
MatMulr   r:   r   r{   _qkv_weightr   _qkv_outr5   _q_start_indexFr=   _k_start_index_v_start_indexrz   _end_of_qkv_index_qkv_last_axis_q_outSlice_k_out_v_out)r   rF   r|   rK   r   r   r   r   r   reshaperL   r>   r
   rG   r   r   r	   rN   r8   rI   ri   anyr   r   )#r   r   r   r   r   r   r   matmul_node_nameq_weightk_weightv_weightqwkwvwd
qkv_weightqkv_weight_nameqkv_matmul_output
qkv_matmul	qkv_nodesq_slice_namek_slice_namev_slice_nameend_of_qkv_nameqkv_last_axis_nameq_slice_outputq_slicek_slice_outputk_slicev_slice_outputv_sliceq_outputk_outputv_outputinitializer_inputs#                                      r   create_packed_qkv_matmul_node-FusionAttention.create_packed_qkv_matmul_node  s7   4  ::66x@ ~~a HNN1$55(..:Kx~~^_O`:``` ::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@!!(+!!(+!!(+xx288#BHH(<<<HHQKXXrrl3;;QAJG
*]: ((""1%z'7'7':;	 	 	
" -z9%%NN1%7&'!	

 :>9M9M$$%56L	 (*::,+:K:KSTRU]^\_ejk'*::,+:K:KSTRU]^\_ejk'*::,+:K:KSTRU]^ab]b\cino*-@@/[=N=NVWUX`ade`e_flqr-0@@"4@Q@QYZX[cebflqr)H4""%|\CUV#$,,W5	
 6:5I5I$$W\\2)H4""%|\CUV#$,,W5	
 6:5I5I$$W\\2)H4""%|_FXY#$,,W5	
 6:5I5I$$W\\2'7G45111 )-)C)CEKKPQN)S)SAYZ!66+..tzz/I/I%++VgJh/ijkk9GEKK$5 56$H$$U+?C?S?SD00< )-)C)CEKKPQN)S)SAYZ!66+..tzz/I/I%++VgJh/ijkk9GEKK$5 56$H$$U+?C?S?SD00< )-)C)CEKKPQN)S)SAYZ!66+..tzz/I/I%++VgJh/ijkk9GEKK$5 56$H$$U+?C?S?SD00< 	  +8++r   r   key_padding_maskunidirectionalr   r   
packed_qkvc                    US:  d   eUS:  a  X-  S:w  a  [         R                  SX5        gU R                  R                  5       R                   Vs1 s H  nUR
                  iM     nnU R                  R                  S5      n/ nU(       aW  U R                  UUUUUU5      u  nnnUR                  UR                  S   UR                  S   UR                  S   /5        GO=[        U[        5      (       a  [        U[        5      (       a  U R                  (       a<  UR                  UR                  S   UR                  S   UR                  S   /5        OUR                  UR                  S   UR                  S   UR                  S   /5        O[        U[        5      (       at  [        U[        5      (       a_  UU;   aY  UU;   aS  U R                  (       a!  UR                  UR                  S   X#/5        O"UR                  UR                  S   X#/5        OgU R                  (       d%  U R                  XEUU5      nUR                  U5        OUR                  S5        U(       a  U(       a  UR                  XX/5        O U
(       d  U(       a  UR                  X/5        U	/nU(       a  U(       a  UR                  UU/5        [         R"                  " SUUUS9nSUl        UR&                  R                  [         R(                  " S	U5      5        U(       a9  UR&                  R                  [         R(                  " S
[+        U5      5      5        U R-                  S5        U$ s  snf )a  Create a MultiHeadAttention node.

Args:
    q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
    k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
    v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
    q_add (NodeProto): name of Add from Q path
    k_add (NodeProto): name of Add from K path
    v_add (NodeProto): name of Add from V path
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
    hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
    output (str): output name of MHA
    key_padding_mask (str): name of key padding mask
    add_qk (str): name of add after Q x K'
    unidirectional (bool): whether to apply causal attention mask automatically or not
    past_k (str): name of past K value - (batch_size, num_heads, past_sequence_length, head_size)
    past_v (str): name of past V value - (batch_size, num_heads, past_sequence_length, head_size)
    present_k (str): name of present K value - (batch_size, num_heads, sequence_length, head_size)
    present_v (str): name of present V value - (batch_size, num_heads, sequence_length, head_size)
    packed_qkv (bool): whether to combine MatMuls from Q, K, V paths
                       Note: This is for the scenario where an Attention node should be created but cannot be created
                       because past_key and past_value are separate inputs and not one concatenated input.

Returns:
    Union[NodeProto, None]: the node created or None if failed.
r   9input hidden size %d is not a multiple of num of heads %dNrm    rl   r5   com.microsoftrf   r!  )r   r   r   graphr|   r8   rF   r  rI   r   r~   r   ri   r]   r   r   r
   rG   domainrH   rJ   intincrease_counter)r   r   r   r   r   r   r   rf   re   r   r   r   r!  r   r   r   r   r"  r   graph_input_namesmha_node_name
mha_inputsr  r  r  r   mha_outputsmha_nodes                               r   create_multihead_attention_node/FusionAttention.create_multihead_attention_node  s   ^ 1}}? 7A=LLTVam37::3C3C3E3K3KL3K4TYY3KL

33K@ 
(,(J(J)%GWg w~~a0'..2CW^^TUEVWX),,Hi1P1P55!!5<<?HOOA4FUV"XY!!8??1#5xq7I8??[\K]"^_x%%8S))----55!!5<<?H"GH!!8??1#5x"JK 5555eE=YIi(b! f/HI/89 h	956## 	
 *!!&"7"7Y"OP%%f&;&;<LcR`Na&bc23E Ms   M2r%   first_input
add_qk_strcausalscalec                 >   US:  d   eU	S:  a  X-  S:w  a  [         R                  SX5        gSnUc  Uc  Uc  SnU R                  R                  UR                  S   5      nU R                  R                  UR                  S   5      nU R                  R                  UR                  S   5      nSu  nnnU(       Ga"  U R                  R                  UR                  S   5      =(       d(    U R                  R                  UR                  S   5      nU R                  R                  UR                  S   5      =(       d(    U R                  R                  UR                  S   5      nU R                  R                  UR                  S   5      =(       d(    U R                  R                  UR                  S   5      nU(       a  U(       a  U(       a  U(       d  gUc  [        UR                  S    S35        g[        R                  " U5      n[        R                  " U5      n[        R                  " U5      nUR                  UR                  :X  d   eUR                  S   nUR                  S   nUR                  S   nUUs=:X  a  U:X  d   e   eU	S:  a  U	U:w  a  [         R                  S	U	U5        Sn UR                  UR                  :w  a  Sn [        R                  " UR                  SS 5      n![        R                  " UR                  SS 5      n"[        R                  " UR                  SS 5      n#Sn$U (       a!  [        R                  " UUU4SS
9n%U!U"-   U#-   n$O[        R                  " UUU4SS
9n%SU!-  n$Sn&Sn'U(       Ga  [        R                  " U5      n([        R                  " U5      n)[        R                  " U5      n*[        R                  " U(R                  5      n+[        R                  " U)R                  5      n,[        R                  " U*R                  5      n-U+U,s=:X  a  U!:X  d   e   eU-U#:X  d   eU (       a!  [        R                  " U(U)U*4SS
9n'U+U,-   U--   n&O[        R                  " U(U)U*4SS
9n'SU+-  n&U R                  R                  S5      n.U R                  (       d*  U R!                  U.S-   UR"                  U[%        U$5      /U%S9  U(       a)  U R!                  U.S-   UR"                  [%        U&5      /U'S9  U R                  (       a  U(       a  [         R                  S5        gUR&                  S   UR&                  S   UR&                  S   U.S-   /n/Ub  U/R)                  U5        [*        R,                  " SU/U/U.S9n0U R/                  S5        GO-U
U.S-   U(       a  U.S-   OS/n/Ub  U/R)                  U5        OU/R)                  S5        U=(       a    Un1U1(       a"  U R1                  X5      n2U/R)                  U25        U(       a)  U1(       d  U/R)                  S5        U/R)                  U5        U/n3U(       a]  U(       aV  UR3                  SS5      R3                  SS5      R3                  SS5      n4U3R)                  U45        U R5                  UUU45        [*        R,                  " SU/U3U.S9n0U R/                  S5        SU0l        U0R8                  R;                  [*        R<                  " SU5      /5        U(       a1  U0R8                  R;                  [*        R<                  " SS5      /5        Ub1  U0R8                  R;                  [*        R<                  " SU5      /5        U (       a4  U0R8                  R;                  [*        R<                  " SU!U"U#/5      /5        U R>                  bD  U0R8                  R;                  [*        R<                  " S[A        U R>                  5      5      /5        U0$ )a  Create an Attention node.

Args:
    mask_index (str | None): mask input
    q_matmul (NodeProto): MatMul node in fully connection for Q
    k_matmul (NodeProto): MatMul node in fully connection for K
    v_matmul (NodeProto): MatMul node in fully connection for V
    q_add (NodeProto): Add bias node in fully connection for Q
    k_add (NodeProto): Add bias node in fully connection for K
    v_add (NodeProto): Add bias node in fully connection for V
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
    hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
    first_input (str): first input name
    output (str): output name
    add_qk_str (str): name of Add node after Q x K'
    causal: whether it is uni-directional mask.
    past_k (str): name of input for past K value
    past_v (str): name of input for past V value
    present_k (str): name of output to store present K value
    present_v (str): name of output to store present V value
    scale: scale before softmax

Returns:
    Union[NodeProto, None]: the node created or None if failed.
r   r$  NTFr:   )NNNzl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (%d) is not same as weight matrix dimension of q,k,v (%d). Please provide a correct input hidden size or pass in 0r   r{   rm   r   r   r   zVMultiHeadAttention does not support relative_position_bias: cannot fuse the attention.rl   r5   r%  z.key_keyr   r   r&  rf   r!  r5  qkv_hidden_sizesro   )!r   r   r   rK   r|   printr   r   r   r   r   r   concatenater   rF   rh   rL   r>   r)  r   r   r
   rG   r*  r   r   r   r(  rH   rI   rJ   ro   float)5r   r%   r   r   r   r   r   r   rf   re   r2  r   r3  r4  r   r   r   r   r5  has_biasr  r  r  r   r   r   r  r  r  
qw_in_size
kw_in_size
vw_in_sizeis_qkv_diff_dimsqw_out_sizekw_out_sizevw_out_sizeqkv_weight_dimr
  r   r   r   r   r   q_bias_shapek_bias_shapev_bias_shapeattention_node_nameattention_inputsattention_nodepast_existspast_kvattention_outputs
present_kvs5                                                        r   create_attention_node%FusionAttention.create_attention_node  s   \ 1}}? 7A=LLTVam=U]u}H::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@!1ZZ//A?m4::C]C]^c^i^ijk^lCmFZZ//A?m4::C]C]^c^i^ijk^lCmFZZ//A?m4::C]C]^c^i^ijk^lCmFf>>!$% &g g !!(+!!(+!!(+ xx288###XXa[
XXa[
XXa[
Z5:55555?{j8NNJ	 !88rxx#
 ggbhhqrl+ggbhhqrl+ggbhhqrl+R1=J(;6DN2r2,Q7J_N&*%%f-B%%f-B%%f-B77288,L77288,L77288,L<>;>>>>>;...>>2r2,Q?+l:\I88RRLq9 </"jj99+F,,  (=8",, #n"56	 !    (;6 **,'(	 !  ((uv """#k1	  % ''
3#--$'(	N !!"67 #m35=#k12 
 % ''
3 ''+ +VK..8 ''0"$++B/ ''
3!'Y&..vr:BB62NVVWZ\_`
!((4iJ?#--')(	N !!+. /  '')>)>{I)V(WX$$++V-B-BCSUV-W,XY$$++V-B-B7E-R,ST$$++&&'9KVa;bcd !!-$$++V-B-BCVX]^b^t^tXu-v,wxr   c                    UnUnUR                   S:X  a$  U R                  R                  USS5      nUb  UnOg U R                  R                  U/ SQ/ SQ5      nS nUb  Uu    ppO+U R                  R                  U/ SQ/ SQ5      nUb  Uu  ppOg / n[	        UR
                  5       H4  u  pX;  a  M  XS   R                  S   :X  a  M#  UR                  U5        M6     [        U5      S:w  a  g US   nU R                  R                  US	S5      nUbm  UUR                  S      nUb5  [        U5      S
:X  a&  US   nUR                   S:X  a  UR                  S   nOcg Ub  [        U5      S:X  a  UR                  S   nO@g UR                   S:X  a/  UU   nU H$  nUR                   S:X  d  M  UR                  S   nM&     UU   nUR                   S:X  a(  [        UR                  5      S:X  a  UR                  S   nUU   nU Vs/ s H  nUR                   PM     nnUR                  S5      S:w  a  g U R                  R                  U/ SQ/ SQ5      nUc  [        R                  S5        g Uu    n	nnSnSnSnSn/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4S.nS n UR                  5        HQ  u  n!n"U R                  R                  UU"S   U"S   5      n U c  M.  U!S :X  a  S!nOU!S":X  a  S!nOU!S#:X  a  S!nOU!S$:X  a  S!n  O   U c  [        R                  S%5        g S n#S n$S n%S n&U(       a  U u  n	n%n$n	O4U(       a  U u  n	n#n%n$O%U(       a  U u    n	n$OU(       a	  U u  n	n#n$n&n	OU u  n	n#n	n$U&=(       d    U$n&U R                  R                  U&/ SQ/ S&Q5      n'U'c:  U R                  R                  U&/ S'Q/ S(Q5      n'U'c  [        R                  S)5        g U'S*   n(U'S+   n)U'S,   n*U$n+U(       a?  U R                  R                  U$S	S-/SS /5      n,U,c  [        R                  S.5        g U,u  n+n	U R                  R                  U+/ SQU(       a  SOSSSS /5      n-U-c:  U R                  R                  U$/ S/Q/ S0Q5      n-U-c  [        R                  S15        g U-S+   n.U-S,   n/S n0S2n1U(       a7  U R                  R                  U%/ S3Q/ SQ4/ S4Q/ SQ4/ S5Q/ S6Q4/U5      u  n	n0n	OU(       a^  U R                  R                  U%/ S7Q/ S6Q4/ S4Q/ SQ4/U5      u  n	n0n	U#b+  U R                  U#5      n1U1c  [        R                  S8U#5        g OEU(       a  O=U R                  R                  U#/ S9Q/ S:Q4/ S;Q/ S<Q4/ S=Q/ S>Q4/ S?Q/ S@Q4/U5      u  n	n0n	U(       d  U0c  [        R                  SA5        g U(       d  [        U05      S:  a  U R                  R                  U0S   5      u  n	n2U2b>  [!        U2["        R$                  5      (       a  U2R&                  S:X  a  [)        U25      S:  a  g [)        U25      SB:w  a  [)        U25      U l        UR
                  S   U:X  Ga  U*R
                  S   U:X  Gaw  U/R
                  S   U:X  Gab  U(       d+  U R,                  R/                  U0S,   R
                  S   5      OS n3Uc  W
OUn4U R1                  U(5      u  n5n6U5S::  d  U6S::  a  [        R3                  SC5        g U R5                  U3U*U/UU)U.UU5U6UU4R                  S   U1SD9n7U7c  g U R6                  R                  U75        U R8                  U R:                  U7R<                  '   Ub  UR
                  S   n8SEU8-   n9U R?                  SFU8-   [@        RB                  S/SSU5[E        U6U5-  5      /SSG9n:U R                  RG                  [H        RJ                  " SHU4R                  S   U:R<                  /U9/SIU8-   5      U R8                  5        U9UR
                  S'   U RL                  RO                  U4X/5        U RL                  RO                  U 5        U RL                  RO                  U RP                  (       d  U'OU'S S, 5        U RL                  RO                  U RP                  (       d  U-OU-S S, 5        U RL                  RO                  U RP                  (       d  UOUS S, 5        S!U l)        g g g g s  snf )JNrd   Addr   )rR  r   Reshape	Transposer   )NNr   r   r   )rR  EinsumrT  r   )r:   Nr   r   r:   Mulrz      rc   ry   r   r{   )rT  rS  rR  r   )r:   r   r   Nz&fuse_attention: failed to match v pathF)SoftmaxrR  Divr   )r   r   Nr   )rX  rR  rV  r   )rX  Wherer   rY  )r   r   rz   r   )rX  rR  rZ  r   )r   r   r   rz   )rX  rY  r   )r   r   r   )rX  rR  r   rV  Sqrt)r   r   Nr   r:   )path1path2path3path4path5sdpar^  Tr_  r`  ra  z'fuse_attention: failed to match qk path)r   r   r   N)rY  rT  rS  rR  r   )r   r   r   r   Nz&fuse_attention: failed to match q pathr   r[  z/fuse_attention: failed to match mul sqrt q path)rT  rT  rS  rR  r   )r:   r   r   r   Nz&fuse_attention: failed to match k pathr%  )ExpandrS  Equal)re  r   r   )Castrd  rS  re  )r   r   r   r   )rf  re  r   r   z6fuse_attention: failed to verify shape inference of %s)rV  Subrf  r   r   )Nr   r:   r   r   )rV  rg  r   r   )Nr   r:   r   )rZ  rf  rg  rd  r   r   )Nr   r   r:   r   r   )rZ  rf  rg  rf  rd  r   r   )Nr   r   r:   r   r   r   z)fuse_attention: failed to match mask pathizmFailed to detect num_heads and hidden_size for Attention fusion. Please specify those parameters in argument.)r%   r   r   r   r   r   r   rf   re   r2  r   r3  edge_modified_shape_modified_tensorr=   rS  reshape_modified_)*r   r   match_parentmatch_parent_path	enumerater|   r   r   r*   countr   r   itemsmatch_parent_pathsr   get_constant_inputr~   r   r   r   r;  ro   rg   rV   r   r   rO  r   r   r   r8   rL   r	   rN   r)  rO   r
   rG   nodes_to_removerI   rh   prune_graph);r   r   input_name_to_nodesoutput_name_to_nodenormalize_node
start_nodeadd_before_layernormr  einsum_noder   reshape_qkvtranspose_qkv
matmul_qkvother_inputs_i
node_input
root_inputmul_before_layernormmul_childrenlayernorm_nodechildrenchildparent_nodechildren_typesv_nodesadd_vmatmul_v
is_distillis_distill_addis_no_mask_attentionis_sdpaqk_pathsqk_nodeskvr   	matmul_qkwhere_qkafter_qq_nodesr   add_qmatmul_qafter_kmul_k_nodesk_nodesadd_kmatmul_k
mask_nodesr3  mul_valr%   attention_last_nodeq_num_headsq_hidden_sizenew_nodeunique_indexnew_edgeshape_tensors;                                                              r   fuseFusionAttention.fusev  sy	    #
!!%99#'::#:#:>5RS#T #/1
 JJ00?!
	
  =F:Qz 

44DoI $>G;
'
(8(89NB4q\0033
+ : |!!!_
  $zz66z5!L+./C/J/J1/MNL'C,=,B!-a!))-AA!/!6!6q!9J)c,.?1.D188;
##';;*:6H!==$88!&aJ " **5"::s;CUCU?VZ[?[$++A.J&z25=>XE%--X>)Q.**..z;dfuv?LLAB")Auh
$9?K9?K;\J;\J2I>@BTU
 NN$DAqzz33J!adKHG|!
g!%g'+$f % LLBC	*2'Q)Q/7,Q)! (Q919.Q	7A(0%Q9&Y**..w8acrs?jj22@"G
 EFBK	2;**66y5&/TUW[S\]K"NO&LWa**..>gSTVWYZ\`@a
 ?jj22F"G
 EF2; 

#zz<<3Y?8)D;\J
 $ Az1 #zz<<@,O8)D $ Az1 !!008
%LL!Y[ab!#zz<<EGYZ=OQShiY[st $
 Az1 $
(:LLDE#J!(;66z!}EJAw "7BJJ77GLLA<M'Na'W~').w&>>!
*x~~a/@J/NS[SaSabcSdhrSrZn,,99*R.:N:Nq:QRtxJ1<1D+-)-)K)KI)V&Ka=A#5C  11%!!!%)&*11!4% 2 H $$X.:>:N:ND((7&*003+l:#330<?)//QS1L-MN  4   

##$$!,33A68I8IJ!
+l:	 (( (0!!!$  '')<m(XY  ''1   ''t7T7TZabeceZfg  ''t7T7TZabeceZfg  ''t7T7TZabeceZfg  $DI Ts/N*q ?s   c.)rg   ri   re   rq   ro   rf   rp   rs  rr   rs   rh   )r%  r%  Fr%  r%  r%  r%  F)r%  Fr%  r%  r%  r%  N)rX   rY   rZ   r[   r\   r   r)  r   boolr   r]   r   r   tupler   r   r   r   r   r   r   r  r0  r;  rO  r  r^   __classcell__)ru   s   @r   ra   ra   g   s    04).27&>@T%U%% % 	%
 &,% #'% ,0% c% %40	 0eTWY\T\o 0>,&y ,&U3PS8_ ,&\Y * S  25 5S 5S 5n+Ks +KC +K# +KZ 4 4	
  
T	<M,M, M, 	M,
 M, 4M, 4M, 
y)Y.	/M,v !#$ %ww c/D(w c/D(	w
 w 4w 4w w w w w w w w w  !w" #w$ %w& 
T	'wL "'h$Jh h 	h
 h h h h h h h h h h h  !h" #h$ %h& t|'h( 
T	)hTo$ o$r   ra   )loggingr   numpyr   fusion_baser   fusion_optionsr   fusion_utilsr   r   onnxr   r	   r
   r   
onnx_modelr   rX   r   r   ra   r_   r   r   <module>r     sD   
    . 1 = =  	8	S Sl~$f ~$r   