
    hG                        S SK r S SKJr  S SKJr  S SKJrJrJrJ	r	J
r
  S SKJr  \ R                  " \5      r " S S\5      r " S S	\5      rg)
    N)FusionAttention)Fusion)FunctionProto	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                      ^  \ rS rSrSrS\S\S\4U 4S jjr       SS\S	\S
\	S\	S\	S\S\S\S\S\S\S\
S-  S\	S-  4S jjrS rS rS rSrU =r$ )FusionRotaryAttention   z]
Fuse Attention subgraph with rotary positional embeddings into one MultiHeadAttention node.
modelhidden_size	num_headsc                 ,   > [         TU ]  UUUS/ SQS9  g )NT)SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationLayerNormalizationSkipLayerNormalizationAdd)use_multi_head_attentionsearch_op_types)super__init__)selfr   r   r   	__class__s       j/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_rotary_attention.pyr   FusionRotaryAttention.__init__   s(     	%) 	 	
    Ninputoutputq_rotaryk_rotaryv_matmul	attn_maskadd_qkpast_kpast_v	present_k	present_vscalereturnc                    U R                   S:  d   eU R                  S:  aM  U R                  U R                   -  S:w  a0  [        R                  SU R                   SU R                    35        g U R                  R                  S5      nUR                  S   UR                  S   UR                  S   SUUUU	/nU/nU
(       a  U(       a  UR                  X/5        [        R                  " SUUUS9nSUl
        UR                  R                  [        R                  " SU R                   5      /5        Ub1  UR                  R                  [        R                  " S	U5      /5        U R                  bD  UR                  R                  [        R                  " S
[        U R                  5      5      /5        U R                  S5        U$ )Nr   z)fuse_rotary_attention: input hidden size z# is not a multiple of num of heads MultiHeadAttention inputsoutputsnamecom.microsoftr   r+   mask_filter_value)r   r   loggerdebugr   create_node_namer!   extendr   	make_nodedomain	attributemake_attributer5   floatincrease_counter)r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   mha_node_name
mha_inputsmha_outputsmha_nodes                    r   create_mha_node%FusionRotaryAttention.create_mha_node(   s    ~~!!!aT%5%5%F1$LLL;D<L<L;MMpquqq  qA  B 

334HIOOAOOAOOA	

 h	56## 	
 *!!6#8#8dnn#U"VW%%v'<'<We'L&MN!!-%%v'<'<=PRWX\XnXnRo'p&qr23r   c	                 &   U R                   R                  US/S/5      n	U R                   R                  US/S/5      n
U	b  U
c  gU	S   U
S   pU R                   R                  U/ SQ/ SQ5      nU R                   R                  U/ SQ/ SQ5      nU R                   R                  U/ SQ/ SQ5      nU R                   R                  U/ SQ/ SQ5      nUb	  Ub  Ub  Uc  gUu  nnnUu  nnnUR                  S   U:w  d  UR                  S   U:w  a  gUS   R                  UR                  :w  d  US   R                  UR                  :w  a  gU R                   R                  US/S/5      nU R                   R                  US/S/5      nUb  Uc  gUS   US   nnU R                   R                  U/ S	Q/ S
Q5      nU R                   R                  U/ SQ/ SQ5      nU R                   R                  U/ SQ/ SQ5      nU R                   R                  U/ SQ/ SQ5      nUb	  Ub  Ub  Uc  gUS   R                  UR                  :w  dW  US   R                  UR                  :w  d:  US   R                  UR                  :w  d  US   R                  UR                  :w  a  gU R                   R                  US/S/5      nUc  gUS   nU R                   R                  U/ S	Q/ S
Q5      n U R                   R                  U/ SQ/ SQ5      n!U b  U!c  gU S   R                  UR                  :w  d  U!S   R                  UR                  :w  a  gU R                   R                  US/S/5      n"U"c  gU"S   n#U R                   R                  U#/ S	Q/ S
Q5      n$U R                   R                  U#/ SQ/ SQ5      n%U$b  U%c  gU$S   R                  UR                  :w  d  U%S   R                  UR                  :w  a  gU$S   n&U S   n'US   n(UR                  S   n)U&R                  S   U):w  d&  U'R                  S   U):w  d  U(R                  S   U):w  a  gU R                   R                  U/ SQ/ SQ5      n*U R                   R                  U/ SQ/ SQ5      n+U*b  U*u  nn,n-OU+b  U+u    nn,n-OgU-R                  S   S;  a  gU R                   R                  U,/ SQ/ SQ5      n.U R                   R                  U-/ SQ/ SQ5      n/U R                   R                  U-S/S/5      n0U.b  U/b  U0c  gU.S   R                  U/S   R                  :w  d   U.S   R                  U/S   R                  :w  a  gU/S   R                  S   U0S   R                  S   :w  a  gg)NConcat   Fr   	UnsqueezeGatherShaper   r   r   rH   r   r   )   r   r   )rJ   MulrK   rL   r   r   r   r   )rJ   r   rK   rL   rH   r   r   r   rO   )rO   r   r   r   rG   SlicerT   CastrG   rT   rT   >   r%   attention_mask)rO   r   rH   r   rJ   T)r   match_parent_pathr    r3   r!   )1r   reshape_qkv_2reshape_qkv_1reshape_q_2reshape_k_2reshape_v_2reshape_v_1r&   
root_inputconcat_qkv_2_pathconcat_qkv_1_pathconcat_qkv_2concat_qkv_1reshape_qkv_2_path_1reshape_qkv_2_path_2reshape_qkv_1_path_1reshape_qkv_1_path_2_gather_1shape_1gather_2shape_2concat_v_2_pathconcat_v_1_path
concat_v_2
concat_v_1reshape_v_2_path_1reshape_v_2_path_2reshape_v_1_path_1reshape_v_1_path_2concat_k_2_path
concat_k_2reshape_k_2_path_1reshape_k_2_path_2concat_q_2_path
concat_q_2reshape_q_2_path_1reshape_q_2_path_2mul_qmul_kmul_vgather_1_outattn_mask_path_1attn_mask_path_2
slice_qk_2
slice_qk_1slice_qk_2_pathslice_qk_1_path_1slice_qk_1_path_2s1                                                    r   &check_runtime_shape_paths_for_function<FusionRotaryAttention.check_runtime_shape_paths_for_function`   s    !JJ88
UVTWX JJ88
UVTWX$(9(A%6q%9;LQ;Ol#zz;;LJjluv#zz;;LJjluv#zz;;LJjluv#zz;;LJjluv (#+#+#+38W38W ==z)W]]1-=-K  "''8==8<PQR<S<X<X\d\i\i<i **66{XJQRPST**66{XJQRPST"o&=!0!3_Q5GJ
!ZZ99?
 "ZZ99?
 "ZZ99*Ffhqr!ZZ99*Ffhqr&!)!)!) q!&&(--7!!$))X]]:!!$))X]]:!!$))X]]: **66{XJQRPST"$Q'
!ZZ99?
 "ZZ99?
 %);)C a %%6:LQ:O:T:TX`XeXe:e **66{XJQRPST"$Q'
!ZZ99?
 "ZZ99*Ffhqr%);)C a %%6:LQ:O:T:TX`XeXe:e #1%"1%"1%q);;q>\)U[[^|-Ku{{[\~amOm  ::77@\^gh::77@dfrs'(8%Az:)+;(Aq*jA&EE**66?
 !JJ88?
 !JJ88k]UVTWX"&7&?CTC\ 1""&7&:&?&???STCUCZCZ^opq^r^w^wCw Q%%a(,=a,@,F,Fq,IIr   c                 P   U R                   R                  US/S/5      nUc  gUS   nU R                   R                  U/ SQ/ SQ5      nU R                   R                  U/ SQ/ SQ5      n	Ub  U	c  gUu  pnU	u  pnUR                  S   U:w  d  UR                  S   U:w  a  gU R                   R                  US/S/5      nUc  gUS   nU R                   R                  U/ SQ/ SQ5      nU R                   R                  U/ SQ/ SQ5      nUb  Uc  gUS   R                  UR                  :w  d  US   R                  UR                  :w  a  gU R                   R                  US/S/5      nUc  gUS   nU R                   R                  U/ SQ/ SQ5      nU R                   R                  U/ SQ/ SQ5      nUb  Uc  gUS   R                  UR                  :w  d  US   R                  UR                  :w  a  gU R                   R                  US/S/5      nUc  gUS   nU R                   R                  U/ SQ/ SQ5      nU R                   R                  U/ SQ/ SQ5      nUb  Uc  gUS   R                  UR                  :w  d  US   R                  UR                  :w  a  gg)	NrG   rH   Fr   rI   rM   rN   T)r   rX   r    r3   )r   reshape_qkv	reshape_q	reshape_k	reshape_vr_   concat_qkv_path
concat_qkvreshape_qkv_path_1reshape_qkv_path_2rh   ri   rj   rk   rl   concat_v_pathconcat_vreshape_v_path_1reshape_v_path_2concat_k_pathconcat_kreshape_k_path_1reshape_k_path_2concat_q_pathconcat_qreshape_q_path_1reshape_q_path_2s                              r   #check_runtime_shape_paths_for_nodes9FusionRotaryAttention.check_runtime_shape_paths_for_nodes   s    **66{XJQRPST"$Q'
!ZZ99*Ffhqr!ZZ99*Ffhqr%);)C1W1W ==z)W]]1-=-K 

44Y
QCP  #::77Bbdmn::77Bbdmn#'7'? A##x}}48H8K8P8PT\TaTa8a 

44Y
QCP  #::77Bbdmn::77Bbdmn#'7'? A##x}}48H8K8P8PT\TaTa8a 

44Y
QCP  #::77Bbdmn::77Bbdmn#'7'? A##x}}48H8K8P8PT\TaTa8ar   c                   ^  UR                   S;  a  g S nT R                  R                  U/ SQ/ SQ5      nT R                  R                  U/ SQ/ SQ5      nT R                  R                  U/ SQ/ SQ5      nUb	  Uu  ppnUnO-Ub  Uu  ppUnO"Ub	  Uu    ppUnO[        R	                  S5        g Su  pnS nS nT R                  R                  U/ S	Q/ S
Q5      nT R                  R                  U/ SQ/ SQ5      nT R                  R                  U/ SQ/ SQ5      nT R                  R                  U/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/ SQ/ SQ4/	S S9u  nnnT R                  R                  U/ S Q/ S!Q5      nUby  Uu  nnnnnnUnT R                  R                  US"S#/S$S%/5      nUc  [        R	                  S&5        g US$   R                  S$   nUS'   R                  S$   nUR                  S$   nOUb(  Uu  nnnnUnUR                  S$   nUR                  S$   nOUb  Uu  nnnUnUR                  S$   nOUb=  [        U5      S(:X  a.  US$   S)S  u  nnnnUnUR                  S$   nUR                  S$   nODUb+  Uu  nnnnnUnUnUR                  S$   nUR                  S$   nO[        R	                  S*5        g T R                  R                  U/ S+Q/ S,Q5      nS-u  nn Ub  Uu  nnnn O[        R	                  S.5        g S/u  n!n"T R                  R                  U/ S0Q/ SQ5      n#T R                  R                  U/ S1Q/ SQ5      n$T R                  R                  U/ S2Q/ S3Q5      n%T R                  R                  U/ S4Q/ S5Q5      n&T R                  R                  U/ S6Q/ S7Q5      n'T R                  R                  U/ S8Q/ S3Q5      n(T R                  R                  U/ S9Q/ S:Q5      n)U#b  U#u  nn*n+U*R                  S$   n!OU$b  U$u    nn*n+U*R                  S$   n!OU%b"  T R                  U%S$   R                  S$   5      n"OU&b"  T R                  U&S$   R                  S$   5      n"OgU'b  U'S$   R                  S$   n"OQU(b  U(S$   R                  S$   n"O;U)b"  T R                  U)S$   R                  S$   5      n"O[        R	                  S;5        g S/u  n,n-S n.S n/S n0T R                  R                  U / S<Q/ S
Q5      n1T R                  R                  U / S=Q/ SQ5      n2T R                  R                  U / S>Q/ S?Q5      n3T R                  R                  U / S@Q/ S:Q4/ SAQ/ SBQ4/ SCQ/ SDQ4/ SEQ/ SFQ4/ SGQ/ SHQ4/ SIQ/ SJQ4/ SKQ/ SLQ4/ SIQ/ SMQ4/ SIQ/ SNQ4/	S S9u  nn4nT R                  R                  U / SOQ/ SPQ5      n5U1b  U1u  n6nn7nn8n9U1n.T R                  R                  U7S"S#/S$S%/5      n:U:c  [        R	                  SQ5        g U:S$   R                  S$   n,U:S'   R                  S$   n;U7R                  S$   n-UU;:X  d   eOU2b  U2u  nn8nn<n9U2n.U8R                  S$   n-OU3b*  U3u  nn7n8nn<n9U3n.U7R                  S$   n,U7R                  S$   n-OU4bF  [        U45      S(:X  a7  U4S$   SRS  u  n<n9U4S$   SSST u  n7n8U4n.U7R                  S$   n,U7R                  S$   n-OFU5b-  U5u	  nn7n0n8n/nn<nn9U5n.U7R                  S$   n,U7R                  S$   n-O[        R	                  SU5        g S n=S n>S n?T R                  R                  U / SVQ/ S,Q5      n@T R                  R                  U / SWQ/ S,Q5      nAT R                  R                  U / SXQ/ SYQ5      nBU@b
  W@u  nCnnDnEU@n=O3WAb
  WAu  nDnnFnEUAn=O&WBb  WBu  n?nDn>nnFnnEUBn=O[        R	                  SZ5        g WER                  S$   U9R                  S$   :w  a6  U9R                  S$   UR                  S$   :w  a  [        R	                  S[5        g S\nGXE:X  aP  T R                  W	W
WCW6WWUWER                  S$   5      (       d  [        R	                  S]5        g U	R                  S$   nGOXFU4;   a  T R                  WWFW<WWER                  S$   5      (       d  [        R	                  S]5        g UR                  S$   nGU>(       a  U>R                  S$   OWER                  S$   WDR                  S$'   U/(       a  U/R                  S$   OU9R                  S$   U8R                  S$'   U?c  U8R                  S^-   U8R                  S$'   XG:X  a  US_S  nU 4S` jnHU?(       Ga  U0(       Ga  T R                  R                  Sa5      nIUIS^-   nJ[        R                  " SaU0R                  S$   /UJ/UISb9nKUKR                   R#                  [        R$                  " Sc/ SdQ5      /5        T R                  R                  Sa5      nLULS^-   nM[        R                  " SaU?R                  S$   /UM/ULSb9nNUNR                   R#                  [        R$                  " Sc/ SdQ5      /5        WH" W<5      nOUOc  [        R	                  Se5        g T R                  R                  SfSgSh9nP[        R                  " SfWKR                  S$   WOR                  S$   /UPS^-   /UPSb9nQT R                  R                  SfSiSh9nR[        R                  " SfWNR                  S$   UOR                  S$   /URS^-   /URSb9nSUQn8USnDT R&                  R)                  UO5        T R&                  R)                  UK5        T R&                  R)                  UN5        T R&                  R)                  UQ5        T R&                  R)                  US5        T R*                  T R,                  UOR                  '   T R*                  T R,                  UKR                  '   T R*                  T R,                  UNR                  '   T R*                  T R,                  UQR                  '   T R*                  T R,                  USR                  '   T R/                  WER                  S$   WGWDU8UU!U"U,UU-U5      nTUTc  [        R	                  Sj5        g T R&                  R)                  WT5        T R*                  T R,                  UTR                  '   T R0                  R#                  US_S  5        UU:w  a'  T R0                  R#                  Uc  US S' OUS SR 5        O$US$   S'   /nUU H  nVT R3                  UVWU5        M     T R0                  R#                  U5        U.U1:X  a   T R0                  R#                  U.S SR 5        GOTU.U2:X  a[  T R0                  R)                  U.S$   5        T R0                  R)                  U.S%   5        T R0                  R)                  U.Sk   5        OU.U3:X  ay  T R0                  R)                  U.S$   5        T R0                  R)                  U.S_   5        T R0                  R)                  U.Sk   5        T R0                  R)                  U.Sl   5        OtU.U5:X  a=  T R0                  R)                  U.S$   5        T R0                  R)                  U.S_   5        O1U.U4:X  a+  U.S$   S'   U.S$   S)   /nUU. H  nVT R3                  UVWU5        M     U=W@:X  a  T R0                  R#                  U=S SR 5        OBU=WA:X  a<  T R0                  R)                  U=S_   5        T R0                  R)                  U=S%   5        SmT l        g )nN>   r   r   r   )MatMulReshape	Transposer   r   rH   r   r   r   r   )r   r   r   r   rR   )	AllReducer   r   r   r   z0fuse_rotary_attention: failed to match qkv nodes)r/   r/   r/   )r   r   rG   r   r   r   )rH   r   r   rH   r   r   )rG   r   r   r   )rH   rH   r   r   )r   r   r   rN   )r   ExpandrJ   rG   r   r   r   )rH   r   r   r   rH   r   r   )r   r   WhereEqualr   rG   rJ   rK   rL   rG   r   r   r   )rH   r   rH   r   r   r   r   r   r   r   rH   r   r   )r   r   r   r   rP   ConstantOfShaperL   r   rG   rJ   rK   rL   rG   r   r   r   )rH   r   rH   r   rH   r   r   r   r   rH   r   r   r   rH   r   r   )r   r   r   r   rL   r   rG   rJ   rK   rL   rG   r   r   r   )rH   r   rH   rH   r   r   r      r   r   r   rH   r   r   )r   r   r   r   rG   rJ   rK   rL   rG   r   r   r   )rH   r   rH   rO   r      r   r   r   rH   r   r   )	r   rG   rJ   rK   rL   rG   r   r   r   )	rH   rH   r   r   r   r   rH   r   r   )
r   rG   rJ   rP   rK   rL   rG   r   r   r   )
rH   rH   rH   r   r   r   r   rH   r   r   )	rH   rH   rO   r   r   r   rH   r   r   )	rH   rH   r   r   r   r   rH   r   r   )output_name_to_node)rG   r   r   r   r   )rH   rH   r   r   rH   rT   rJ   r   rO   zDfuse_rotary_attention: failed to match past/present concat in v path	   z-fuse_rotary_attention: failed to match v path)Softmaxr   Divr   rQ   NNz/fuse_rotary_attention: failed to match qk nodes)r/   r/   rS   rU   )r   r   SubrV   r   rJ   rJ   )rH   r   rO   rH   r   r   r   )r   r   rV   r   rJ   rJ   )rH   rO   rH   r   r   r   )r   r   r   r   rV   r   rJ   rJ   )rH   r   r   rO   rH   r   r   r   )r   r   r   rV   r   rJ   rJ   )	r   rV   r   rV   r   rV   r   rJ   rJ   )	rH   r   r   r   r   rH   r   r   r   z;fuse_rotary_attention: failed to match attention mask nodes)r   r   rG   r   RotaryEmbeddingr   )r   r   r   r   r   )r   rG   r   r   r   r   )rH   r   rH   r   r   r   )	r   r   r   rJ   rG   r   r   r   r   )r   r   r   r   r   r   rG   rJ   rK   rL   rG   r   r   r   r   )rH   r   r   rH   r   r   r   r   r   r   r   rH   r   r   r   )r   r   r   r   r   rP   r   rL   r   rG   rJ   rK   rL   rG   r   r   r   r   )rH   r   r   rH   r   rH   r   r   r   r   rH   r   r   r   rH   r   r   r   )r   r   r   r   r   rL   r   rG   rJ   rK   rL   rG   r   r   r   r   )rH   r   r   rH   rH   r   r   r   r   r   r   r   rH   r   r   r   )r   r   r   r   r   rG   rJ   rK   rL   rG   r   r   r   r   )rH   r   r   rH   rO   r   r   r   r   r   rH   r   r   r   )r   r   rG   rJ   rK   rL   rG   r   r   r   r   )rH   r   rH   r   r   r   r   rH   r   r   r   )r   r   rG   rJ   rP   rK   rL   rG   r   r   r   r   )rH   r   rH   rH   r   r   r   r   rH   r   r   r   )rH   r   rH   rO   r   r   r   rH   r   r   r   )rH   r   rH   r   r   r   r   rH   r   r   r   )	r   rG   rG   r   rT   r   r   r   r   )	rH   r   rH   r   r   r   r   r   rH   zDfuse_rotary_attention: failed to match past/present concat in k pathz.fuse_rotary_attention: failed to match k nodes)r   r   r   r   )r   r   r   r   )rG   r   rT   r   r   r   r   )r   r   r   r   r   r   rH   z.fuse_rotary_attention: failed to match q nodeszKfuse_rotary_attention: failed to find the same root_input for q, k, v pathsr/   z;fuse_rotary_attention: failed to verify runtime shape paths	_output_0rH   c                    > T
R                   R                  U SS5      nUc  [        R                  S5        gT
R                   R	                  UR
                  S   5      nT
R                   R	                  UR
                  S   5      nUb  Uc  [        R                  S5        gUS   nUS   nXE-  nT
R                   R                  S	S
S9nT
R                   R                  U5      c#  T
R                  U[        R                  S/U/SS9  T
R                   R                  SSS9n[        R                  " SUR
                  S   UR
                  S   U/US-   /US9n	U	R                  R                  [        R                  " SS5      /5        U	$ )zDetect num_heads and hidden_size for ONNX model from phi-2
Args:
    reshape_q (NodeProto): reshape node for q
Returns:
    hidden_size_concat_node(NodeProto): Concat node to be used by reshape
rG   rH   NzEfuse_rotary_attention: failed to trace the concat node from reshape_qrO   r   zMfuse_rotary_attention: failed to get constant nodes of num_heads or head_sizer   Initializerr   name_prefixF)r3   	data_typedimsvalsrawhidden_size_concatoutput_0r0   axis)r   match_parentr6   r7   get_constant_valuer    r8   get_initializeradd_initializerr   INT64r   r:   r<   r9   r=   )r   concatnum_head_constant_nodehead_size_constant_nodenum_head_valuehead_size_valuer   hidden_size_initilizerhidden_size_reshape_node_namehidden_size_concat_noder   s             r   create_hidden_size_concat_nodeBFusionRotaryAttention.fuse.<locals>.create_hidden_size_concat_node  s    ZZ,,Y!DF~de &*ZZ%B%B6<<PQ?%S"&*jj&C&CFLLQRO&T#%-1H1Plm3A6N5a8O(:K%)ZZ%@%@\i%@%j"zz))*@AI$$/)//% %  -1JJ,G,G^r,G,s)&,&6&6LLOLLO*
 7CD2	'# $--44f6K6KFTU6V5WX**r   r   r0   perm)r   rO   rH   r   z?fuse_rotary_attention: failed to create hidden_size_concat_noder   concat_k_halfr   concat_q_halfzSfuse_rotary_attention: failed to create multi-head attention with rotary embeddingsr   r   T)op_typer   rX   r6   r7   match_parent_paths_allr    r!   lenreshape_add_qkr   r   r3   r8   r   r:   r<   r9   r=   nodes_to_addappendthis_graph_namenode_name_to_graph_namerD   nodes_to_remove&add_nodes_to_remove_with_nodes_to_keepprune_graph)Wr   normalize_nodeinput_name_to_nodesr   	qkv_nodesqkv_nodes_1qkv_nodes_2qkv_nodes_3rh   rY   rZ   
matmul_qkvr   r(   r*   past_seq_lenv_nodesadd_v	v_nodes_1	v_nodes_2	v_nodes_3	v_nodes_4	v_nodes_5r]   r   r^   matmul_vr   transpose_vr   qk_nodesr&   	matmul_qkr%   
add_qk_strattn_mask_nodes_1attn_mask_nodes_2attn_mask_nodes_3attn_mask_nodes_4attn_mask_nodes_5attn_mask_nodes_6attn_mask_nodes_7slice_mask_1slice_mask_2r'   r)   k_nodesslice_kr   	k_nodes_1	k_nodes_2	k_nodes_3	k_nodes_4	k_nodes_5r\   r   rotary_kmatmul_kr   shared_past_seq_lenr   q_nodesslice_qr   	q_nodes_1	q_nodes_2	q_nodes_3r[   rotary_qmatmul_qr   root_outputr   k_transpose_node_namek_tranpose_output_namek_transpose_nodeq_transpose_node_nameq_tranpose_output_nameq_transpose_noder   concat_k_reshape_node_nameconcat_k_reshape_nodeconcat_q_reshape_node_nameconcat_q_reshape_nodenew_nodenodes_to_keep	temp_pathsW   `                                                                                      r   fuseFusionRotaryAttention.fuseE  s   !!)nn
 	jj22C

 jj228

 jj22E

 "=H:Aa
#I$,7)AA#I$/:,Aqq#ILLKL +5'<JJ00P
	
 JJ008
	
 JJ00.
	
 **;; c)
 <!&$ E',  ?#( 9$ v/
 3  v/
 v/OkX !%] < o
9a` JJ00?
	
  AJ>KHahG JJ88+&AM
 $cd"1%++A.F(,2215L *I"9B6Hk9hG^^A&F *I"/8,KHG#**1-I"s9~':9B1bc9J6Hk9hG^^A&F *I"@I=Hk9eXHG^^A&F *ILLHI::///

 '	&.#Avq)LLJK !'	: JJ88(

 !JJ880

 !JJ88O!

 !JJ88H

 !JJ88Y$

 !JJ88R!

 !JJ88a'

 (,=)A|\$++A.I*/@,Aq,$++A.I*,,->q-A-H-H-KLJ*,,->q-A-H-H-KLJ**1-44Q7J**1-44Q7J*,,->q-A-H-H-KLJLLVW
 #	JJ00X
	
 JJ00N
	
 JJ00X
	
 **;;
 0" B%*( K+0$ E',  ?#( 6" 9$ 6" 6gcH !%M < g
9aP JJ00r'
	
  >G;KHa8G JJ88+&AM
 $cd"1%++A.F"/"3"9"9!"< *I#6666"2;/AxIxG *I"<E9Ax1iG^^A&F *I"s9~':"+A,rs"3Ix!*1b!4HhG^^A&F *I"W`TAx'1iHG^^A&F *ILLIJ
 JJ00A
	
 JJ00A
	
 JJ00[!
	
  1:.KHhG"/8,HaHG"JSGM8WaAxGLLIJ>>!q 11hnnQ6G8>>Z[K\6\LLfg#>>q!	 	 Z['..q1K44;;q!  Z[%,,Q/K
 6=q 1(//RSBTHNN15<q 1(//RSBTHNN1 $%-]][%@"'%abM	2	+j ]$(JJ$?$?$L!%:[%H"%//%,,Q/0/0*	  &&--v/D/DV\/Z.[\ %)JJ$?$?$L!%:[%H"%//%,,Q/0/0*	  &&--v/D/DV\/Z.[\&DY&O#&.^_ *.)D)DY\k)D)l&$*$4$4(//24K4R4RST4UV3kAB/	%! *.)D)DY\k)D)l&$*$4$4(//24K4R4RST4UV3kAB/	%! -H,H$$%<=$$%56$$%56$$%:;$$%:;IMI]I]D(()@)E)EFBFBVBVD(()9)>)>?BFBVBVD(()9)>)>?GKG[G[D(()>)C)CDGKG[G[D(()>)C)CD''NN1
 LLno  *6:6J6J$$X]]3##IabM2i  ''7SVTV<X$QZ^,M$	;;I}U % 	##H-i  ''5	!  ''
3  ''
3  ''
3	!  ''
3  ''
3  ''
3  ''
3	!  ''
3  ''
3	!$QZ^WQZ^<M$	;;I}U % i  ''5	!  ''
3  ''
3r   )r   )r/   r/   r/   r/   r/   r/   N)__name__
__module____qualname____firstlineno____doc__r
   intr   strr   r>   rD   r   r   r  __static_attributes____classcell__r   s   @r   r   r      s    

 
 	
6 "66 6 	6
 6 6 6 6 6 6 6 6 t|6 
T	6pZxGRK  K r   r   c            
       t   ^  \ rS rSrS\4U 4S jjrS\S\4S jrS\4S jr	S	\
S
\
S\
S\
S\
4
S jrS rSrU =r$ )FusionRotaryEmbeddingsiS  r   c                 |   > SU l         [        TU ]	  XR                   U R                   U R                   S-   S/5        g )Nr   z.1r   )	base_namer   r   )r   r   r   s     r   r   FusionRotaryEmbeddings.__init__T  s3    *RVAVX]0^_r   rot_emb_nodefunctionc                 Z  ^ / / pCUR                    H  nUR                  S:X  d  M  UR                  / :X  d  M'  UR                  S   UR                  ;   d  MF  UR	                  U5        [        UR                  5      R                  UR                  S   5      nUR	                  UR                  U   5        M     / nU Hr  nUR                  S   R                  n	U R                  R                  S5      U	l        U R                  R                  U	5        UR	                  U	R                  5        Mt     [        XGSS9 Hh  u  mn
[        [        U4S jU R                  R                  R                  R                   5      5      nU H  n[         R"                  " UTU
5        M     Mj     U$ )NConstantr   F)strictc                 "   > TU R                   ;   $ N)r    )entryextra_outputs    r   <lambda>?FusionRotaryEmbeddings.reassign_extra_outputs.<locals>.<lambda>n  s    8Sr   )noder   r    r!   r   listindexr<   tr   r8   r3   r   zipfiltergraphr
   replace_node_input)r   r/  r0  extra_constantsextra_outputsfn_nodeoutput_indexextra_initializersextra_constantconstant_tensorprotoextra_initializernodes_to_updatenode_to_updater7  s                @r   reassign_extra_outputs-FusionRotaryEmbeddings.reassign_extra_outputs[  sd   )+R}}G*,"1DXYIZ^f^m^mIm&&w/#HOO4::7>>!;LM$$\%8%8%FG	 %  -N#1#;#;A#>#@#@ (,

(C(CJ(O %JJ&&';<%%&:&?&?@	 . 03=]b/c+L+"6*SUYU_U_UeUeUkUkUpUp#qrO"1,,^\K\] #2 0d
 r   r:  c                   ^^ U R                   R                  U R                  5      nU R                   R                  TSS/SS/5      nUb  Uu  pEO[        R                  S5        g UR                  S   TR                  S   /n[        [        U4S jU R                   R                   R                  R                  5      5      n[        [        U4S jU R                   R                   R                  R                  5      5      nSu  p[        U5      S:X  Ga  [        U5      S:X  Ga  U R                   R                  U	5      Gc  U R                   R                  U
5      Gc  [        R                  " US   R                   S   R"                  5      R%                  5       n[        R                  " US   R                   S   R"                  5      R%                  5       n[&        R(                  " U	[*        R,                  [        UR.                  5      UR1                  5       R3                  5       S	9nU R                   R5                  XR6                  5        [&        R(                  " U
[*        R,                  [        UR.                  5      UR1                  5       R3                  5       S	9nU R                   R5                  XR6                  5        U R8                  R;                  US   US   /5        UR;                  X/5        TR                  n[        U5      S:  a  [        [        U4S
 jU R                   R                   R<                  5      5      n[        U5      S:X  d   eU R?                  TUS   5      m[        [        U4S jU5      5      n[        U5      S:X  d   e[&        R@                  " U R                  UUUSS9nSUl!        U R8                  RE                  U5        U$ )Nr   r   r   z.fuse_rotary_embeddings: failed to match MatMulrH   c                 B   > U R                   S   TR                  S   :H  $ )Nr   rO   r!   r    constantr:  s    r   r8  OFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>      hooa6HDJJWXM6Yr   c                 B   > U R                   S   TR                  S   :H  $ )Nr   r   rP  rQ  s    r   r8  rS    rT  r   	cos_cache	sin_cacher3   r   r   r   c                 6   > U R                   TR                  :H  $ r5  )r3   r   )fnr:  s    r   r8  rS    s    "''T\\*Ar   c                    > U T;  $ r5   )output_namerC  s    r   r8  rS    s    TaAar   r1   r2   r3   interleavedr4   )#r   r8   r-  rX   r6   r7   r!   r    r;  r?  r@  r:  r   r   r	   to_arrayr<   r=  squeezer   make_tensorr   FLOATshapeflattentolistr   r   r   r9   	functionsrL  r:   r;   r   )r   r:  rotary_emb_node_namematmul_pathreshape_nodematmul_noderotary_emb_inputscos_cache_nodesin_cache_nodecos_cache_namesin_cache_namerW  rX  cos_cache_tensorsin_cache_tensorrotary_emb_outputsfuncrotary_emb_noderC  s    `                @r   &create_rotary_embeddings_from_function=FusionRotaryEmbeddings.create_rotary_embeddings_from_functiont  s@   #zz::4>>Jjj22!F

 "(3%L+LLIJ q!JJqM
 f%Y[_[e[e[k[k[q[q[v[vwxf%Y[_[e[e[k[k[q[q[v[vwx)A& 1$N#q(

**>:B

**>:B$--nQ.?.I.I!.L.N.NOWWYI$--nQ.?.I.I!.L.N.NOWWYI%11#%++)//*&&(//1	  JJ&&'79M9MN%11#%++)//*&&(//1	  JJ&&'79M9MN  ''):N1<M(NO  .!AB![[!"Q&A4::CSCSC]C]^_Dt9>!> 77d1gFM!%f-acu&v!w)*a/// **NN$&%
 "1##L1r   r_   position_ids	cos_slice	sin_slicer!   c                   ^^ U R                   R                  U R                  5      n[        [	        U4S jU R                   R                   R
                  R                  5      5      n[        [	        U4S jU R                   R                   R
                  R                  5      5      nSu  p[        U5      S:X  Ga  [        U5      S:X  Ga  U R                   R                  U	5      Gc  U R                   R                  U
5      Gc  [        R                  " US   R                  S   R                  5      R                  5       n[        R                  " US   R                  S   R                  5      R                  5       nUR                  S   nUS S 2S US-  24   nUS S 2S US-  24   n[        R                   " U	["        R$                  [        UR                  5      UR'                  5       R)                  5       S9nU R                   R+                  XR,                  5        [        R                   " U
["        R$                  [        UR                  5      UR'                  5       R)                  5       S9nU R                   R+                  XR,                  5        U R.                  R1                  US   US   /5        [        R2                  " U R                  XX/U/USS9nS	Ul        U$ )
Nc                 (   > U R                   S   T:H  $ Nr   r!   )rR  rz  s    r   r8  LFusionRotaryEmbeddings.create_rotary_embeddings_from_nodes.<locals>.<lambda>      hooa6HI6Ur   c                 (   > U R                   S   T:H  $ r~  r  )rR  r{  s    r   r8  r    r  r   rV  rH   r   rO   rY  r_  r4   )r   r8   r-  r;  r?  r@  r:  r   r   r	   ra  r<   r=  rb  re  r   rc  r   rd  rf  rg  r   r   r   r9   r:   r;   )r   r_   ry  rz  r{  r!   ri  rn  ro  rp  rq  rW  rX  	head_sizerr  rs  rv  s      ``            r   #create_rotary_embeddings_from_nodes:FusionRotaryEmbeddings.create_rotary_embeddings_from_nodes  ss     $zz::4>>J f%UW[WaWaWgWgWmWmWrWrstf%UW[WaWaWgWgWmWmWrWrst)A& 1$N#q(

**>:B

**>:B$--nQ.?.I.I!.L.N.NOWWYI$--nQ.?.I.I!.L.N.NOWWYI "*I!!%7	Q%7"78I!!%7	Q%7"78I%11#%++)//*&&(//1	  JJ&&'79M9MN%11#%++)//*&&(//1	  JJ&&'79M9MN  ''):N1<M(NO **NNnMH%
 "1r   c                   ^% U R                   UR                  ;  a  UR                  S:w  a  g S m%UR                  S:w  Ga  [        UR                  5      S;  d  UR                  S   S;  a  [        R                  S5        g U R                  U5      m%T%c  [        R                  S5        g U R                  R                  U5        [        [        U%4S jU R                  R                  R                  R                  5      5      n[        U5      S:X  d   eU R                  R                  R                  R                  R                  US   5        GO)U R                  R                  U/ S	Q/ S
Q5      nU R                  R                  U/ SQ/ S
Q5      nU=(       d    UnU R                  R                  U/ SQ/ SQ5      nU R                  R                  U/ SQ/ SQ5      n	U=(       d    U	n
Ub  U
c  [        R                  S5        g U R                  R                  U/ SQ/ SQ5      nU R                  R                  U/ SQ/ SQ5      nU=(       d    UnU R                  R                  U/ SQ/ SQ5      nU R                  R                  U/ SQ/ SQ5      nU=(       d    UnUb  Uc  [        R                  S5        g US   R                   US   R                   :w  d`  US   R                   U
S   R                   :w  d@  US   R                   US   R                   :w  d   US   R                   U
S   R                   :w  a  [        R                  S5        g U R                  R                  USS/SS/5      nU R                  R                  USS/SS/5      nU=(       d    UnUc  [        R                  S5        g Su  nnnU R                  R                  U/ SQ/ SQ5      nU R                  R                  U/ S Q/ S!Q5      nU R                  R                  U/ S"Q/ S#Q5      nU R                  R                  U/ S$Q/ S%Q5      nUb  UnUS&   R                  S   nOUb  UnUS'   R                  S   nOjUb'  UnUS&   R                  S   nUS(   R                  S   nO@Ub'  UnUS'   R                  S   nUS(   R                  S   nO[        R                  S)5        g S*u  nnU R                  R                  U/ SQ/ S+Q5      nU R                  R                  U/ S Q/ S,Q5      nU R                  R                  U/ S"Q/ S-Q5      nU R                  R                  U/ S$Q/ S.Q5      n Ub  UnUS&   R                  S   nOUb  UnUS'   R                  S   nOjUb'  UnUS&   R                  S   nUS(   R                  S   nO@U b'  U nUS'   R                  S   nUS(   R                  S   nO[        R                  S)5        g US/:X  a  U R                  R                  US(   S0/S/5      n!U R                  R                  US(   S0/S/5      n"U!b#  U"b   U!S   R                   U"S   R                   :w  a  [        R                  S15        g U"S   R                  S   nO/ n!/ n"S2u  n#n$UU:X  a  UU:X  d  UU:X  a^  UU:X  aX  US3   R                   US3   R                   :w  d   US   R                   US   R                   :w  a  [        R                  S45        g GOUU:X  a  UU:X  d  UU:X  a  UU :X  a  US   R                   US   R                   :w  a  [        R                  S55        g U R                  R                  US   S6S7/SS/5      n#U R                  R                  US   / S8Q/ S9Q5      n$U#bB  U$b?  U R                  R#                  U#S   R                  S   5      b  U$S   R                  S:w  a  [        R                  S:5        g O[        R                  S;5        U R%                  US   R&                  S   UUUUR&                  S   5      m%T%c  [        R                  S5        g U R)                  U/5        U R)                  US S 5        U R)                  US S 5        U R)                  US S 5        U R)                  U
S S 5        U R)                  US S 5        U R)                  U5        U R)                  U5        U R)                  U!S S 5        U R)                  U"S S 5        U#b<  [        U R                  R+                  U#S   5      5      S:X  a  U R)                  U#5        U$b  U R)                  U$S S 5        U R-                  U R                   5        U R.                  U R0                  T%R                   '   U R2                  R                  T%5        S<U l        g )=Nr   >   r      rH   >   pospos_idpos_idsposition_idry  zLfuse_rotary_embeddings: failed to verify inputs for RotaryEmbedding functionz=fuse_rotary_embeddings: failed to create RotaryEmbedding nodec                 <   > U R                   TR                  S   :H  $ r~  )r3   r!   )r:  rv  s    r   r8  -FusionRotaryEmbeddings.fuse.<locals>.<lambda>  s    DII1G1G1J$Jr   r   )rP   rG   NegrT   r   r   )rP   rG   r  rT   rT   )	rP   rG   r  rT   rJ   r   rK   rL   r   )	rH   r   r   r   rH   r   r   r   r   )	rP   rG   r  rT   rJ   r   rK   rL   rT   z9fuse_rotary_embeddings: failed to match x2 in rotate_half)rP   rG   rT   r   )rH   r   rH   r   )rP   rG   rT   rT   )rP   rG   rT   rJ   r   rK   rL   r   )rH   r   rH   rO   r   r   r   r   )rP   rG   rT   rJ   r   rK   rL   rT   z9fuse_rotary_embeddings: failed to match x1 in rotate_halfr   zCfuse_rotary_embeddings: failed to match common input in rotate_halfrP   r   rT   z8fuse_rotary_embeddings: failed to match x in rotate_half)Nr/   r/   )	rP   rJ   rK   Squeezer  rT   rJ   rK   rL   )	rH   rH   r   r   r   r   rO   r   r   )rP   rJ   rK   r  r  rT   rJ   r   )rH   rH   r   r   r   r   rO   r   )rP   rJ   rK   rT   rJ   rK   rL   )rH   rH   r   r   rO   r   r   )rP   rJ   rK   rT   rJ   r   )rH   rH   r   r   rO   r   r   r   rO   z>fuse_rotary_embeddings: failed to match sin path in apply_rope)Nr/   )	r   rH   r   r   r   r   rO   r   r   )r   rH   r   r   r   r   rO   r   )r   rH   r   r   rO   r   r   )r   rH   r   r   rO   r   r/   r   zGfuse_rotary_embeddings: failed to match position ids path in apply_roper   r   zdfuse_rotary_embeddings: failed to match common Gather node and Shape node in sin cache and cos cachezRfuse_rotary_embeddings: failed to match common Add node in sin cache and cos cacherK   rL   )rK   rL   r   rM   zKfuse_rotary_embeddings: failed to match past_seq_len and curr_seq_len pathsz:fuse_rotary_embeddings: failed to match common cache pathsT)r-  r   r   r    r6   r7   rw  r   r   r;  r?  r   r@  
value_inforemoverX   r3   find_graph_inputr  r!   add_nodes_to_removeget_childrenr?   r   r   r   r   )&r   r:  r   r   old_shape_inferrotate_half_x2_path_1_1rotate_half_x2_path_1_2rotate_half_x2_path_1rotate_half_x2_path_2_1rotate_half_x2_path_2_2rotate_half_x2_path_2rotate_half_x1_path_1_1rotate_half_x1_path_1_2rotate_half_x1_path_1rotate_half_x1_path_2_1rotate_half_x1_path_2_2rotate_half_x1_path_2x_path_1x_path_2x_pathsin_pathrX  ry  
sin_path_1
sin_path_2
sin_path_3
sin_path_4cos_pathrW  
cos_path_1
cos_path_2
cos_path_3
cos_path_4position_ids_from_sin_pathposition_ids_from_cos_pathpast_seq_len_pathcurr_seq_len_pathrv  s&                                        @r   r  FusionRotaryEmbeddings.fuse  s	   >>-$,,%2G <<5 4::f,

1 F 1 kl"II$OO&\]   ''- #JDJJL\L\LbLbLmLmnO '1,,,JJ""--44_Q5GH( '+jj&B&B>'# '+jj&B&B:'# %<$V?V!&*jj&B&Be+'# '+jj&B&Ba+'# %<$V?V!$,0E0MXY&*jj&B&B7'# '+jj&B&B3'# %<$V?V!&*jj&B&B^('# '+jj&B&BZ('# %<$V?V!$,0E0MXY &b)..2G2K2P2PP(,115J25N5S5SS(,115J25N5S5SS(,115J25N5S5SSbc zz33$AH zz33 AH )F~WX 1=-Hi55m+J
 55a(J
 55W%J
 55K"J
 %%$RL..q1	'%$RL..q1	'%$RL..q1	'{003'%$RL..q1	'{003]^ #+Hi55m+J
 55a(J
 55W%J
 55K"J
 %%$RL..q1	'%$RL..q1	'%$RL..q1	'{003'%$RL..q1	'{003]^ r!-1ZZ-I-IQKKC.*
 .2ZZ-I-IQKKC.* /6191!499=WXY=Z=_=__LL!jk9!<BB1E-/*-/*3=00J&8z+AJ&8z+AB<$$(9(99Xb\=N=NRZ[]R^RcRc=cLL~ 	 >d
 j(X-CJ&8z+AB<$$(9(99LL!uv$(JJ$@$@RLw'F%! %)JJ$@$@RL4%! &-(0zz223DR3H3N3Nq3QRZ(,44CLL!no D
 YZ"FF%b)003AO &\] $$dV,$$%:3B%?@$$%:3B%?@$$%:3B%?@$$%:3B%?@$$VCR[1$$X.$$X.$$%?%DE$$%?%DE ,TZZ5L5LM^_`Ma5b1cgh1h (():; ,(():3B)?@dnn-=A=Q=Q$$_%9%9:  1r   )r-  r   )r   r!  r"  r#  r
   r   r   r   rL  rw  r&  r  r  r'  r(  r)  s   @r   r+  r+  S  su    `i `9  2H9 HT66 6 	6
 6 6pA  A r   r+  )loggingfusion_attentionr   fusion_baser   onnxr   r   r   r   r	   
onnx_modelr
   	getLoggerr   r6   r   r+  r]  r   r   <module>r     sF   
  ,  L L  			8	$A O A H"d V d r   