
    hU                         S SK Jr  S SKrS SKJr  S SKJr  S SKJ	r	  S SK
Jr  \" \5      r " S S\5      r " S	 S
\5      rg)    )	getLoggerN)Fusion)FusionUtils)helper)	OnnxModelc                   P   ^  \ rS rSrSrS\S\4U 4S jjrS rS r	S r
S	 rS
rU =r$ )FusionGptAttentionPastBase   z3Base class for GPT Attention Fusion with past statemodel	num_headsc                 v   > [         TU ]  USSS/S5        X l        [        U5      U l        0 U l        S U l        g )N	AttentionLayerNormalizationSkipLayerNormalizationz	with past)super__init__r   r   utilscasted_attention_maskmask_filter_valueselfr   r   	__class__s      g/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_gpt_attention.pyr   #FusionGptAttentionPastBase.__init__   s?    .BD\-]_jk" '
%'"!%    c                    U R                   R                  USU5      nUb  UR                  S:w  a  [        R	                  S5        g U R                   R                  US5      S:w  a  [        R	                  S5        g UR                  S   nU R                   R                  USU5      nU(       a  UR                  S:X  a  UnO?U R                   R                  USS/SS/5      nUc  [        R	                  S5        g US   nU R                   R                  US5      S:w  a  [        R	                  S	5        g UR                  S   n	XY:w  a  [        R	                  S
5        g U$ )Nr   Gatherz,match_past_pattern_1: expect Gather for past   z9match_past_pattern_1: expect indices=1 for Gather of past	Transposez7match_past_pattern_1: failed match Transpose and Gatherz;match_past_pattern_1: expect indices=0 for Gather k of pastz,match_past_pattern_1: expect past to be same)r   
get_parentop_typeloggerdebugfind_constant_inputinputmatch_parent_path)
r   concat_kconcat_voutput_name_to_nodegatherpastparentgather_past_kpast_k_nodespast_ks
             r   match_past_pattern_1/FusionGptAttentionPastBase.match_past_pattern_1   s7   & &&x4GH>V^^x7LLGH::))&!49LLTU||A&&x4GHfnn0"M::77;PXBY\]_`[abL#VW(,M::))-;q@LLVW$$Q'>LLGHr   c                 h   U R                   R                  USU5      nUb  UR                  S:w  a  [        R	                  S5        g U R                   R                  USU5      nUb  UR                  S:w  a  [        R	                  S5        g U R                   R                  5       nUS:  aj  [        R                  " USS/5      (       d  [        R	                  S5        g [        R                  " US	S
S
/5      (       d  [        R	                  S5        g OsU R                  R                  US
S/5      (       d  [        R	                  S5        g U R                  R                  US
S
S
/5      (       d  [        R	                  S5        g [        R                  " USSSS9(       d  [        R	                  S5        g UR                  S   nU R                   R                  USS/SS/5      nUc  [        R	                  S5        g US   R                  S   n	Xy:w  a  [        R                  S5        g U$ )Nr   Squeezez:match_past_pattern_2: expect Squeeze as parent of concat_vSplitz0match_past_pattern_2: expect Split for past path   axesz:match_past_pattern_2: axes != [0] for Squeeze in past pathsplitr   z<match_past_pattern_2: split != [1, 1] for Split in past pathaxis)default_valuezKmatch_past_pattern_2: attribute axis of Split are not expected in past pathz7match_past_pattern_2: failed to match past_k_nodes pathr    z,match_past_pattern_2: expect past to be same)r   r!   r"   r#   r$   get_opset_versionr   check_node_attributer   check_node_input_valuer&   r'   info)
r   r(   r)   r*   squeezer8   opset_versionr,   r/   r0   s
             r   match_past_pattern_2/FusionGptAttentionPastBase.match_past_pattern_2K   s   , **''!5HI?goo:LLUV

%%gq2EF=EMMW4LLKL

446233GVaSIIYZ33E7QFKK[\ L ::44Wa!EEYZ::44UA1vFF[\//vqPQRLLfg{{1~zz33Hy'>RUVXYTZ[LLRSb!''*>KKFGr   c                    U R                   R                  USUSS9nU(       d  [        R                  S5        g U R                   R                  USUSS9nU(       d  [        R                  S5        g UR                  S   nU$ )N	UnsqueezeF)	recursivezexpect unsqueeze for presentConcatzexpect concat for presentr   )r   find_first_child_by_typer#   r>   output)r   r)   input_name_to_nodesunsqueeze_present_vconcat_presentpresents         r   match_present(FusionGptAttentionPastBase.match_present   s    "jjAAk#6% B 
 #KK67<<+>% = 
 KK34 ''*r   c                 6   XR                   ;   a  U R                   U   nU$ U R                  R                  U5      (       a-  U R                  R	                  U5      u  p2X R                   U'   U$ U R                  R                  U5      u  p$X R                   U'   U$ N)r   r   find_graph_inputr   cast_graph_input_to_int32cast_input_to_int32)r   
input_nameattention_mask_input_namecasted	cast_nodes        r   cast_attention_mask.FusionGptAttentionPastBase.cast_attention_mask   s    333(,(B(B:(N% )( ZZ((4404

0T0TU_0`-F5N&&z2 )( 48::3Q3QR\3]0%5N&&z2((r   )r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r   intr   r1   rA   rM   rX   __static_attributes____classcell__r   s   @r   r	   r	      s5    =&i &C &/bAF"	) 	)r   r	   c                   D   ^  \ rS rSrSrS\S\4U 4S jjrS rS r	Sr
U =r$ )	FusionGptAttention   zH
Fuse GPT-2 Attention with past state subgraph into one Attention node.
r   r   c                 $   > [         TU ]  X5        g rP   )r   r   r   s      r   r   FusionGptAttention.__init__   s    *r   c
                    U R                   R                  S5      n
[        R                  " SXaX(U/U
S-   U/U
S9nSUl        UR
                  R                  [        R                  " SU R                  5      [        R                  " SU	(       a  SOS	5      /5        U R                  bD  UR
                  R                  [        R                  " S
[        U R                  5      5      /5        [        R                  " SU
S-   UR                  S   /U
S-   /U
S-   S9n[        R                  " SU
S-   UR                  S   /U/U
S-   S9nU R                  R                  XU/5        U R                  U R                  UR                  '   U R                  U R                  UR                  '   U R                  U R                  UR                  '   g )NGptAttentionr   _output)inputsoutputsnamezcom.microsoftr   unidirectionalr   r   r   MatMul_matmul_output_matmulAdd   _add)r   create_node_namer   	make_nodedomain	attributeextendmake_attributer   r   floatr&   nodes_to_addthis_graph_namenode_name_to_graph_namerm   )r   	fc_weightfc_biasgemm_qkvr,   rL   r&   rH   maskis_unidirectionalattention_node_nameattention_nodematmul_nodeadd_nodes                 r   create_attention_node(FusionGptAttention.create_attention_node   s    #jj99.I))gT:(94g>$	
 !0  ''%%k4>>B%%&6=NTUV	
 !!-$$++V-B-BCVX]^b^t^tXu-v,wx&&')3X^^A5FG(+;;<$y0	
 ##'*::HNN1<MNH$v-	
 	  .x!HI<@<P<P$$^%8%899=9M9M$$[%5%566:6J6J$$X]]3r   c                    S nS n/ nUR                   S:H  nS nU(       d"  U R                  R                  U/ SQ/ SQUUS9nO!U R                  R                  U/ SQ/ SQUUS9nUc  g S n	U(       d   Uu  n
nnnnnnU
R                  SUS   -
     n	O	Uu  nnnnnnU R                  R                  U/ S	Q/ S
Q5      nUc  [        R                  S5        g Uu  nnnnU R                  R                  U/ SQ/ SQU5      nUc"  U R                  R                  U/ SQ/ SQU5      nUc  U R                  R                  U/ SQ/ SQU5      nUc"  U R                  R                  U/ SQ/ SQU5      nUc  [        R                  S5        g US   R                  S   nU R                  R                  US   5      u  nnUS   R                  U   nO$US   R                  S   nUS   R                  S   nUS   nU	b&  U	UR                  ;  a  [        R                  S5        g SnS nS nS nU R                  R                  U/ SQ/ SQ5      n U b  U u  n!n"n#n$n%U R                  R                  U"/ SQ/ SQ5      n&U&c  [        R                  S5        g U&S   n'U&S   nU$U':w  a  [        R                  S5        g [        U&5      S:  aB  U&S   R                   S:X  a/  U R                  R                  U&S   5      u  nn(U(S:w  a  U(* U l        GOU R                  R                  U/ S Q/ S!Q4/ S"Q/ S#Q4/U5      u  nn nU c  [        R                  S$5        g U S%   n)U S&   n$U S   n%US:X  a  U S   n*U R                  R                  U*/ S'Q/ S(Q4/ S)Q/ S*Q4/ S+Q/ S,Q4/U5      u  nnnUc  [        R                  S-5        g [        U5      S:  aA  US   R                   S:X  a.  U R                  R                  US   5      u  nn(U(S:w  a  U(U l        U R                  R                  U)/ S.Q/ S/Q4/ S0Q/ S1Q4/U5      u  nn&nU&c  [        R                  S25        g U&US:X  a  SOS   nU R                  R                  U&S   SU5      n+U+R                   S3:X  a  U+n'U$U':w  a  [        R                  S5        g O(U+R                   S4:X  a  U+nO[        R                  S25        U R                  R                  UR                  S   5      n,[        U,[        R                  5      (       aL  [        U,R                  5      S5:X  a3  U,R                  S S S6:X  a   U,R                  S   U,R                  S   :X  d  [        R                  S75        g [        R                   " U,[        R"                  " U,5      5      (       a  S8nOZ[        R                   " U,[        R$                  " [        R"                  " U,5      5      5      (       d  [        R                  S95        g U R                  R                  U%/ S:Q/ S;Q5      n-U-c  [        R                  S<5        g U-u  n.n/n0UU0:w  a  [        R                  S=5        g U R                  R                  U%/ S	Q/ S
Q5      n1U1cC  U R                  R                  U%/ S>Q/ S?Q5      n1U1c  [        R                  S@5        g U1u  nn2n3n4n5OU1u  n2n3n4n5UU5:w  a  [        R                  SA5        g U(       a  U2U:w  a  [        R                  SB5        g SCn6Ub#  US   R                  S   n7U R'                  U75      n6U R)                  U2UU5      =(       d    U R+                  U2UU5      nUc  [        R-                  SD5        g U R                  R/                  U5      (       d  [        R                  SE5        U R1                  UU5      nUc  [        R-                  SF5        g U R                  R3                  U5      (       d  [        R-                  SG5        g U R5                  UUUUUUR6                  S   UR6                  S   U6U5	        SU l        g )HNr   )rr   ReshapeGemmr   r   r   ro   )r   Nr   r   r   r   r   )r*   return_indice)r   r   r   r   r   ro   )Nr   r   r   r   r   r   r   )rF   r   r   r5   )r   r   r   r   z&fuse_attention: failed to match v path)r   r   r   r   )r   r   r   r   )r   r   r   r   )rr   ro   r   )r   Nr   )rr   ro   r   z'fuse_attention: failed to match fc pathrs   r    zCUpstream Add and (Skip)LayerNormalization shall have one same inputT)SoftmaxSubMulDivro   )r   r   r   r   r   )
r   r   Slicer   rD   r   r4   r   Shaper   )
r   r   r   r   r   r   r   r   r   r   z8fuse_attention: failed to match unidirectional mask path   z-fuse_attention: skip since div_qk != div_maskr   i)r   Wherer   ro   )r   r   r   r   )r   rr   r   r   ro   )r   r   Nr   r   z(fuse_attention: failed to match qk nodes)r   r   CastrD   rD   r   )Nr   r   r   r   r   )r   r   rD   rD   r   )Nr   r   r   r   )r   r   rD   rD   )Nr   r   r   z9fuse_attention: failed to match input attention mask path)r   r   r   rD   r   r4   r   r   )r   r   r   r   r   r   r   r   )r   r   rD   r   r4   r   r   )r   r   r   r   r   r   r   z)fuse_attention: failed to match mask pathr   rF      )r   r   z4fuse_attention: skip since mask shape is not 1x1xWxWFzDfuse_attention: skip since mask is neither lower triangular nor ones)r   r   r5   )r   r   r   z&fuse_attention: failed to match q pathz.fuse_attention: skip since split_fc != split_q)r   rF   r   r   r5   )r   r   r   r   r   z&fuse_attention: failed to match k pathz.fuse_attention: skip since split_fc != split_kz8fuse_attention: skip since concat_k != concat_k_to_match z)fuse_attention: failed to match past pathzpast is not graph input.z,fuse_attention: failed to match present pathz!expect present to be graph output)r"   r   r'   r&   r#   r$   get_constant_inputlenr   match_parent_pathsr!   get_constant_value
isinstancenpndarrayshapeallclose	ones_liketrilrX   r1   rA   r>   rQ   rM   find_graph_outputr   rH   prune_graph)8r   normalize_noderI   r*   r,   rL   r   is_normalize_node_skiplayernorm	qkv_nodesanother_inputadd_qkvreshape_qkvr   	reshape_1	reshape_2transpose_qkv
matmul_qkvv_nodesr)   transpose_v	reshape_vsplit_fcfc_nodesr   i_r   layernorm_before_attentionr   
slice_maskinput_mask_nodesconcat_k_to_matchqk_nodes
softmax_qksub_qkmul_qkdiv_qk	matmul_qk
mask_nodesdiv_maskmul_valwhere_qkadd_qkdiv_or_concat	mask_dataq_nodestranspose_q	reshape_qsplit_qk_nodesr(   transpose_k	reshape_ksplit_krU   rT   s8                                                           r   fuseFusionGptAttention.fuse   s   *8*@*@D\*\'	.

44W($7+ 5 I 

44P%$7+ 5 I .  $MM!mA.>*>?M  **..z;fhtu?LLAB7>4;	8 :://@	
 zz33H#	H zz337#	H ::77? '	 FG ))!,I::00!=DAqqk''*G ))!,Iqk''*G%-b\" $>X>^>^)^LL^_ 
 :://
<fhwx>F;Z55 /J  !WX!"~H#AJ!LM:"z!}'<'<'E!ZZ:::a=I
7f$.5XD* "ZZ:::LIACUV $NAx GH|Hb\F IAv!!)-)F)F X1
 P.
 E+ (!*&#Q$ $+LL!\]'(1,1A!1D1L1LPU1U!%!>!>?OPQ?R!SJAw&(18.#zz<< d0 \-
 $ Az1 !HI#aAQ7J JJ11*R.!EXYM$$-(X%LL!PQ & &&(2$1!HI JJ11*2B2B12EF	y"**--IOO$)#v-"iooa&88LLOP;;y",,y"9:: %YY0G(HIILL_`**..y:[]fg?LLAB,3)iwLLIJ**..y:egst?jj22HG
 EFAH>Hk9g:A7X{IwwLLIJ->!>LLST$&!')"-33A6J(,(@(@(L% ((8=PQ 
UYUnUnh 3V
 <KKCDzz**400LL34 $$X/BC?KKFGzz++G44KK;<""&--a0q!%
	
  r   )r   r   )rZ   r[   r\   r]   r^   r   r_   r   r   r   r`   ra   rb   s   @r   rd   rd      s.    +i +C +.K`   r   rd   )loggingr   numpyr   fusion_baser   fusion_utilsr   onnxr   
onnx_modelr   rZ   r#   r	   rd    r   r   <module>r      sA   
    $   	8	X) X)vw 3 w r   