
    h3                     h    S SK Jr  S SKrS SKJr  S SKJr  S SKJ	r	  \" \
5      rS r " S S\5      rg)	    )	getLoggerN)FusionGptAttentionPastBase)helper)	OnnxModelc                 "    [        X-
  5      S:*  $ )Ngư>)abs)valueexpected_values     p/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_gpt_attention_megatron.pyis_closer      s    u%&$..    c                   J   ^  \ rS rSrSrS\S\4U 4S jjrS rS r	S r
S	rU =r$ )
FusionGptAttentionMegatron   zV
Fuse GPT-2 Attention with past state subgraph from Megatron into one Attention node.
model	num_headsc                 $   > [         TU ]  X5        g )N)super__init__)selfr   r   	__class__s      r   r   #FusionGptAttentionMegatron.__init__   s    *r   c                 Z   U R                   R                  S5      nU R                  U5      n	UR                  S   n
UR                  S   UR                  S   :X  a  SOSn[
        R                  " SUUR                  S   UR                  U   U	U/X/US9nSUl        UR                  R                  [
        R                  " SU R                  5      [
        R                  " SS5      /5        U R                  bD  UR                  R                  [
        R                  " S	[        U R                  5      5      /5        U/nU R                  R                  U5        U H&  nU R                  U R                   UR"                  '   M(     U R$                  R'                  U5        S
U l        g )NGptAttentionr      	Attention)inputsoutputsnamezcom.microsoftr   unidirectionalmask_filter_valueT)r   create_node_namecast_attention_maskoutputinputr   	make_nodedomain	attributeextendmake_attributer   r!   floatnodes_to_addthis_graph_namenode_name_to_graph_namer   nodes_to_removeappendprune_graph)r   matmul_before_splitadd_before_splitpastpresentr%   reshape_qkvmaskattention_node_name
int32_maskr$   iattention_noder,   nodes                  r   fuse_attention_node.FusionGptAttentionMegatron.fuse_attention_node   s    #jj99.I--d3
##A&"((+/B/I/I!/LLAST))#))!, &&q) %$
 !0  ''%%k4>>B%%&6:	
 !!-$$++V-B-BCVX]^b^t^tXu-v,wx&'  . D6:6J6JD((3 ! 	##K0  r   c                    U R                   R                  U/ SQ/ SQ5      nUc  [        R                  S5        g Uu  pgp[	        U5      S:  aA  US   R
                  S:X  a.  U R                   R                  US   5      u  pUS:w  a  U* U l        UR                  S   UR                  S   :w  a  [        R                  S5        g U R                  R                  USS	5      (       d  [        R                  S
5        g U R                  R                  USS5      (       d  [        R                  S5        g U R                   R                  U	R                  S   5      (       d  [        R                  S5        g U R                  R                  USS/5      (       d  [        R                  S5        g U R                  R                  USS/5      (       d  [        R                  S5        gU R                  R                  USS/5      (       d  [        R                  S5        gU R                  R                  U	SS/5      (       d  [        R                  S5        g U R                  R                  U	SS/5      (       d  [        R                  S5        g U R                   R                  U/ SQ/ SQ5      nUb	  US   U:w  a  [        R                  S5        g U R                   R                  U	/ SQ/ SQ5      nUb	  US   U:w  a  [        R                  S5        g U R                   R                  U	/ SQ/ SQ5      nUb	  US   U:w  a  [        R                  S5        g U R                   R                  U	/ SQ/ S Q5      nUc!  U R                   R                  U	/ S!Q/ S Q5      nUb	  US   U:w  a  [        R                  S"5        g U	R                  S   $ )#N)MulSubSlicerB   )r   r   r   r   z8fuse_attention: failed to match unidirectional mask pathr   r   r@   i'  zCfuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]g     @z?fuse_attention failed: mul_mask input 1 is not constant 10000.0g      ?z;fuse_attention failed: sub_mask input 0 is not constant 1.0z+expect slick_mask input 0 to be graph inputzKfuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]   zIfuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]F   zJfuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]   zDfuse_attention failed: slice_mask input 3 (axes) is not constant [2]zEfuse_attention failed: slice_mask input 4 (steps) is not constant [1])	UnsqueezeGatherShapeMatMul)rE   r   r   r   z/fuse_attention: failed to match last slice pathz0fuse_attention: failed to match first slice path)rF   rA   rG   rH   rI   )r   r   r   r   r   z3fuse_attention: failed to match last slice sub path)rF   rA   rG   rH   LayerNormalization)r   r   r   r   r   )rF   rA   rG   rH   SkipLayerNormalizationz5fuse_attention: failed to match last slice sub path 1)r   match_parent_pathloggerdebuglenop_typeget_constant_inputr!   r%   r$   utilscheck_node_input_valuefind_graph_inputinfo)r   sub_qkmul_qk	matmul_qklayernorm_before_attention
mask_nodesmul_masksub_masklast_slice_mask
slice_mask_mul_vallast_slice_pathfirst_slice_pathfirst_slice_subfirst_slice_sub_1s                   r   
match_mask%FusionGptAttentionMegatron.match_maskJ   sG   ZZ11&:Z\hi
LLST<F9_z?Q:a=#8#8E#A66z!}EJA%*1&<<?o44Q77LL^_zz001gFFLLZ[zz001cBBLLVWzz**:+;+;A+>??KKEFzz00!aSIILLfgzz00!aSIILLdezz00!aSIILLefzz00QDDLL_`zz00QDDLL`a**66G
 "ob&9Y&FLLJK::77BL
 #'7';y'HLLKL**66=

 "ob&9Y&FLLNO JJ88I
 $ $

 < <Q! $(9"(=A[([LLPQ""r   c           	      
   S nS nUR                   S:H  nS nU(       d!  U R                  R                  U/ SQ/ SQUS9nO U R                  R                  U/ SQ/ SQUS9nUc  g S nU(       d  Uu  n	n
nnnnU	R                  S   nOUu  n
nnnnUR                  S   nU R                  R                  U/ SQ/ S	Q5      nUc!  U R                  R                  U/ S
Q/ S	Q5      nUc  [        R                  S5        g Uu  nnnnnnnUR                   S:X  a)  UUR                  S   :w  a  [        R                  S5        g UR                   S:X  a)  UUR                  S   :w  a  [        R                  S5        g U R                  R                  U/ SQ/ SQ5      nUc  [        R                  S5        g Uu  nnnnU R                  R                  US5      S:w  a  [        R                  S5        g U R                  UUUU5      nU R                  R                  U/ SQ/ SQ5      nUc  [        R                  S5        g Uu  nnn n!UU!:w  a  [        R                  S5        g U R                  R                  U/ SQ/ SQ5      n"U"c  [        R                  S5        g U"u  n#n$n%n&n'n(UU(:w  a  [        R                  S5        g U R                  R                  U'5      u  n)n*[        U*[        R                  5      (       a>  [        U*R                  5      S/:X  a$  U*S   S:X  a  U*S   S:X  a  U*S   S:  a	  U*S   S:  d  [        R                  S5        g U*S   n+U+U R                  :w  a,  [        R!                  SU+ S U R                   35        U+U l        U*S   n,U R                  R                  U#5      u  n)n*[#        [        R$                  " [        R$                  " U,5      5      5      n-['        U*U-5      (       d  [        R                  S!U* S"U- 35        g U R                  R                  U5      u  n)n*['        U*U-5      (       d  [        R                  S#U* S"U- 35        g U R)                  U%UU5      nUc  [        R                  S$5        g U R                  R+                  U5      (       d  [        R                  S%5        U R-                  UU5      nUc  [        R                  S&5        g U R                  R/                  U5      (       d  [        R!                  S'5        g U R1                  UUUUUR                  S   UU5        g )(NrL   )Addri   rI   Reshape	TransposerI   )r   r   Nr   r   r   )output_name_to_node)ri   rI   rj   rk   rI   )r   Nr   r   r   r   )Concatrk   rj   Splitri   rI   rK   )r   r   r   r   r   Nr   )rm   rk   rj   rn   ri   rI   rL   z&fuse_attention: failed to match v pathrK   zAfuse_attention: skip_input != layernorm_before_attention.input[0]rC   )SoftmaxrA   r@   rI   )r   r   r   r   z'fuse_attention: failed to match qk pathaxisz+fuse_attention failed: softmax_qk axis != 3)Divrk   rj   rn   z&fuse_attention: failed to match q pathz-fuse_attention: skip since split_v != split_q)rq   rk   rm   rk   rj   rn   )r   r   r   r   r   r   z&fuse_attention: failed to match k pathz-fuse_attention: skip since split_v != split_krD   r   rE   z:fuse_attention: reshape constant input is not [0, 0, N, H]zDetected num_heads=z. Ignore user specified value zfuse_attention: div_k value=z
 expected=zfuse_attention: div_q value=z!fuse_attention: match past failedz(fuse_attention: past is not graph input.z$fuse_attention: match present failedz1fuse_attention: expect present to be graph output)rQ   r   rM   r%   rN   rO   r$   get_node_attributerf   rR   
isinstancenpndarraylistshaper   rV   r+   sqrtr   match_past_pattern_2rU   match_presentfind_graph_outputr=   ).r   normalize_nodeinput_name_to_nodesrl   r4   r5   is_normalize_node_skiplayernorm	qkv_nodes
skip_inputadd_skipadd_after_attentionmatmul_after_attentionr6   transpose_qkv
matmul_qkvv_nodesconcat_vtranspose_v	reshape_vsplit_vr3   r2   rZ   qk_nodes
softmax_qkrW   rX   rY   attention_maskq_nodesdiv_qtranspose_q	reshape_qsplit_qk_nodesdiv_kr`   concat_ktranspose_k	reshape_ksplit_kr:   r	   r   hidden_size_per_headr
   s.                                                 r   fuseFusionGptAttentionMegatron.fuse   s#   *8*@*@D\*\'	.

44J%$7	 5 I 

44C"$7	 5 I 
. #& "*J #& (--a0J**.. %
 ?jj22 )G ?LLAB 	
& '..2FF8>>qAALL\] '..2JJ8??BBLL\]:://
<_amnLLBC2:/VVY::((V<ALLFGD^_**..y:bdpq?LLAB3:0YgLLHI**..K

 ?LLAB@G=8[)WgLLHI::00;5ubjj))U[[!aS(aAaAa1a1LLUV!H	&KK-i[8VW[WeWeVfgh&DN$Qx::0075rwwrww/C'DEF~..LL7wjHXYZ::0075~..LL7wjHXYZ ((8=PQ<LL<=zz**400LLCD $$X/BC?LL?@zz++G44KKKL  &--a0	
r   )r!   r   r1   )__name__
__module____qualname____firstlineno____doc__r   intr   r=   rf   r   __static_attributes____classcell__)r   s   @r   r   r      s4    +i +C +- ^X#t
 
r   r   )loggingr   numpyrt   fusion_gpt_attentionr   onnxr   
onnx_modelr   r   rN   r   r    r   r   <module>r      s4   
   ;   	8	/P
!; P
r   