
    h//                     n    S SK Jr  S SKrS SKJr  S SKJrJrJ	r	J
r
  S SKJr  \" \5      r " S S\5      rg)    )	getLoggerN)Fusion)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                      ^  \ rS rSrSrS\S\S\4U 4S jjrS\S\S	\	\\4   4S
 jr
S\S\S\S\S\S\S\S\S\S\S	\S-  4S jrS rSrU =r$ )FusionAttentionVae   zA
Fuse Attention subgraph of Vae Decoder into one Attention node.
modelhidden_size	num_headsc                 ^   > [         TU ]  USS/5        X l        X0l        SU l        SU l        g )N	AttentionSoftmaxT)super__init__r   r   num_heads_warninghidden_size_warning)selfr   r   r   	__class__s       g/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_attention_vae.pyr   FusionAttentionVae.__init__   s3    i[9&" "&#'     	reshape_qadd_qreturnc                 &   U R                   R                  US5      nUb  [        UR                  5      S:w  a  U R                  U R
                  4$ U R                   R                  UR                  S   5      nUb/  [        U[        R                  5      (       a  UR                  S:X  d  U R                  U R
                  4$ [        U5      nUS::  a  U R                  U R
                  4$ U R                   R                  U5      u  pgUb/  [        U[        R                  5      (       a  UR                  S:w  a  U R                  U R
                  4$ UR                  S   nU R                  S:  aG  XPR                  :w  a8  U R                  (       a'  [         R#                  SXPR                  5        SU l        U R
                  S:  aG  XR
                  :w  a8  U R$                  (       a'  [         R#                  SXR
                  5        SU l        XX4$ )zDetect num_heads and hidden_size from a reshape node.

Args:
    reshape_q (NodeProto): reshape node for Q
    add_q (NodeProto): add node for Q

Returns:
    Tuple[int, int]: num_heads and hidden_size
         r   z?Detected number of attention heads is %d. Ignore --num_heads %dFz3Detected hidden size is %d. Ignore --hidden_size %d)r   
get_parentleninputr   r   get_constant_value
isinstancenpndarraysizeintget_constant_inputndimshaper   loggerwarningr   )	r   r   r   concatvaluer   _biasr   s	            r   get_num_heads_and_hidden_size0FusionAttentionVae.get_num_heads_and_hidden_size   s    &&y!4>S.!3>>4#3#333

--fll1o>!j

&C&C

VW>>4#3#333J	>>>4#3#333**//6L*T2::">">499PQ>>>4#3#333jjm>>A)~~"=%%UW`bpbp */&aK3C3C$C''TVacscst+0(%%r   q_matmulq_addk_matmulk_addv_matmulv_add
input_nameoutput_nameNc                 
   UR                   S   U	:w  d&  UR                   S   U	:w  d  UR                   S   U	:w  a@  [        R                  SUR                   S   UR                   S   UR                   S   5        gUS:  a  X-  S:w  a  [        R                  SX5        gU R                  R	                  UR                   S   5      nU R                  R	                  UR                   S   5      nU R                  R	                  UR                   S   5      nU(       a  U(       a  U(       d  gU R                  R	                  UR                   S   5      =(       d(    U R                  R	                  UR                   S   5      nU R                  R	                  UR                   S   5      =(       d(    U R                  R	                  UR                   S   5      nU R                  R	                  UR                   S   5      =(       d(    U R                  R	                  UR                   S   5      n[
        R                  " U5      n[
        R                  " U5      n[
        R                  " U5      n[        R                  " UR                  5      n[        R                  " UR                  5      n[        R                  " UR                  5      nUR                  S:X  a  [        R                  S5        g[
        R                  " U5      n[
        R                  " U5      n[
        R                  " U5      nUR                  UR                  :w  d  UR                  UR                  :w  a  gUR                  S   nUR                  S   nUR                  S   nUU:X  a  UU:X  d   eUS:  a  UU:w  a  [        SU S	U S
35      e[        R                  " UR                  SS 5      n[        R                  " UUU4SS9nS[        U5      -  nU R                  R                  S5      n UUs=:X  a  U:X  d   e   eSn![        R                  " UUU4SS9n"SU-  n!U R                  U S-   [         R"                  UU/US9  [        R$                  " SU/[        R&                  S9n"SU-  n!U R                  U S-   [         R"                  U!/U"S9  U	U S-   U S-   /n#[(        R*                  " SU#U
/U S9n$SU$l        U$R.                  R1                  [(        R2                  " SU5      /5        U R5                  S5        U$$ )a  Create an Attention node.

Args:
    q_matmul (NodeProto): MatMul node in fully connection for Q
    q_add (NodeProto): Add bias node in fully connection for Q
    k_matmul (NodeProto): MatMul node in fully connection for K
    k_add (NodeProto): Add bias node in fully connection for K
    v_matmul (NodeProto): MatMul node in fully connection for V
    v_add (NodeProto): Add bias node in fully connection for V
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
    hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
    input_name (str): input name
    output_name (str): output name

Returns:
    Union[NodeProto, None]: the node created or None if failed.
r   zRFor self attention, input hidden state for q and k/v shall be same. Got %s, %s, %sNz9input hidden size %d is not a multiple of num of heads %dr    
   zBweights are in fp16. Please run fp16 conversion after optimizationzInput hidden size (z,) is not same as weight dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)axis   r   _qkv_weight)name	data_typedimsvals)dtype	_qkv_bias)inputsoutputsrD   zcom.microsoftr   zAttention (self attention))r%   r/   debugr   get_initializerr   to_arrayr(   prodr.   rE   
ValueErrorstackr+   create_node_nameadd_initializerr   FLOATzerosfloat32r   	make_nodedomain	attributeextendmake_attributeincrease_counter)%r   r7   r8   r9   r:   r;   r<   r   r   r=   r>   q_weight_tensork_weight_tensorv_weight_tensorq_bias_tensork_bias_tensorv_bias_tensorq_biask_biasv_biasq_bias_shapek_bias_shapev_bias_shapeq_weightk_weightv_weight
qw_in_size
kw_in_size
vw_in_sizeqw_out_size
qkv_weightqkv_weight_dimattention_node_nameqkv_bias_dimqkv_biasattention_inputsattention_nodes%                                        r   create_attention_node(FusionAttentionVae.create_attention_nodeF   s   < >>!
*hnnQ.?:.MQYQ_Q_`aQbfpQpLLdq!q!q!	 ? 7A=LLTVam**44X^^A5FG**44X^^A5FG**44X^^A5FGO

225;;q>BpdjjF`F`afalalmnaoFp

225;;q>BpdjjF`F`afalalmnaoFp

225;;q>BpdjjF`F`afalalmnaoFp&&}5&&}5&&}5wwv||,wwv||,wwv||, $$*LL]^((9((9((9 >>X^^+x~~/O^^A&
^^A&
^^A&
Z'J*,DDD?{j8%k]2^_i^j kJ J  gghnnQR01XXx8<1E
S--"jj99+F|;|;;;;;88VVV41=<'$}4!''n-	 	 	
 88Q,BJJ?;${2!''	 	 	
 -/+-
  ))# M$	
 !0  '')>)>{I)V(WX:;r   c                 n   U R                   R                  USUSS9nUc  g U R                   R                  USUSS9nUc  g U R                   R                  USUSS9nUc  g U R                   R                  USUSS9nUc  g U R                   R                  USUSS9nUc  g U R                   R                  USUSS9n	U	c  g U R                   R                  U	SUSS9n
U
c  g U R                   R                  U/ SQ/ SQ5      nUc  [        R	                  S	5        g Uu      pnU R                   R                  U/ S
Q/ SQ5      nUb  Uu  nnnnO[        R	                  S5        g U R                   R                  U/ SQ/ SQ5      nUc  [        R	                  S5        g Uu  nnnnnU R                   R                  U/ SQ/ SQ5      nUc  [        R	                  S5        g Uu        nnnUnU R                  UU5      u  nnUS::  a  [        R	                  S5        g U R                  UUUUUUUUUR                  S   UR                  S   5
      nUc  g U R                  R                  U5        U R                  U R                  UR                  '   U R                  R                  UU/5        SU l        g )NMatMulF)	recursiveReshape	TransposeAdd)r|   r}   r|   r~   rz   )r    r   r   r   Nz&fuse_attention: failed to match v path)r   r~   Mulrz   )r   r   r   r   z'fuse_attention: failed to match qk path)r   r   r   r   Nz&fuse_attention: failed to match q path)r}   r|   r}   r|   r~   rz   )r    r   r   r   r   Nz&fuse_attention: failed to match k pathr   z*fuse_attention: failed to detect num_headsT)r   find_first_child_by_typematch_parent_pathr/   rL   r5   rw   r%   outputnodes_to_addappendthis_graph_namenode_name_to_graph_namerD   nodes_to_removerZ   prune_graph) r   softmax_nodeinput_name_to_nodesoutput_name_to_node
matmul_qkvreshape_qkvtranspose_qkvreshape_out
matmul_outadd_outtranspose_outv_nodesr3   add_vmatmul_vqk_nodes_softmax_qk	_add_zero_mul_qk	matmul_qkq_nodes_transpose_qr   r   matmul_qk_nodesadd_kmatmul_kattention_last_nodeq_num_headsq_hidden_sizenew_nodes                                    r   fuseFusionAttentionVae.fuse   s   ZZ88xQdpu8v
jj99*iQdpu9v

;;&9U < 
  jj999&9U : 
 ZZ88hPcot8u
**55j%I\hm5n?

;;G[Reqv;w **..LN`
 ?LLAB%,"Aq:://
<_amn;C8[)WiLLBC**..KM_
 ?LLAB8?5L)UH**..XZo
 ?LLAB(/%Aq!UH)%)%G%G	SX%Y"]!LLEF --NN1&&q)
   *6:6J6J$$X]]3##%8-$HI  r   )r   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r	   r+   r   r   tupler5   strrw   r   __static_attributes____classcell__)r   s   @r   r   r      s    (i (c (c ('&y '& '&W\]`be]eWf '&RHH H 	H
 H H H H H H H 
T	HT\  \ r   r   )loggingr   numpyr(   fusion_baser   onnxr   r   r   r   
onnx_modelr	   r   r/   r    r   r   <module>r      s1   
    = =  	8	]  ] r   