
    h=b                     z    S SK Jr  S SKrS SKJr  S SKJr  S SKJ	r	J
r
JrJr  S SKJr  \" \5      r " S S\5      rg)	    )	getLoggerN)Fusion)FusionUtils)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                   H  ^  \ rS rSrSrS\4U 4S jjrS"S\S\4S jjr	S\S	\
S\4S
 jrS\S\S\4S jrS\S\S-  4S jrS\S\S-  4S jrS\S\4S jrS\S\\\4   S\
4S jrS\S\S-  4S jrS\S\S-  4S jrS\S\S-  4S jrS\S\S\S\S\S\4S jrS  rS!rU =r$ )#FusionMultiHeadAttentionMMDit   zG
Fuse MultiHeadAttention for Multimodal Diffusion Transformer (MMDiT).
modelc                 4   > [         TU ]  USS/S9  0 U l        g )NMultiHeadAttentionSoftmax)fused_op_typesearch_op_types)super__init__unsqueeze_update_map)selfr   	__class__s     c/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_mha_mmdit.pyr   &FusionMultiHeadAttentionMMDit.__init__   s$    .BU^T_`$&!    
start_nodereturnc                 2   U R                   R                  U/ SQUSS/US9nUc  gUS   n[        UR                  5      S:w  a  gU R                   R	                  UR                  S   5      nUc  g[        UR
                  5      S:w  a  g[        US   5      $ )a8  
Detect num_heads from Reshape & Transpose of q/k/v for both Stable Diffusion 3.x and Flux 1.x:

        MatMul    .. [-1] [24] ..
         |        |  |  /   /
        Add     Concat(axis=0)
          |      /
          Reshape
             |
         Transpose(perm=0,1,3,2)
             |
       (start_node)
)	TransposeReshapeConcatr      output_name_to_node      )r   match_parent_pathleninputget_constant_valueshapeint)r   r   r$   input_indexnodesconcat_shapevalues          r   get_num_heads+FusionMultiHeadAttentionMMDit.get_num_heads   s     

,,:[!Q<Oex - 
 =Ry|!!"a'

--l.@.@.CD=u{{q 58}r   transpose_kconcat_before_transposec                    U(       a>  U R                   R                  USS/SS/US9nU(       a  U R                  US   U5      $  gU R                   R                  US/S/US9nU(       a  U R                  US   U5      $ g)a  
        Detect num_heads from subgraph like the following (num_heads=24 in this example):
                       MatMu    .. [-1] [24] ..
                         |       |  |  /   /
                        Add     Concat
                          |      /
                         Reshape
                            |
                     Transpose(perm=0,2,1,3)
                            |
                     SimplifiedLayerNormalization
                            |
                    Transpose(perm=0,1,3,2)

        Another variant is to an extra Concat node to join two symmetrical subgraphs:

                   |              |
                  MatMul        MatMul   .. [-1] [24] ..
                   |              |       |  |  /   /
                  Add  Concat    Add      Concat
                    |  /          |      /
                  Reshape         Reshape
                    |              |
                 Transpose     Transpose(perm=0,2,1,3)
                    |              |
SimplifiedLayerNormalization  SimplifiedLayerNormalization
                        |     /
                       Concat
                         |
                    Transpose(perm=0,1,3,2)

            Both patterns are used in stable diffusion 3.5 model.
r!   SimplifiedLayerNormalizationr   r"   r#   )r   r(   r2   )r   r4   r$   r5   r/   s        r   get_num_heads_from_k2FusionMultiHeadAttentionMMDit.get_num_heads_from_k:   s    D #JJ00h(FG!Qex 1 E ))%(4GHH   JJ00<=sXk 1 E ))%(4GHHr   
input_nameoutput_namec                    SnU R                   R                  U5      nUcO  [        R                  " [        R
                  " / SQSS9US9nU R                   R                  X@R                  5        [        R                  " SX/U/U R                   R                  S5      S9nU R                  R                  U5        U R                  U R                  UR                  '   UR                  S   $ )	zAdd a Reshape node to convert 4D BxSxNxH to 3D BxSxD.

Args:
    input_name (str): input name for the 4D tensor of shape BxSxNxH.
    output_name (str): output name for the 3D tensor of shape BxSxD, where D = N * H.

Returns:
    str: the output name
bsnh_to_bsd_reshape_dims)r   r   r%   int64)dtype)namer    inputsoutputsr@   r   )r   get_initializerr	   
from_arraynparrayadd_initializerthis_graph_namer   	make_nodecreate_node_namenodes_to_addappendnode_name_to_graph_namer@   output)r   r:   r;   new_dims_namenew_dims	reshape_qs         r   reshape_to_3d+FusionMultiHeadAttentionMMDit.reshape_to_3dk   s     3::--m<#..rxx
'/RYfgHJJ&&x1E1EF$$. M,,Y7	
	 	  +7;7K7K$$Y^^4""r   mul_qNc                 R   U R                   R                  USS/SS/5      nUc  gUu  pE[        R                  " US/ SQ5      (       d  gUR                  S   UR                  S'   UR
                  S   nUS-   UR
                  S'   U R                  UR
                  S   US-   5      $ )	a  
MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

Before:
                       MatMul
                         |
                       Add      Concat
                         |      /
                         Reshape
                          |
                       Transpose(perm=0,2,1,3)
                          |
               SimplifiedLayerNorm
                          |
                         Mul

After:
                       MatMul
                         |
                        Add      Concat
                         |      /
                         Reshape
                           |
                   SimplifiedLayerNorm
                           |
                Reshape (shape=[0, 0, -1])
r7   r   r   Npermr   r'   r"      _BSNH_BSD)r   r(   r   check_node_attributer*   rO   rS   )r   rU   r$   pathsln_atranspose_a
sln_outputs          r   'adjust_query_from_bnsh_to_bsd_no_concatEFusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd_no_concat   s    : zz+++[9F

 <!//V\RR %**1-A\\!_
$w.Q!!%,,q/:3FGGr   c                    U R                   R                  U/ SQ/ SQ5      nUc  gUu  pEn[        UR                  5      S:w  a  gU R                   R                  USS/SS/5      nUc  gUu  px[        R
                  " US	/ S
Q5      (       d  g[        R
                  " US	/ S
Q5      (       d  g[        R
                  " USS5      (       d  gUR                  S   UR                  S'   UR                  S   UR                  S'   [        R                  " SUR                  S   UR                  S   /UR                  S   S-   /U R                   R                  S5      SS9n	U R                  R                  U	5        U R                  U R                  U	R                  '   U R                  U	R                  S   UR                  S   S-   5      $ )a  
MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

    Before:
              MatMul      MatMul
                |            |
                Add Concat  Add    Concat
                 |    /      |      /
                 Reshape     Reshape
                    |           |
Transpose(perm=0,2,1,3)      Transpose(perm=0,2,1,3)
                    |           |
    SimplifiedLayerNorm  SimplifiedLayerNorm
                    |     /
                    Concat(axis=2)
                     |
                    Mul

    After:
           MatMul        MatMul
             |              |
            Add Concat     Add     Concat
             |    /         |     /
             Reshape       Reshape
                |            |
   SimplifiedLayerNorm  SimplifiedLayerNorm
                |       /
              Concat(axis=1)
                 |
              Reshape (shape=[0, 0, -1])
)r!   r7   r   )r   r   r   Nr'   r7   r   r"   r   rW   rX   axisr!   rZ   rB   rC   r@   rd   r[   )r   r(   r)   r*   r   r\   r   rJ   rO   rK   rL   rM   rI   rN   r@   rS   )
r   rU   r$   r]   concatr^   r_   sln_btranspose_bnew_concat_nodes
             r   adjust_query_from_bnsh_to_bsd;FusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd   s   B zz++C

 <%)"{v||!zz+++[9F

 <!//V\RR//V\RR//BB %**1-A$**1-A **LLOU\\!_5]]1%/0,,X6
 	  1=A=Q=Q$$_%9%9:!!/"8"8";V]]1=MPV=VWWr   	unsqueezec                 d   U R                   R                  UR                  5      nUGc  [        UR                  5      S:X  aN  [
        R                  " SUR                  UR                  S   S-   /U R                  R                  S5      S/S9nOSnU R                  R                  U5      cL  [
        R                  " U[        R                  S/S/S9nU R                  R                  XPR                  5        [
        R                  " SUR                  S   U/UR                  S   S-   /U R                  R                  S5      S	9nU R                   R#                  U5        U R                  U R$                  UR                  '   UR                  S   nX R                   UR                  '   U$ )
Nr"   	Unsqueezer   rZ   r'   )rB   rC   r@   axesunsqueeze_axes_2)r@   	data_typedimsvalsrA   )r   getr@   r)   r*   r   rJ   rO   r   rK   rD   make_tensorr   INT64rH   rI   rL   rM   rN   )r   rl   updated_unsqueeze_outputnew_nodeinitializer_namerp   s         r   update_unsqueeze_axes_1_to_2:FusionMultiHeadAttentionMMDit.update_unsqueeze_axes_1_to_2  s   #'#<#<#@#@#P #+9??#q(!++$??&--a07:;44[A $6 ::--.>?G'-'9'9-"-"3"3SS	($ JJ../?AUAUV!++%OOA.0@A&--a07:;44[A	 $$X.:>:N:ND((7'/q'9$8P%%inn5''r   addr$   c                    [        UR                  5      S:w  a  gU R                  R                  U/ SQ/ SQU5      nUc  g[	        U R                  5      nUR                  US   5      nUb  US/:w  a  gUR                  US   5      nUb  US/:w  a  gU R                  R                  U/ SQ/ SQU5      nUc  gUR                  US   5      nUb  US/:w  a  gUR                  US   5      nUb  US/:w  a  gU R                  US   5      US   R                  S'   U R                  US   5      US   R                  S'   g)	a.  
Update axes of Unsqueeze from [1] to [2] in the following pattern:
          Unsqueeze        Unsqueeze
          (axes=[0])       (axes=[0])
             |              |
          Unsqueeze        Unsqueeze
      ... (axes=[1])  ...  (axes=[1])
        |     /        |   /
           Mul         Mul
            |       /
             Add
Args:
    add (NodeProto): the Add node
    output_name_to_node (Dict[str, NodeProto]): mapping from output name to node

Returns:
    bool: True if the pattern is matched and updated successfully, False otherwise.
r'   F)Mulrn   rn   )r"   r"   r   r"   r   )r   r"   r   T)r)   r*   r   r(   r   get_squeeze_or_unsqueeze_axesrz   )r   r|   r$   nodes_bfusion_utilsaxes_1axes_0nodes_as           r   update_unsqueeze_axes3FusionMultiHeadAttentionMMDit.update_unsqueeze_axes(  sL   & syy>Q **..s4UW`buv?"4::.;;GAJG>Vs];;GAJG>Vs] **..s4UW`buv?;;GAJG>Vs];;GAJG>Vs]"??
K
"??
K
r   c                 :   U R                   R                  U/ SQ/ SQ5      nUc  gUu  pEpgn[        UR                  5      S:w  a  gU R                   R                  USS/SS/5      nUc  gUu  p[        R
                  " US	/ S
Q5      (       d  g[        R
                  " U
S	/ S
Q5      (       d  g[        R
                  " USS5      (       d  gU R                  XB5      (       d  gUR                  S   UR                  S'   U
R                  S   U	R                  S'   [        R                  " SUR                  S   U	R                  S   /UR                  S   S-   /U R                   R                  S5      SS9nU R                  R                  U5        U R                  U R                  UR                  '   U R                   R!                  UR                  S   UR                  S   5        U R#                  UR                  S   UR                  S   S-   5      $ )a[  
Adjust graph to change query format from BNSH to BSD for Flux model.
Note that the graph pattern is complex, and we only do a shallow match here.

Before:
               |               |
Transpose(perm=0,2,1,3)    Transpose(perm=0,2,1,3)
                |              |
SimplifiedLayerNorm  SimplifiedLayerNorm
                |             /
                Concat(axis=2)
                 |
                Mul     Mul
                 |    /
                  Add
                   |
                  Mul

After (Transpose nods are removed, and a Reshape is added):

                |           |
    SimplifiedLayerNorm  SimplifiedLayerNorm
                |         /
            Concat(axis=1)
                |
                Mul    Mul
                 |    /
                  Add
                   |
               Reshape (shape=[0, 0, -1])
)Addr~   r!   r7   r   )r   r   r   r   r   Nr'   r7   r   r"   r   rW   rX   rd   r!   rZ   re   r[   )r   r(   r)   r*   r   r\   r   r   rJ   rO   rK   rL   rM   rI   rN   r@   replace_input_of_all_nodesrS   )r   rU   r$   r]   r|   _mul_arf   r^   r_   rg   rh   ri   s               r   "adjust_flux_query_from_bnsh_to_bsd@FusionMultiHeadAttentionMMDit.adjust_flux_query_from_bnsh_to_bsd]  s   B zz++Q

 <26/VKv||!zz+++[9F

 <!//V\RR//V\RR//BB ))#CC %**1-A$**1-A **LLOU\\!_5]]1%/0,,X6
 	  1=A=Q=Q$$_%9%9:

--fmmA.>@V@VWX@YZ!!#**Q-A1GHHr   c                    U R                   R                  U/ SQ/ SQ5      nUc  gUu  pEpg[        R                  " US/ SQ5      (       d  gU R	                  XB5      (       d  gUR
                  S   UR
                  S'   UR                  S   S-   UR                  S'   U R                  UR                  S   UR                  S   S-   5      $ )	ax  
Adjust graph to change query format from BNSH to BSD for Flux model.
Note that the graph pattern is complex, and we only do a shallow match here.

Before:
              |
            Transpose(perm=0,2,1,3)
              |
            SimplifiedLayerNorm
              |
             Mul     Mul
               |   /
               Add
                |
               Mul

After (Transpose is removed, and a Reshape is added):

                |
              SimplifiedLayerNorm
                |
                Mul   Mul
                 |   /
                 Add
                  |
               Reshape (shape=[0, 0, -1])
)r   r~   r7   r   )r   r   r   r   NrW   rX   r   rZ   r[   )r   r(   r   r\   r   r*   rO   rS   )r   rU   r$   r]   r|   r   r^   r_   s           r   )adjust_flux_single_query_from_bnsh_to_bsdGFusionMultiHeadAttentionMMDit.adjust_flux_single_query_from_bnsh_to_bsd  s    : zz++G

 <*.'U//V\RR ))#CC %**1-A

1/

1!!#**Q-A1GHHr   qc           
         [         R                  " SU/US-   /U R                  R                  SSS9/ SQS9nU R                  R                  U5        U R                  U R                  UR                  '   U R                  US-   US-   5      $ )Nr   rZ   Transpose_BNSH_to_BSNH)name_prefixrX   )r@   rW   r[   )
r   rJ   r   rK   rL   rM   rI   rN   r@   rS   )r   r   r$   transpose_qs       r   transpose_reshape_bnsh_to_bsd;FusionMultiHeadAttentionMMDit.transpose_reshape_bnsh_to_bsd  s    &&C[M,,[F^,_
 	  -9=9M9M$$[%5%56!!!g+q6z::r   kvrO   	num_headsc                     US:  d   eXU/nU/n[         R                  " SUUU R                  R                  S5      S9nSUl        UR
                  R                  [         R                  " SU5      /5        U$ )a.  
Create a MultiHeadAttention node.

Args:
    q (str): name of q
    k (str): name of k
    v (str): name of v
    output (str): output name of MHA
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

Returns:
    NodeProto: the node created.
r   r   rA   zcom.microsoftr   )r   rJ   r   rK   domain	attributeextendmake_attribute)	r   r   r   r   rO   r   
mha_inputsmha_outputsmha_nodes	            r   create_multihead_attention_node=FusionMultiHeadAttentionMMDit.create_multihead_attention_node  s    , 1}} AY
 h## ,,-AB	
 *!!6#8#8i#P"QR r   c                 B   UR                   S:X  d   eUnU R                  R                  UR                  S   5      (       a  g U R                  R	                  U/ SQ/ SQU5      nUc  g Uu  pgn[
        R                  " US/ SQ5      (       d  g U R                  R                  U/ SQ/ SQ5      n	U	c  g U	u  ppn  nnUR                  S   nUUR                  S   :w  a  g U R                  R                  U
S	S
/SS/5      nUc  g Uu  nnUR                  S   n[
        R                  " US/ SQ5      (       d  g U R                  R                  USS/SS/5      nUc  g US   R                  S   UR                  S   :w  a  g UR                  S   nU R                  R                  USSUS9nUb  U R                  R                  US
SUS9nUc  g [
        R                  " US/ SQ5      (       d  g U R                  R                  US
SUS9nUc  g [
        R                  " US/ SQ5      (       d  g O@U R                  R                  US
SUS9nUc  g [
        R                  " US/ SQ5      (       d  g U(       a  U R                  UU5      OU R                  XcSS9nUS:X  a  U R                  UUUS L5      nUS::  a  g Ub  U R                  X5      nOU R                  X5      nUc:  U R                  X5      nUc&  U R                  X5      nUc  U R!                  UU5      nU R#                  UUUUR                  S   US9nU R$                  R'                  U5        U R(                  U R*                  UR,                  '   U R.                  R1                  XgU/5        SU l        g )Nr   r   )MatMulr   r    )r   r   r   r   rW   rX   )r   r~   SqrtDivr   CastSliceShape)r   r   r"   r   r"   r   r   r   r~   r   r"   )r   r"   rY   r'   r   r   r!   )r.   r$   )r.   )r   r   r   rO   r   T)op_typer   find_graph_outputrO   match_child_pathr   r\   r(   r*   match_parentr2   r8   rj   ra   r   r   r   r   rL   rM   rI   rN   r@   nodes_to_remover   prune_graph)r   nodeinput_name_to_nodesr$   softmaxr/   
matmul_s_vtranspose_outreshape_outq_nodes	matmul_qkrU   sqrt_q_2div_qsqrt_q_shape_qq_bnshk_nodesmul_kr4   r   k_scale_nodesr   concat_vtranspose_1transpose_2r   queryrx   s                                 r   fuse"FusionMultiHeadAttentionMMDit.fuse  s   ||y((( ::''q(9::

++79QSf
 =16.
;//v|TT**..N$
 ?CJ@	(61aQW]]1%%**..y5+:NQRTUPVW?${a //V\RR

44UVUOaQRVT !!!$q(99Q ::**:xQdw*x **11+1J] 2 K "33KVV**11+1J] 2 K "33KVV W
 **11KQL_ 2 K "33KVV
  x)<=##JQR#S 	 >11+?RT\dhThiIA~ 66uRE@@\E=;;EWE}FFub= !>>vGZ[E77%%a( 8 
 	  *6:6J6J$$X]]3##Z$LM  r   )r   r   )r   )__name__
__module____qualname____firstlineno____doc__r
   r   r   r-   r2   boolr8   strrS   ra   rj   rz   dictr   r   r   r   r   r   __static_attributes____classcell__)r   s   @r   r   r      s|   'i '	 Z] B/	 /im /ru /b# ## ## #4.HY .H`cfj`j .H`MX9 MXVY\`V` MX^"(i "(C "(H3 3cS\nI] 3bf 3jRI	 RI[^ae[e RIh1Iy 1Ibehlbl 1If;s ;CRVJ ;)) ) 	)
 ) ) 
)V   r   r   )loggingr   numpyrF   fusion_baser   r   r   onnxr   r   r   r	   
onnx_modelr
   r   loggerr    r   r   <module>r      s4   
    $ = =  	8	K
 F K
 r   