
    h`                     v    S SK Jr  S SKrS SKJr  S SKJr  S SKJ	r	J
r
Jr  S SKJr  \" \5      r " S S\5      rg)	    )	getLoggerN)Fusion)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModelc                   *  ^  \ rS rSrSrS\S\S\S\S\S\4U 4S	 jjrS&S
\	S\S\4S jjr
S r S&S
\	S\	S\S\\\4   4S jjrS\	S\	S\	S\S\S\S\S\	S-  4S jrS\	S\	S\	S\S\S\S\S\	S-  4S jrS rS rS rS rS  rS!\	4S" jrS# rS$ rS%rU =r$ )'FusionAttentionUnet   z:
Fuse Attention subgraph of UNet into one Attention node.
modelhidden_size	num_headsis_cross_attentionenable_packed_qkvenable_packed_kvc                    > [         TU ]  UU(       a	  U(       a  SOSS/5        X l        X0l        X@l        XPl        X`l        SU l        SU l        g )N	AttentionMultiHeadAttentionLayerNormalizationT)	super__init__r   r   r   r   r   num_heads_warninghidden_size_warning)selfr   r   r   r   r   r   	__class__s          h/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_attention_unet.pyr   FusionAttentionUnet.__init__   sY     	-2CKI]!"	

 '""4 "3 0 "&#'     	reshape_q	is_torch2returnc                    SnU(       a  U R                   R                  US5      nU(       a  UR                  S:X  a  [        UR                  5      S:X  al  U R                   R                  UR                  S   5      n[        U[        R                  5      (       a%  [        UR                  5      S/:X  a  [        U5      nOoU R                   R                  UR                  S   5      n[        U[        R                  5      (       a(  [        UR                  5      S/:X  a  [        US   5      n[        U[        5      (       a  US:  a  U$ g)zDetect num_heads from a reshape node.

Args:
    reshape_q (NodeProto): reshape node for Q
    is_torch2 (bool): graph pattern is from PyTorch 2.*
Returns:
    int: num_heads, or 0 if not found
r      Concat      )r   
get_parentop_typeleninputget_constant_value
isinstancenpndarraylistshapeint)r   r    r!   r   reshape_parentq_shape_values         r   get_num_heads!FusionAttentionUnet.get_num_heads2   s    	!ZZ229a@N."8"8H"D^MaMaIbfgIg JJ99.:N:Nq:QR	i44ioo9NSTRU9U #II !JJ99)//!:LMM-44m>Q>Q9RWXVY9Ya 01	i%%)a-r   c                     U R                   R                  UR                  S   5      nU(       a#  [        R                  " U5      R
                  S   $ g)zDetect hidden_size from LayerNormalization node.
Args:
    layernorm_node (NodeProto): LayerNormalization node before Q, K and V
Returns:
    int: hidden_size, or 0 if not found
r'   r   )r   get_initializerr+   r   to_arrayr1   )r   layernorm_nodelayernorm_biass      r   get_hidden_size#FusionAttentionUnet.get_hidden_sizeN   sE     33N4H4H4KL''7==a@@r   r:   c                    U R                  X5      nUS::  a  U R                  nU R                  S:  aM  X@R                  :w  a>  U R                  (       a-  [        R	                  SU R                   SU S35        SU l        U R                  U5      nUS::  a  U R                  nU R                  S:  aM  XPR                  :w  a>  U R                  (       a-  [        R	                  SU R                   SU S35        SU l        XE4$ )a  Detect num_heads and hidden_size.

Args:
    reshape_q (NodeProto): reshape node for Q
    is_torch2 (bool): graph pattern is from PyTorch 2.*
    layernorm_node (NodeProto): LayerNormalization node before Q, K, V
Returns:
    Tuple[int, int]: num_heads and hidden_size
r   z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r5   r   r   loggerwarningr<   r   r   )r   r    r:   r!   r   r   s         r   get_num_heads_and_hidden_size1FusionAttentionUnet.get_num_heads_and_hidden_size[   s     &&y<	>I>>A)~~"=%%0@@TU^T__vwx).&**>:!**KaK3C3C$C'''(8(8'99Mk]Zqr ,1(%%r   q_matmulk_matmulv_matmulr+   outputNc           
         U R                   (       + nU(       az  UR                  S   U:w  d&  UR                  S   U:w  d  UR                  S   U:w  a@  [        R                  SUR                  S   UR                  S   UR                  S   5        gOUR                  S   U:w  d3  UR                  S   UR                  S   :w  d  UR                  S   U:X  a@  [        R                  SUR                  S   UR                  S   UR                  S   5        gUS:  a$  XT-  S:w  a  [        R                  SU SU 35        gU R                  R                  UR                  S   5      n	U R                  R                  UR                  S   5      n
U R                  R                  UR                  S   5      nU	(       a  U
(       a  U(       d  gU	R                  n[        R                  " U	5      n[        R                  " U
5      n[        R                  " U5      n[        R                  SUR                   S	UR                   S
UR                   SU 35        U(       Ga  UR                  UR                  :w  d  UR                  UR                  :w  a  gUR                  S   nUS:  a  UU:w  a  [        SU SU S35      e[        [        R                  " UR                  SS 5      5      nU R                  (       Ga  U R                  R                  S5      nUnUnUU-  n[        R                   " UR#                  UUU5      UR#                  UUU5      UR#                  UUU5      /5      R#                  UUS-  U-  5      nU R                  R                  SSS9nU R%                  US-   UUR                  S   UR                  S   /US9  [&        R(                  " SUR                  S   US-   /US-   /US9nU R*                  U R,                  UR.                  '   U R%                  US-   [0        R2                  S/SSUSU/SS9  [&        R(                  " SUS-   US-   /US-   /US-   S9nU R*                  U R,                  UR.                  '   U R4                  R7                  UU/5        U R8                  R7                  XU/5        GO[        R:                  " XU4SS9nSU-  nU R                  R                  S 5      nU R%                  US!-   UUU/US9  GOIU R                  R                  S5      nU R<                  (       Ga  UR                  UR                  :w  a  gUR                  S   nUR                  S   nUU:X  d   eUR                  S   nUR                  S   nUR                  S   nUU:X  a  UU:X  d   eUnUnUU-  n[        R                   " UR#                  UUU5      UR#                  UUU5      /5      R#                  UUS"-  U-  5      nU R                  R                  SS#S9nU R%                  US-   UUR                  S   UR                  S   /US9  [&        R(                  " SUR                  S   US-   /US-   /US9nU R*                  U R,                  UR.                  '   U R%                  US-   [0        R2                  S/SSUS"U/SS9  [&        R(                  " SUS-   US-   /US$-   /US-   S9nU R*                  U R,                  UR.                  '   U R4                  R7                  UU/5        U R8                  R7                  X#/5        [        R>                  " SU/[        R@                  S%9n SU-  n!U R%                  US&-   UU!/U S9  U(       a$  U R                  (       d  UUS!-   US&-   /n"O]US-   /n"OVU R<                  (       d1  URB                  S   URB                  S   URB                  S   US&-   /n"OURB                  S   US$-   /n"[&        R(                  " U(       a  U R                  (       d  S OSU"U/US9n#S'U#l"        U#RF                  R7                  [&        RH                  " S(U5      /5        U(       a  U R                  (       d  S)O6S*RK                  U R                  (       a  S+OU R<                  (       a  S,OS-5      n$U RM                  U$5        U#$ ).=  Create an Attention node.

Args:
    q_matmul (NodeProto): MatMul node in fully connection for Q
    k_matmul (NodeProto): MatMul node in fully connection for K
    v_matmul (NodeProto): MatMul node in fully connection for V
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
    hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
    input (str): input name
    output (str): output name

Returns:
    Union[NodeProto, None]: the node created or None if failed.
r   RFor self attention, input hidden state for q and k/v shall be same. Got %s, %s, %sNXFor cross attention, input hidden state for q and k/v shall be different. Got %s, %s, %sinput hidden size # is not a multiple of num of heads r$   qw= kw= vw= hidden_size=Input hidden size (,) is not same as weight dimension of q,k,v (:). Please provide a correct input hidden size or pass in 0r      MatMul
MatMul_QKVname_prefix_weightname	data_typedimsvals_outinputsoutputsr[   _reshape_shape   Fr[   r\   r]   r^   rawReshape
_qkv_input_reshape)axisr   _qkv_weightr'   	MatMul_KV	_kv_inputdtype	_qkv_biascom.microsoftr   Attention (self attention)MultiHeadAttention ({})self attention with packed qkvcross attention with packed kvcross attention)'r   r+   r?   debugr   r8   r\   r   r9   r1   
ValueErrorr2   r.   prodr   create_node_namedstackreshapeadd_initializerr   	make_nodethis_graph_namenode_name_to_graph_namer[   r   INT64nodes_to_addextendnodes_to_removestackr   zerosfloat32rF   domain	attributemake_attributeformatincrease_counter)%r   rC   rD   rE   r   r   r+   rF   is_self_attentionq_weightk_weightv_weight
float_typeqwkwvw
qw_in_sizeqw_out_sizeattention_node_namecnh
qkv_weightmatmul_node_namematmul_nodereshape_nodeqkv_weight_dim
kw_in_size
vw_in_sizekw_out_sizevw_out_size	kv_weightqkv_biasqkv_bias_dimattention_inputsattention_nodecounter_names%                                        r   create_attention_node)FusionAttentionUnet.create_attention_node}   s	   0 !% 7 77~~a E)X^^A->%-G8>>Z[K\`eKehNN1%NN1%NN1%	  Lf ~~a E)hnnQ.?8>>RSCT.TZbZhZhijZkotZtnNN1%NN1%NN1%	 ? 7A=LL-k]:]^g]hij::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@X( ''
!!(+!!(+!!(+s288*D
$rxxjk][\ xx288#rxx288';!JQ;*#< )+6bcmbn oN N  bggbhhqrl34K%%%&*jj&A&ABV&W#9,  YY

1a(;RZZ1a=PRTR\R\]^`acdRe'fgooq1uqy
 $(::#>#>xUa#>#b $$)I5($**1-z/?/?/BC#	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=>$$),<<)//Q1a %   &//(61(+;; 1<?@)J6  CGBVBV,,\->->?!!((+|)DE$$++X,JK  XXrrl;
!"[&*jj&A&A+&N#$$,}<($n5#	 %  #'**"="=>R"S$$$88rxx'XXa[
XXa[
!Z/// hhqk hhqk hhqk"k1k[6PPP9, IIrzz!Q':BJJq!Q<O&PQYYZ[]^ab]bef]fg	#'::#>#>xU`#>#a $$)I5(#//!,iooa.@A"	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=>$$),<<)//Q1a %   &//(61(+;; 1;>?)J6  CGBVBV,,\->->?!!((+|)DE$$++X,@A 88Q,BJJ?;${2 	 	 	
 ))'-7'+5$  %8,$F#G ((OOA&OOA&OOA&'+5	$  OOA&'+5$ 
  ))-d6L6LKSg#H$	
 !0  '')>)>{I)V(WX !)?)? )*11)) 1 (( 6& 	 	l+r   q_matmul_addk_matmul_addv_matmul_addc           
          U R                   (       + nU R                  R                  USS5      n	U R                  R                  USS5      n
U R                  R                  USS5      nU R                  U5      nUc  gUu  pU R                  U5      nUc  gUu  nnU R                  U5      nUc  gUu  nnU(       a  U	R                  S   U:w  d&  U
R                  S   U:w  d  UR                  S   U:w  a@  [
        R                  SU	R                  S   U
R                  S   UR                  S   5        gUR                  S   U:w  d&  UR                  S   U:w  d  UR                  S   U:w  a@  [
        R                  SUR                  S   UR                  S   UR                  S   5        gGOU	R                  S   U:w  d3  U
R                  S   UR                  S   :w  d  U
R                  S   U:X  a@  [
        R                  SU	R                  S   U
R                  S   UR                  S   5        gUR                  S   U:w  d3  UR                  S   UR                  S   :w  d  U
R                  S   U:X  a@  [
        R                  SUR                  S   UR                  S   UR                  S   5        gUS:  a$  XT-  S:w  a  [
        R                  SU S	U 35        gU R                  R                  U	R                  S
   5      nU R                  R                  U
R                  S
   5      nU R                  R                  UR                  S
   5      nU(       a  U(       a  U(       d  gUR                  S:X  a  [
        R                  S5        g[        R                  " U5      n[        R                  " U5      n[        R                  " U5      n[
        R                  SUR                   SUR                   SUR                   SU 35        U(       Ga  UR                  UR                  :w  d  UR                  UR                  :w  a  gUR                  S   nUS:  a  UU:w  a  [        SU SU S35      e[        [        R                  " UR                  S
S 5      5      nU R                   (       Ga   U R                  R#                  S5      nUnUnUU-  n [        R$                  " UR'                  UUU 5      UR'                  UUU 5      UR'                  UUU 5      /5      R'                  UUS-  U -  5      n!U R                  R#                  SSS9n"U R)                  U"S-   [*        R,                  U!R                  S   U!R                  S
   /U!S9  [.        R0                  " SU
R                  S   U"S-   /U"S-   /U"S9n#U R2                  U R4                  U#R6                  '   UR6                  S-   n$U R)                  U$[*        R8                  S/SSUU /SS9  U R                  R#                  S S!S9n%[.        R0                  " S UR:                  S   U$/U%S-   /U%S9n&U R2                  U R4                  U&R6                  '   U R                  R#                  S S"S9n'[.        R0                  " S UR:                  S   U$/U'S-   /U'S9n(U R2                  U R4                  U(R6                  '   U R                  R#                  S S#S9n)[.        R0                  " S UR:                  S   U$/U)S-   /U)S9n*U R2                  U R4                  U*R6                  '   U R                  R#                  S$S%S9n+[.        R0                  " S$U&R:                  S   U(R:                  S   U*R:                  S   /U+S-   /U+S9n,U,R<                  R?                  [.        R@                  " S&S5      /5        U R2                  U R4                  U,R6                  '   U,R6                  S-   n-U R)                  U-[*        R8                  S/SSUS-  U -  /SS9  U R                  R#                  S S'S9n.[.        R0                  " S U,R:                  S   U-/U.S-   /U.S9n/U R2                  U R4                  U/R6                  '   U R                  R#                  S(S)S9n0[.        R0                  " S(U/R:                  S   U#R:                  S   /U0S-   /U0S9n1U R2                  U R4                  U1R6                  '   U0S-   n2U R)                  U2[*        R8                  S*/SSUSU /SS9  [.        R0                  " S U1R:                  S   U2/US+-   /U0S,-   S9n3U R2                  U R4                  U3R6                  '   U RB                  R?                  U#U&U(U*U,U/U1U3/5        U RD                  R?                  XXX#/5        GO&gU R                  R#                  S5      nU RF                  (       Ga  UR                  UR                  :w  a  gUR                  S   n4UR                  S   n5U4U5:X  d   eUR                  S
   nUR                  S
   n6UR                  S
   n7UU7:X  a  U6U7:X  d   eU4nUnU6U-  n [        R$                  " UR'                  UUU 5      UR'                  UUU 5      /5      R'                  UUS--  U -  5      n8U R                  R#                  SS.S9n"U R)                  U"S-   [*        R,                  U8R                  S   U8R                  S
   /U8S9  [.        R0                  " SU
R                  S   U"S-   /U"S-   /U"S9n#U R2                  U R4                  U#R6                  '   UR6                  S-   n9U R)                  U9[*        R8                  S/SSUU /SS9  U R                  R#                  S S"S9n'[.        R0                  " S UR:                  S   U9/U'S-   /U'S9n(U R2                  U R4                  U(R6                  '   U R                  R#                  S S#S9n)[.        R0                  " S UR:                  S   U9/U)S-   /U)S9n*U R2                  U R4                  U*R6                  '   U R                  R#                  S$S/S9n:[.        R0                  " S$U(R:                  S   U*R:                  S   /U:S-   /U:S9n;U;R<                  R?                  [.        R@                  " S&S5      /5        U R2                  U R4                  U;R6                  '   U;R6                  S-   n<U R)                  U<[*        R8                  S/SSUS--  U -  /SS9  U R                  R#                  S S0S9n=[.        R0                  " S U;R:                  S   U</U=S-   /U=S9n>U R2                  U R4                  U>R6                  '   U R                  R#                  S(S1S9n?[.        R0                  " S(U>R:                  S   U#R:                  S   /U?S-   /U?S9n@U R2                  U R4                  U@R6                  '   U?S-   n2U R)                  U2[*        R8                  S*/SSUS-U /SS9  [.        R0                  " S U@R:                  S   U2/US2-   /U?S,-   S9n3U R2                  U R4                  U3R6                  '   U RB                  R?                  U#U(U*U;U>U@U3/5        U RD                  R?                  XX#/5        Og[        RH                  " SU/[        RJ                  S39nASU-  nBU R)                  US4-   [*        R,                  UB/UAS9  U(       a  U R                   (       d  gUS+-   /nCO&U RF                  (       d  gUR:                  S   US2-   /nC[.        R0                  " U(       a  U R                   (       d  S5OSWCU/US9nDS6UDl&        UDR<                  R?                  [.        R@                  " S7U5      /5        U(       a  U R                   (       d  S8O6S9RO                  U R                   (       a  S:OU RF                  (       a  S;OS<5      nEU RQ                  UE5        WD$ )=rH   rU   r   NrI   z_For self attention, input hidden state for LoRA q and k/v weights shall be same. Got %s, %s, %srJ   zeFor cross attention, input hidden state for LoRA q and k/v weights shall be different. Got %s, %s, %srK   rL   r$   
   zBweights are in fp16. Please run fp16 conversion after optimizationrM   rN   rO   rP   rQ   rR   rS   r   rT   rV   rW   rY   rZ   r_   r`   rc   r&   Fre   rg   Reshape_LoRA_QReshape_LoRA_KReshape_LoRA_Vr%   Concat_LoRA_QKVrj   Reshape_LoRA_QKVAddAdd_Weights_QKVrd   rh   ri   r'   rl   Concat_LoRA_KVReshape_LoRA_KVAdd_Weights_KVrm   rn   rp   r   rq   r   rr   rs   rt   ru   rv   ))r   r   match_parentmatch_lora_pathr+   r?   rw   r8   r\   r   r9   r1   rx   r2   r.   ry   r   rz   r{   r|   r}   r   FLOATr   r~   r   r   r[   r   rF   r   r   r   r   r   r   r   r   r   r   r   )Fr   r   r   r   r   r   r+   rF   r   rC   rD   rE   q_lora_nodesq_lora_last_nodeq_lora_matmul_1k_lora_nodesk_lora_last_nodek_lora_matmul_1v_lora_nodesv_lora_last_nodev_lora_matmul_1r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   lora_weight_shape_tensor_nameq_lora_reshape_node_nameq_lora_reshape_nodek_lora_reshape_node_namek_lora_reshape_nodev_lora_reshape_node_namev_lora_reshape_nodeqkv_lora_concat_node_nameqkv_lora_concat_node'reshaped_lora_weights_shape_tensor_nameqkv_lora_reshaped_node_nameqkv_lora_reshaped_nodeadd_weights_node_nameadd_weights_nodeshape_tensor_namer   r   r   r   r   r    kv_lora_weight_shape_tensor_namekv_lora_concat_node_namekv_lora_concat_node*reshaped_kv_lora_weights_shape_tensor_namekv_lora_reshaped_node_namekv_lora_reshaped_nodeadd_kv_weights_node_nameadd_kv_weights_noder   r   r   r   r   sF                                                                         r   create_attention_node_lora.FusionAttentionUnet.create_attention_node_lora  s%   0 !% 7 77::**<1E::**<1E::**<1E++L9.:+	++L9.:+	?++L9.:+	?~~a E)X^^A->%-G8>>Z[K\`eKehNN1%NN1%NN1%	   %%a(E1"((+u4"((+u4u#))!,#))!,#))!,	  5 ~~a E)hnnQ.?8>>RSCT.TZbZhZhijZkotZtnNN1%NN1%NN1%	   %%a(E1#))!,0E0Ea0HHNN1%.) $))!,#))!,#))!, ? 7A=LL-k]:]^g]hij::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@X( #LL]^!!(+!!(+!!(+s288*D
$rxxjk][\ xx288#rxx288';!JQ;*#< )+6bcmbn oN N  bggbhhqrl34K%%%&*jj&A&ABV&W#9,  YY

1a(;RZZ1a=PRTR\R\]^`acdRe'fgooq1uqy
 $(::#>#>xUa#>#b $$)I5)//$**1-z/?/?/BC#	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=> 1A0E0EHX0X-$$6)//Q1 %  ,0::+F+Fy^n+F+o(&,&6&6,33A68UV5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fy^n+F+o(&,&6&6,33A68UV5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fy^n+F+o(&,&6&6,33A68UV5>?1	'# JNI]I],,-@-E-EF -1JJ,G,G^o,G,p)'-'7'7+2215+2215+2215
 7?@2	($ %..55v7L7LVUV7W6XYJNJ^J^,,-A-F-FG ;O:S:SVf:f7$$@)//QA	* %  /3jj.I.I)as.I.t+)/)9)9077:<cd86AB4	*& MQL`L`,,-C-H-HI )-

(C(CEWh(C(i%#)#3#3299!<k>P>PQR>ST2V;<.	$  GKFZFZ,,-=-B-BC %:<L$L!$$*)//Q1a %   &//,33A68IJ0<?@.;	  CGBVBV,,\->->?!!((#+++,.($	 $$++XYe,tu "&**"="=>R"S$$$88rxx'XXa[
XXa[
!Z/// hhqk hhqk hhqk"k1k[6PPP9, IIrzz!Q':BJJq!Q<O&PQYYZ[]^ab]bef]fg	#'::#>#>xU`#>#a $$)I5)//#//!,iooa.@A"	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=> 4D3H3HK[3[0$$9)//Q1 %  ,0::+F+Fy^n+F+o(&,&6&6,33A68XY5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fy^n+F+o(&,&6&6,33A68XY5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fx]m+F+n(&,&6&6/66q9;N;U;UVW;XY5>?1	'# $--44f6K6KFTU6V5WXIMI]I],,-@-E-EF >Q=U=UXh=h:$$C)//QA	* %  .2ZZ-H-H`q-H-r*(.(8(8/66q9;ef7&@A3	)% LPK_K_,,-B-G-GH ,0::+F+FuZj+F+k(&,&6&6188;[=O=OPQ=RS5>?1	'# JNI]I],,-@-E-EF %=?O$O!$$*)//Q1a %   &///66q9;LM0;>?1J>	  CGBVBV,,\->->?!!((#+++-+$
 $$++X,\]  88Q,BJJ?;${2!''	 	 	
 ))$7,$F#G (( !''*'+5$ 
  ))-d6L6LKSg#H$	
 !0  '')>)>{I)V(WX !)?)? )*11)) 1 (( 6& 	 	l+r   c           
         U R                  XU5      (       a  g U R                  R                  USS5      nUc.  U R                  (       d  U R                  R                  USS5      nUc  g UR                  S   nX%   nS nU H  nUR
                  S:X  d  M  Un  O   Uc  g U R                  XW5      =(       d    U R                  XW5      n	U	bn  U	u  pppnUnU R                  XU
5      u  nnUS::  a  [        R                  S5        g U R                  UUUUUUR                  S   UR                  S   S9nUc  g OU R                  XW5      =(       d    U R                  XW5      n	U	c  g U	u  ppnnnUnU R                  XU
5      u  nnUS::  a  [        R                  S5        g U R                  UUUUUUR                  S   UR                  S   S9nUc  g U R                  XU
5      u  nnUS::  a  [        R                  S5        g U R                  R!                  U5        U R"                  U R$                  UR&                  '   U R(                  R+                  UU/5        SU l        g )Nr   r   rg   *fuse_attention: failed to detect num_headsr+   rF   T)fuse_a1111_fp16r   r   r   rF   r)   match_qkv_torch1match_qkv_torch2rA   r?   rw   r   match_qkv_torch1_loramatch_qkv_torch2_lorar   r   appendr   r   r[   r   r   prune_graph)r   normalize_nodeinput_name_to_nodesoutput_name_to_nodenode_before_layernorm
root_inputchildren_nodesskip_addnode	match_qkvr!   reshape_qkvtranspose_qkvr    matmul_qmatmul_kmatmul_vattention_last_nodeq_num_headsq_hidden_sizenew_nodematmul_add_qmatmul_add_kmatmul_add_vs                           r   fuseFusionAttentionUnet.fuseU  s   EXYY $

 7 7q Q !(1H1H$(JJ$;$;NIWX$Y! (*11!4
,8"D||u$ # ))*?n4CXCXYcCn	 ]fZIMhRZ"-)-)K)KIgp)q&KaIJ 11$++A.*11!4 2 H    22:H DLfLfMI  irfIMlLZf"-)-)K)KIgp)q&KaIJ 66$++A.*11!4 7 H )-)K)KIgp)q&KaIJ  *6:6J6J$$X]]3##%8-$HI  r   c           
         UR                   S   U:X  a  SOSnU R                  R                  U/ SQUSSSSS/5      nUc  gUu    pVpunU R                  R                  U/ SQ/ SQ5      n	U	c  [        R	                  S5        gU	u      pZU R                  R                  U/ SQ/ S	Q5      nUb  Uu  pnO@U R                  R                  U/ S
Q/ SQ5      nUb  Uu  ppO[        R	                  S5        gU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu  nnnnU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu        nnSXgUUUU
4$ )z.Match Q, K and V paths exported by PyTorch 1.*r   r$   )r   rU   rg   	Transposerg   rU   Nrg   r  rg   rU   r$   r   r   r   &fuse_attention: failed to match v pathSoftmaxMulrU   r   r   r   r  r   r  rU   r   r   r   r   'fuse_attention: failed to match qk path&fuse_attention: failed to match q path)r  rg   r  rg   rU   r$   r   r   r   r   &fuse_attention: failed to match k pathFr+   r   match_parent_pathr?   rw   )r   r   r   another_input	qkv_nodes_r   r   
matmul_qkvv_nodesr   qk_nodes_softmax_qk_mul_qk	matmul_qk	_add_zeroq_nodes_transpose_qr    r   k_nodesr   s                         r   r   $FusionAttentionUnet.match_qkv_torch1  s   %^^A.*<!JJ00JD$1a0
	 <E9AMj **..z;hjvw?LLAB%Aq:://
<XZcd08-[9zz33J@ceqrH#?G<)FG**..y:giuv?LLAB18.L)X**..QSb
 ?LLAB!(Aq!Xk)XxQYYYr   c           	      "   UR                   S   U:X  a  SOSnU R                  R                  U/ SQUSSSS/5      nUc  gUu    pVpxU R                  R                  U/ SQ/ SQ5      n	U	c  [        R	                  S5        gU	u    pZU R                  R                  USS	/SS/5      nUb  Uu  pO[        R	                  S
5        gU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu  nnnnU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu  n  nnU R                  R                  U/ SQ/ SQ5      nUb	  US   U:w  a  [        R	                  S5        gSXgUUUU
4$ )z.Match Q, K and V paths exported by PyTorch 2.*r   r$   )r   rU   rg   r  rU   N)r  rg   rU   r$   r   r   r  r  rU   r  )r  r  rg   rU   r   Nr   r   r  r$   Nr   r   r  SqrtDivr%  CastSliceShaper  rg   Nr   r$   r   r   r   r   r   z*fuse_attention: failed to match mul_q pathTr  )r   r   r   r  r  r  r   r   r  r  r   r  r  r  r  mul_qr  r    r   r  _mul_kr   mul_q_nodess                          r   r   $FusionAttentionUnet.match_qkv_torch2  s   %^^A.*<!JJ00?D$1-
	 9B6AM**..z;]_hi?LLAB"A:://
Y<QTUWXSYZ'/$[)LLBC**..y:cetu?LLAB5<2i**..y:cetu?LLAB#* Ax jj22U'

 +b/Y">LLEF[HhPXXXr   c                    UR                   S   U:X  a  SOSnU R                  R                  U/ SQUSSSSSS/5      nUc  gUu      pVpunU R                  R                  U/ SQ/ SQ5      n	U	c  [        R	                  S5        gU	u      pZU R                  R                  U/ SQ/ S	Q5      nUb  Uu  pnO@U R                  R                  U/ S
Q/ SQ5      nUb  Uu  ppO[        R	                  S5        gU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu  nnnnU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu        nnSXgUUUU
4$ )zJMatch Q, K and V paths exported by PyTorch 1 that contains LoRA patterns.*r   r$   )r   r   rU   rg   r  rg   rU   N)rg   r  rg   r   r  +fuse_attention: failed to match LoRA v pathr  r	  r
  r  ,fuse_attention: failed to match LoRA qk path+fuse_attention: failed to match LoRA q path)r  rg   r  rg   r   r  +fuse_attention: failed to match LoRA k pathFr  )r   r   r   r  r  r  r   r   r  r  r   r  r  r  r  r  r  r  r    r   r  r   s                         r   r   )FusionAttentionUnet.match_qkv_torch1_lora  s   %^^A.*<!JJ00QAtT1a3
	
 ?H<Aq} **..z;egst?LLFG")Aq:://
<XZcd08-[9zz33J@ceqrH#?G<)KL**..y:dfrs?LLFG5<2L)\**..NP_
 ?LLFG%,"Aq!\k)\<Yeeer   c           
      &   UR                   S   U:X  a  SOSnU R                  R                  U/ SQUSSSSS/5      nUc  gUu      pVpxU R                  R                  U/ SQ/ SQ5      n	U	c  [        R	                  S5        gU	u    pZU R                  R                  USS	/SS/5      nUb  Uu  pO[        R	                  S
5        gU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu  nnnnU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu  n  nnU R                  R                  U/ SQ/ SQ5      nUb	  US   U:w  a  [        R	                  S5        gSXgUUUU
4$ )zJMatch Q, K and V paths exported by PyTorch 2 that contains LoRA patterns.*r   r$   )r   r   rU   rg   r  rU   N)r  rg   r   r!  r1  r  rU   r2  )r  r  rg   r   r"  r3  r#  r4  r$  r*  r+  z/fuse_attention: failed to match LoRA mul_q pathTr  )r   r   r   r  r  r  r   r   r  r  r   r  r  r  r  r,  r  r    r   r  r-  r   r.  s                          r   r   )FusionAttentionUnet.match_qkv_torch2_loraD  s   %^^A.*<!JJ00FAtT1a0
	
 <E9Aq}**..z;Z\ef?LLFG&A:://
Y<QTUWXSYZ'/$[)LLGH**..y:`bqr?LLFG9@6i**..y:`bqr?LLFG'.$A| jj22U'

 +b/Y">LLJK[L,Xdddr   add_nodec                    U R                   R                  USS/SS/5      nUb  Uu  p4X44$ U R                   R                  U/ SQ/ SQ5      nUb  Uu  pVnXT4$ U R                   R                  U/ SQ/ SQ5      nUb	  Uu  n  pdXT4$ g )NrU   r$   r   )r  rU   rU   r!  )r  r  rU   rU   r  )r   r  )r   r8  
lora_nodeslora_matmul_2_nodelora_matmul_1_nodelora_mul_noder  s          r   r   #FusionAttentionUnet.match_lora_pathw  s     ZZ11x F

 !7A4&;; ZZ11'

 !5?2]1!66 ZZ11.

 !8B5]Aq!66r   c           
      j   U R                   R                  USS/SS/5      nUc%  U R                   R                  USS/SS/5      nUc  gUu  pVUR                  S   nX'   nSn	U H  n
U
R                  S:X  d  M  U
n	  O   U	c  gU R	                  Xy5      nUc  gUu  nnnnnnU R                   R                  USS5      nU R                   R                  USS5      nU R                   R                  USS5      nUb'  Ub$  U R                  (       d  UU:X  a  OUU:w  a  UU:X  d  gUR                  S   UR                  S   :w  a  gUnU R                  US5      =(       d    U R                  US5      nUS::  a  [        R                  S5        gU R                  U5      nU R                  UUUUUUR                  S   UR                  S   S	9nUc  gU R                  R                  U5        U R                  U R                   UR"                  '   U R$                  R'                  UU/5        SU l        g)
zPFuse attention of fp16 UNet exported in A1111 (stable diffusion webui) extensionr'  r   r   Nrg   FTr   r   )r   r  rF   r)   match_qkv_a1111r   r   r+   r5   r?   rw   r<   r   r   r   r   r   r[   r   r   r   )r   r   r   r   
entry_path_castr   r   r   r   r   r   r   r   r    r   r   r   cast_qcast_kcast_vr   r   r   r   s                            r   r   #FusionAttentionUnet.fuse_a1111_fp16  sf   ZZ11.65/TUWXSYZ
55nvyFY\]_`[abJ!'1$*11!4
,8"D||u$ # ((>	 	
 ((61=((61=((61=")-)@)@6!fPVFV& <<?n33A66)((D9aT=O=OPY[`=a!LLEF,,^< --..#&--a0 . 
   *6:6J6J$$X]]3##%8-$HI  r   c           
         UR                   S   U:X  a  SOSnU R                  R                  U/ SQUSSSSS/5      nUc  gUu    pVpxn	U R                  R                  U	/ SQ/ SQ5      n
U
c  [        R	                  S5        gU
u      p[U R                  R                  U	/ SQ/ S	Q5      nUb  Uu    p]p^O[        R	                  S
5        gU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu  nnnnU R                  R                  U/ SQ/ SQ5      nUc  [        R	                  S5        gUu      nnXgUUUU4$ )zKMatch Q, K and V paths exported by A1111 (stable diffusion webui) extensionr   r$   )r   rU   rg   r  rg   EinsumNr  r  r  )r'  r'  r  r  rH  )r   r   r   r   Nr  r  r  r  r  )r   r   r   r  r  r  r   r   reshape_einsum
einsum_qkvr  r   r  r  	einsum_qkr  r  r    r   r  r   s                        r   r@  #FusionAttentionUnet.match_qkv_a1111  se   %^^A.*<!JJ00JD$1a0
	 IRFAM:**..z;hjvw?LLAB%Aq:://DFX
 08-Q9LLBC**..y:giuv?LLAB18.L)X**..y:giuv?LLAB%Aq(9h(RRr   )r   r   r   r   r   r   r   r   )F)__name__
__module____qualname____firstlineno____doc__r	   r2   boolr   r   r5   r<   tuplerA   strr   r   r   r   r   r   r   r   r   r@  __static_attributes____classcell__)r   s   @r   r   r      s   (( ( 	(
 !(  ( (:y T c 8 RW &" &4= &JN &	sCx &D@@ @ 	@
 @ @ @ @ 
T	@DTT  T  	T
 T T T T 
T	TlX t/Zb2Yh.f`1ef**XL\*S *Sr   r   )loggingr   numpyr.   fusion_baser   fusion_utilsr   onnxr   r   r   
onnx_modelr	   rM  r?   r    r   r   <module>r^     s6   
    $ / /  	8	KS& KSr   