# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.  All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from logging import getLogger

import numpy as np
from fusion_base import Fusion
from fusion_utils import NumpyHelper
from onnx import NodeProto, helper, numpy_helper
from onnx_model import OnnxModel

logger = getLogger(__name__)


class FusionMultiHeadAttentionSam2(Fusion):
    """
    Fuse MultiHeadAttention subgraph of Segment Anything v2 (SAM2).
    """
modelhidden_size	num_headsc                 ^   > [         TU ]  USS/5        X l        X0l        SU l        SU l        g )NMultiHeadAttentionLayerNormalizationT)super__init__r   r   num_heads_warninghidden_size_warning)selfr   r   r   	__class__s       h/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_attention_sam2.pyr   %FusionMultiHeadAttentionSam2.__init__   s8     	 47K6LM&" "&#'     	reshape_qreturnc                 &   SnU R                   R                  UR                  S   5      nUbG  [        U[        R
                  5      (       a(  [        UR                  5      S/:X  a  [        US   5      n[        U[        5      (       a  US:  a  U$ g)Detect num_heads from a reshape node.

Args:
    reshape_q (NodeProto): reshape node for Q
Returns:
    int: num_heads, or 0 if not found
r            )	r   get_constant_valueinput
isinstancenpndarraylistshapeint)r   r   r   shape_values       r   get_decoder_num_heads2FusionMultiHeadAttentionSam2.get_decoder_num_heads#   s     	 jj33IOOA4FG"+rzz22tK<M<M7NSTRU7UA/	i%%)a-r   
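
    # For illustration, a sketch (with hypothetical values) of the constant shape tensor
    # that the detection above expects as the second input of the Reshape node for Q;
    # index 2 holds num_heads and index 3 holds head_size:
    #
    #     shape = numpy_helper.from_array(
    #         np.array([0, 0, 8, 64], dtype="int64"),  # [batch, seq_len, num_heads=8, head_size=64]
    #         name="reshape_q_shape",
    #     )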

    def get_encoder_num_heads(self, reshape_in: NodeProto) -> int:
        """Detect num_heads from a reshape node.

        Args:
            reshape_in (NodeProto): reshape node before the Q, K and V paths
        Returns:
            int: num_heads, or 0 if not found
        """
        num_heads = 0

        shape_value = self.model.get_constant_value(reshape_in.input[1])
        if shape_value is not None:
            if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [4]:
                num_heads = int(shape_value[2])
        else:
            concat_shape = self.model.match_parent(reshape_in, "Concat", 1)
            if concat_shape is not None and len(concat_shape.input) == 4:
                # The shape is a Concat of four values like [?, ?, num_heads, head_size].
                shape_value = self.model.get_constant_value(concat_shape.input[2])
                if shape_value is not None:
                    if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [1]:
                        num_heads = int(shape_value[0])

        if isinstance(num_heads, int) and num_heads > 0:
            return num_heads

        return 0

    def get_hidden_size(self, layernorm_node):
        """Detect hidden_size from LayerNormalization node.

        Args:
            layernorm_node (NodeProto): LayerNormalization node before Q, K and V
        Returns:
            int: hidden_size, or 0 if not found
        """
        layernorm_bias = self.model.get_initializer(layernorm_node.input[2])
        if layernorm_bias:
            return NumpyHelper.to_array(layernorm_bias).shape[0]

        return 0

    def get_num_heads_and_hidden_size(
        self, reshape_q: NodeProto, layernorm_node: NodeProto, is_encoder: bool = False
    ) -> tuple[int, int]:
        """Detect num_heads and hidden_size.

        Args:
            reshape_q (NodeProto): reshape node for Q
            layernorm_node (NodeProto): LayerNormalization node before Q, K, V
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        """
        if is_encoder:
            num_heads = self.get_encoder_num_heads(reshape_q)
        else:
            num_heads = self.get_decoder_num_heads(reshape_q)
        if num_heads <= 0:
            num_heads = self.num_heads  # Fall back to the user-specified value.

        if self.num_heads > 0 and num_heads != self.num_heads:
            if self.num_heads_warning:
                logger.warning(f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value.")
                self.num_heads_warning = False  # Show the warning only once.

        hidden_size = self.get_hidden_size(layernorm_node)
        if hidden_size <= 0:
            hidden_size = self.hidden_size  # Fall back to the user-specified value.

        if self.hidden_size > 0 and hidden_size != self.hidden_size:
            if self.hidden_size_warning:
                logger.warning(
                    f"--hidden_size is {self.hidden_size}. Detected value is {hidden_size}. Using detected value."
                )
                self.hidden_size_warning = False  # Show the warning only once.

        return num_heads, hidden_size
         US:  a$  X-  S:w  a  [         R                  SU SU 35        gU R                  R                  UR                  S   5      n
U R                  R                  UR                  S   5      nU R                  R                  UR                  S   5      nU
(       a  U(       a  U(       d  g[
        R                  " U
5      n[
        R                  " U5      n[
        R                  " U5      n[         R                  SUR                   SUR                   SUR                   S	U 35        U R                  R                  S
5      nUR                  S   UR                  S   UR                  S   /n[        R                  " S
UU	/US9nSUl        UR                  R                  [        R                  " SU5      /5        SR!                  S5      nU R#                  U5        U$ )a  Create an Attention node.

Args:
    q_matmul (NodeProto): MatMul node in fully connection for Q
    q_add (NodeProto): Add bias node in fully connection for Q
    k_matmul (NodeProto): MatMul node in fully connection for K
    k_add (NodeProto): Add bias node in fully connection for K
    v_matmul (NodeProto): MatMul node in fully connection for V
    v_add (NodeProto): Add bias node in fully connection for V
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
    hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
    output (str): output name

Returns:
    Union[NodeProto, None]: the node created or None if failed.
r   zinput hidden size z# is not a multiple of num of heads Nr    zqw=z kw=z vw=z hidden_size=r   inputsoutputsnamecom.microsoftr   MultiHeadAttention ({})zcross attention)rA   debugr   r9   r$   r   r:   r)   create_node_namerK   r   	make_nodedomain	attributeextendmake_attributeformatincrease_counter)r   rE   rF   rG   rH   rI   rJ   r   r   rK   q_weightk_weightv_weightqwkwvwattention_node_nameattention_inputsattention_nodecounter_names                       r   create_attention_node2FusionMultiHeadAttentionSam2.create_attention_node   s   8 ? 7A=LL-k]:]^g]hij::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@X(!!(+!!(+!!(+s288*D
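
    # For illustration, a sketch of the node the method above emits. The tensor and node
    # names are hypothetical; the real names come from the matched Add nodes and from
    # create_node_name:
    #
    #     fused = helper.make_node(
    #         "MultiHeadAttention",
    #         inputs=["q_add_out", "k_add_out", "v_add_out"],
    #         outputs=["attention_out"],
    #         name="MultiHeadAttention_0",
    #     )
    #     fused.domain = "com.microsoft"
    #     fused.attribute.extend([helper.make_attribute("num_heads", 8)])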
$rxxjk][\"jj99:NO LLOLLOLLO
  )) #H$	
 !0  '')>)>{I)V(WX0778IJl+r   c                 z   U R                  XU5      (       a  g U R                  U5      nUcK  UR                  S   U;  a  g X1R                  S      nUR                  S:w  a  g U R                  U5      nUc  g Uu	  pgpppnUnU R	                  XS5      u  nnUS::  a  [
        R                  S5        g U R                  U	U
UUUUUUUR                  S   S9	nUc  g U R                  R                  U5        U R                  U R                  UR                  '   U R                  R                  X/5        SU l        g )Nr   AddF*fuse_attention: failed to detect num_heads)rK   T)fuse_sam_encoder_patternmatch_attention_subgraphr$   op_typerC   rA   rS   rf   rK   nodes_to_addappendthis_graph_namenode_name_to_graph_namerP   nodes_to_removerX   prune_graph)r   normalize_nodeinput_name_to_nodesoutput_name_to_node	match_qkvskip_addreshape_qkvtranspose_qkvr   matmul_qadd_qmatmul_kadd_kmatmul_vadd_vattention_last_nodeq_num_headsq_hidden_sizenew_nodes                      r   fuse!FusionMultiHeadAttentionSam2.fuse   sb   ((Nabb11.A	##A&.AA*+?+?+BCH5(55h?I cl`I%[`)%)%G%G	ch%i"]!LLEF --&--a0 . 

   *6:6J6J$$X]]3##%8$HI  r   c           	         U R                   R                  U/ SQ/ SQ5      nUc  gUu    p4pVU R                   R                  U/ SQ/ SQ5      nUc  [        R                  S5        gUu    p8n	U R                   R                  USS/S	S	/5      n

    def match_attention_subgraph(self, node_after_output_projection):
        """Match Q, K and V paths exported by PyTorch 2.*"""
        qkv_nodes = self.model.match_parent_path(
            node_after_output_projection,
            ["Add", "MatMul", "Reshape", "Transpose", "MatMul"],
            [None, None, None, 0, 0],
        )
        if qkv_nodes is None:
            return None

        (_, _, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes

        v_nodes = self.model.match_parent_path(matmul_qkv, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None])
        if v_nodes is None:
            logger.debug("fuse_attention: failed to match v path")
            return None
        (_, _, add_v, matmul_v) = v_nodes

        qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "MatMul"], [0, 0])
        if qk_nodes is not None:
            (_softmax_qk, matmul_qk) = qk_nodes
        else:
            logger.debug("fuse_attention: failed to match qk path")
            return None

        q_nodes = self.model.match_parent_path(
            matmul_qk, ["Mul", "Transpose", "Reshape", "Add", "MatMul"], [0, None, 0, 0, None]
        )
        if q_nodes is None:
            logger.debug("fuse_attention: failed to match q path")
            return None
        (mul_q, _transpose_q, reshape_q, add_q, matmul_q) = q_nodes

        k_nodes = self.model.match_parent_path(
            matmul_qk, ["Mul", "Transpose", "Reshape", "Add", "MatMul"], [1, None, 0, 0, None]
        )
        if k_nodes is None:
            logger.debug("fuse_attention: failed to match k path")
            return None
        (_mul_k, _, _, add_k, matmul_k) = k_nodes

        # Match the scale path of Q: the exported graph scales both Q and K by
        # sqrt(1/sqrt(head_size)), where head_size is sliced from the shape of Q.
        mul_q_nodes = self.model.match_parent_path(
            mul_q,
            ["Sqrt", "Div", "Sqrt", "Cast", "Slice", "Shape", "Transpose", "Reshape"],
            [None, 0, 1, 0, 0, 0, 0, 0],
        )
        if mul_q_nodes is None or mul_q_nodes[-1] != reshape_q:
            logger.debug("fuse_attention: failed to match mul_q path")
            return None

        return reshape_qkv, transpose_qkv, reshape_q, matmul_q, add_q, matmul_k, add_k, matmul_v, add_v

    def fuse_sam_encoder_pattern(self, normalize_node, input_name_to_nodes, output_name_to_node) -> bool:
        """Fuse the attention subgraph of the SAM2 image encoder. Returns True if fusion is applied."""
        # Reach the Add node right after the attention output projection. It can be matched
        # directly, or through the Reshape/Transpose/Reshape (window unpartition) nodes of
        # the Hiera backbone, optionally with two Slice nodes that crop window padding.
        nodes = self.model.match_parent_path(
            normalize_node,
            ["Add", "Reshape", "Transpose", "Reshape"],
            [None, None, 0, 0],
        )
        if nodes is None:
            nodes = self.model.match_parent_path(
                normalize_node,
                ["Add", "Slice", "Slice", "Reshape", "Transpose", "Reshape"],
                [None, None, 0, 0, 0, 0],
            )
            if nodes is None:
                nodes = self.model.match_parent_path(
                    normalize_node,
                    ["Add"],
                    [None],
                )
                if nodes is None:
                    return False

        node_after_output_projection = nodes[-1]
        matched_sdpa = self.match_sam_encoder_attention_subgraph(
            node_after_output_projection, input_index=1 if len(nodes) == 1 else None
        )
        if matched_sdpa is None:
            return False

        reshape_out, transpose_out, split_qkv, transpose_q, transpose_k, transpose_v = matched_sdpa

        permutation_q = OnnxModel.get_node_attribute(transpose_q, "perm")
        if not (isinstance(permutation_q, list) and permutation_q == [0, 2, 1, 3]):
            return False

        permutation_k = OnnxModel.get_node_attribute(transpose_k, "perm")
        if not (isinstance(permutation_k, list) and permutation_k == [0, 2, 3, 1]):
            return False

        permutation_v = OnnxModel.get_node_attribute(transpose_v, "perm")
        if not (isinstance(permutation_v, list) and permutation_v == [0, 2, 1, 3]):
            return False

        input_projection_nodes = self.model.match_parent_path(
            split_qkv,
            ["Reshape", "Add", "MatMul"],
            [0, 0, None],
        )
        if input_projection_nodes is None:
            return False
        reshape_in, _add_in, _matmul_in = input_projection_nodes

        num_heads, hidden_size = self.get_num_heads_and_hidden_size(reshape_in, normalize_node, True)
        if num_heads <= 0:
            logger.debug("fuse_attention: failed to detect num_heads")
            return False

        # Reshape query from BxSxNxH to BxSxD (see the note after this method). The shape
        # initializer is shared by all fused attention layers.
        new_dims_name = "bsnh_to_bsd_reshape_dims"
        new_dims = self.model.get_initializer(new_dims_name)
        if new_dims is None:
            new_dims = numpy_helper.from_array(np.array([0, 0, -1], dtype="int64"), name=new_dims_name)
            self.model.add_initializer(new_dims, self.this_graph_name)

        reshape_q_name = self.model.create_node_name("Reshape")
        reshape_q = helper.make_node(
            "Reshape",
            inputs=[transpose_q.input[0], new_dims_name],
            outputs=[transpose_q.input[0] + "_BSD"],
            name=reshape_q_name,
        )
        self.nodes_to_add.append(reshape_q)
        self.node_name_to_graph_name[reshape_q.name] = self.this_graph_name

        # Transpose key from BxSxNxH to BxNxSxH, the key format expected by MultiHeadAttention.
        transpose_k_bnsh = helper.make_node(
            "Transpose",
            inputs=[transpose_k.input[0]],
            outputs=[transpose_k.input[0] + "_BNSH"],
            name=self.model.create_node_name("Transpose"),
            perm=[0, 2, 1, 3],
        )
        self.nodes_to_add.append(transpose_k_bnsh)
        self.node_name_to_graph_name[transpose_k_bnsh.name] = self.this_graph_name

        logger.debug(f"Found MHA: {num_heads=} {hidden_size=}")
        new_node = self.create_mha_node(reshape_q, transpose_k_bnsh, transpose_v, num_heads)

        # MultiHeadAttention outputs BxSxD directly, so the BNSH-to-BSNH Transpose is
        # bypassed and the following Reshape reads from the fused node instead.
        assert len(self.model.get_children(transpose_out, input_name_to_nodes)) == 1
        reshape_out.input[0] = new_node.output[0]

        self.nodes_to_add.append(new_node)
        self.node_name_to_graph_name[new_node.name] = self.this_graph_name

        self.nodes_to_remove.extend([transpose_out])

        self.prune_graph = True
        return True
Q/ SQ5      nUc:  U R                   R                  U/ SQ/ SQ5      nUc  [        R                  S5        gUS   U
:w  a  gUS   nU R                   R                  U/ S
Q/ SQ5      nUc  [        R                  S5        gUS   U
:w  a  gUu  nnnnXVU
UUU	4$ )z%Match SDPA pattern in SAM2 enconder.*r   Nr   )r   SqueezeSplitr   )r    r   r   r   zfailed to match v pathr   r   zfailed to match qk path)r   r   r   r   r   )	r   r   r   r   MaxPoolr   r   r   r   )	r   Nr   r   r   r   r   r   r   zfailed to match q pathr   r    )r    Nr   r   zfailed to match k pathr   )r   r   r   	out_nodesr   r   r   matmul_qk_vr   r   r   ry   r   r   r   r   r   r   mul_kr   
_squeeze_ks                        r   r   AFusionMultiHeadAttentionSam2.match_sam_encoder_attention_subgraph  s    JJ00(?$a+
	 :C7AM **..{<hjvw?LL123:0:://i=RUVXYTZ['/$[)LL23**..y:bdst?jj22s.G
 562;)#aj**..y:bdst?LL122;)#.5+Z9k;P[[[r   r   r   c                    U R                   R                  S5      nUR                  S   UR                  S   UR                  S   /nUS-   n[        R                  " SUU/US9nSUl        UR                  R                  [        R                  " SU5      /5        SR                  S5      n	U R                  U	5        U$ )	a  Create a MultiHeadAttention node for SAM2 encoder.


    def create_mha_node(
        self, reshape_q: NodeProto, transpose_k: NodeProto, transpose_v: NodeProto, num_heads: int
    ) -> NodeProto:
        """Create a MultiHeadAttention node for SAM2 encoder.

        Args:
            reshape_q (NodeProto): Reshape node for Q, output is 3D BxSxNH format
            transpose_k (NodeProto): Transpose node for K, output is BNSH format
            transpose_v (NodeProto): Transpose node for V, output is BNSH format
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

        Returns:
            NodeProto: the MultiHeadAttention node created.
        """
        attention_node_name = self.model.create_node_name("MultiHeadAttention")

        inputs = [
            reshape_q.output[0],
            transpose_k.output[0],
            transpose_v.output[0],
        ]

        output = attention_node_name + "_out"
        attention_node = helper.make_node(
            "MultiHeadAttention",
            inputs=inputs,
            outputs=[output],
            name=attention_node_name,
        )
        attention_node.domain = "com.microsoft"
        attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

        counter_name = "MultiHeadAttention ({})".format("self attention")
        self.increase_counter(counter_name)
        return attention_node