
    hMA                         S SK Jr  S SKrS SKJr  S SKJr  S SKJ	r	J
r
  S SKJrJr  S SKJr  \" \5      r " S S	\5      rg)
    )	getLoggerN)AttentionMask)Fusion)FusionUtilsNumpyHelper)	NodeProtohelper)	OnnxModelc                   ^   ^  \ rS rSrS\S\S\S\4U 4S jjrS\S\	\\4   4S	 jr
S
 rSrU =r$ )FusionQOrderedAttention   modelhidden_size	num_headsattention_maskc                 L   > X l         X0l        X@l        [        TU ]  USS5        g )NQOrderedAttentionQOrderedLayerNormalization)r   r   r   super__init__)selfr   r   r   r   	__class__s        l/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_qordered_attention.pyr    FusionQOrderedAttention.__init__   s)     '", 35QR    	reshape_qreturnc                    U R                   R                  UR                  S   5      nUc  [        R	                  UR                  S    S35        U R                   R                  US/S/5      nUc  U R                  U R                  4$ US   n[        UR                  5      S:w  a  U R                  U R                  4$ UR                  S   R                  n[        R                  " U5      n[        U5      S:w  d  US   S::  d	  US   S::  a1  [        R	                  SU S	35        U R                  U R                  4$ US   nUS   nXV-  nU R                  S:  aM  XPR                  :w  a>  U R                  (       a-  [        R                  S
U R                   SU S35        SU l        U R                  S:  aM  XpR                  :w  a>  U R                  (       a-  [        R                  SU R                   SU S35        SU l        XW4$ )zDetect num_heads and hidden_size from a reshape node.
Args:
    reshape_q (NodeProto): reshape node for Q
Returns:
    Tuple[int, int]: num_heads and hidden_size
   z is not initializer.Constantr            zq_shape_value=z7. Expected value are like [0, 0, num_heads, head_size].z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r   get_initializerinputloggerdebugmatch_parent_pathr   r   len	attributetr   to_arraynum_heads_warningwarninghidden_size_warning)r   r   q_shapeconstant_nodeq_shape_valuer   	head_sizer   s           r   get_num_heads_and_hidden_size5FusionQOrderedAttention.get_num_heads_and_hidden_size    s    **,,Y__Q-?@?LLIOOA.//CDE !JJ88ZLSTRUVM$~~t'7'777 -a 0}../14>>4+;+;;; (11!466#,,W5}"}Q'71'<a@PTU@ULL>-8opq>>4#3#333!!$	!!$	+>>A)~~"=%%0@@TU^T__vwx).&aK3C3C$C'''(8(8'99Mk]Zqr ,1(%%r   c                    U R                   R                  USS/SS/5      nUb  US   nOg U R                   R                  US/S /5      nUc  [        R                  S5        g US   nU R                   R                  U/ SQ/ SQ5      nUc  [        R                  S	5        g Uu  pppn[        R
                  " XR                   5      (       d  g [        R
                  " XR                   5      (       d  g / n[        UR                  5       H7  u  nnUU;  a  M  UUS   R                  S   :X  a  M&  UR                  U5        M9     [        U5      S
:w  a  g US   nU R                   R                  U/ SQ/ SQ5      nUc  [        R                  S5        g Uu    nnnnn[        R
                  " UU R                   5      (       d  g [        R
                  " UU R                   5      (       d  g U R                   R                  US/S
/5      nUc  [        R                  S5        g US   nU R                   R                  UR                  S   5      c  g [        R
                  " UU R                   S5      (       d  g U R                   R                  U/ SQ/ SQ5      nUc  [        R                  S5        g Uu  nnnnnnn n![        R
                  " UU R                   5      (       d  g [        R
                  " UU R                   5      (       d  g [        R
                  " U U R                   5      (       d  g [        R
                  " UU R                   5      (       d  g U R                   R                  U!/ SQ/ SQ5      n"U"c  [        R                  S5        g U"u  nn#n$n%n&n'[        R
                  " U%U R                   5      (       d  g [        R
                  " U$U R                   5      (       d  g U R                   R                  U'S/S
/5      n(U(c  [        R                  S5        g U(S   n(U R                   R                  U(R                  S   5      c  g [        R
                  " U(U R                   S5      (       d  g U R                   R                  U!/ SQ/ SQ5      n)U)c  [        R                  S5        g U)u    nn*n+n,n-[        R
                  " U+U R                   5      (       d  g [        R
                  " U*U R                   5      (       d  g U R                   R                  U-S/S
/5      n.U.c  [        R                  S5        g U.S   n.U R                   R                  U.R                  S   5      c  g [        R
                  " U.U R                   S5      (       d  g U R                   R                  U/ SQ/ SQ5      n/U/c  [        R                  S5        g U R                   R                  U(R                  S   5      n0U R                   R                  U.R                  S   5      n1U R                   R                  UR                  S   5      n2[        R                  " U05      n3[        R                  " U15      n4[        R                  " U25      n5[        R                   " U3R"                  S
S  5      n6[        R                   " U4R"                  S
S  5      n7[        R                   " U5R"                  S
S  5      n8UR                  S   U:X  Ga  U'R                  S   U:X  Ga  U-R                  S   U:X  Ga  U R$                  R'                  U/S   R                  S   5      n9U R)                  U#5      u  n:n;UR                  S   /n<U<R                  UR                  S
   5        U<R                  U$R                  S
   5        U<R                  U*R                  S
   5        U<R                  UR                  S
   5        U<R                  U(R                  S   5        U<R                  U.R                  S   5        U<R                  UR                  S   5        U<R                  U(R                  S
   5        U<R                  U.R                  S
   5        U<R                  UR                  S
   5        U R                   R                  U&R                  S   5      (       a  U<R                  U&R                  S   5        OU<R                  U&R                  S
   5        U R                   R                  U,R                  S   5      (       a  U<R                  U,R                  S   5        OU<R                  U,R                  S
   5        U R                   R                  UR                  S   5      (       a  U<R                  UR                  S   5        OU<R                  UR                  S
   5        U<R                  U R                  S
   5        U<R                  UR                  S
   5        U<R                  UR                  S
   5        U9b  U<R                  U95        OU<R                  S5        U R                   R                  U(R                  S   5      n=[        R*                  " U=5        U R                   R                  U.R                  S   5      n>[        R*                  " U>5        U R                   R                  UR                  S   5      n?[        R*                  " U?5        U R                   R-                  S5      n@[.        R0                  " SU<U
R                  S   /U@S9nAU R                   R3                  XR                  S   UAR                  S   5        U R                   R3                  XR                  S   UR                  S   5        UAR4                  R7                  [.        R8                  " SU:5      /5        UAR4                  R7                  [.        R8                  " SS
5      /5        UAR4                  R7                  [.        R8                  " SS5      /5        UAR4                  R7                  [.        R8                  " SS
5      /5        UAR4                  R7                  [.        R8                  " SU6U7U8/5      /5        S UAl        U R<                  R                  UA5        U R>                  U R@                  UARB                  '   U RD                  R7                  XX/5        U RD                  R7                  U5        U RD                  R7                  U"5        U RD                  R7                  U)5        U RD                  R7                  U5        U RD                  R7                  U(U.U/5        S!U l#        g g g g )"NQuantizeLinearAddr   DequantizeLinearz=fuse_qordered_attention: failed to match input qdq nodes path)r8   MatMulReshape	Transposer:   r7   r;   )NNr   r   r   r   r   z1fuse_qordered_attention: failed to match qkv pathr   )r=   r<   r:   r7   r8   r;   )r   r   r   r   r   Nz/fuse_qordered_attention: failed to match v pathF)r:   r7   Softmaxr8   Divr:   r7   r;   )r   r   r   r   Nr   r   r   z0fuse_qordered_attention: failed to match qk path)r   r   r   r   r   Nz/fuse_qordered_attention: failed to match q pathz/fuse_qordered_attention: failed to match k path)MulSubCast	UnsqueezerC   )Nr   r   r   r   z8fuse_qordered_attention: failed to match mask_nodes path r   )inputsoutputsnamer   order_inputorder_weightorder_outputqkv_hidden_sizeszcom.microsoftT)$r   r(   r&   r'   r   check_qdq_node_for_fusion	enumerater%   outputappendr)   get_constant_valuer$   r   r,   npprodshaper   process_maskr4   transpose_2d_int8_tensorcreate_node_namer	   	make_nodereplace_node_inputr*   extendmake_attributedomainnodes_to_addthis_graph_namenode_name_to_graph_namerG   nodes_to_removeprune_graph)Br   normalize_nodeinput_name_to_nodesoutput_name_to_nodeadd_before_layernorm
start_nodedequantize_input	qkv_nodes_projection_matmulreshape_qkvtranspose_qkvdequantize_qkvquantize_qkv
matmul_qkvother_inputs_ir%   
root_inputv_nodesdequantize_v
quantize_vadd_vmatmul_vdequantize_v_matmul_weightqk_nodesdequantize_qk_softmaxquantize_qk_softmax
softmax_qkadd_qkdiv_qkdequantize_qkquantize_qk	matmul_qkq_nodesr   dequantize_q
quantize_qadd_qmatmul_qdequantize_q_matmul_weightk_nodesdequantize_k
quantize_kadd_kmatmul_kdequantize_k_matmul_weight
mask_nodesq_weightk_weightv_weightqwkwvwqw_out_sizekw_out_sizevw_out_size
mask_indexr   r   attention_inputsq_weight_tensork_weight_tensorv_weight_tensorattention_node_nameattention_nodesB                                                                     r   fuseFusionQOrderedAttention.fuseS   s   #zz;;u%F 
  +-b1J  ::77 F
 #LLXY+B/ JJ00e'
	 LLLMgpd{>Yc 44\::NN44^ZZPP ":#3#34IB//	!++A..& 5 |!!!_
 **..[!
 ?LLJK<C9A|Z 44ZLL44\4::NN &*ZZ%A%A(M_L`cdbe%f"%-LLJK%?%B"::(()C)I)I!)LMU 445OQUQ[Q[]bcc :://	 (
 LLKL 		
! 445H$**UU445JDJJWW44[$**MM44]DJJOO **..[!
 ?LLJKDKAI|Z 44ZLL44\4::NN &*ZZ%A%A(M_L`cdbe%f"%-LLJK%?%B"::(()C)I)I!)LMU 445OQUQ[Q[]bcc **..[!
 ?LLJK<C9A|Z 44ZLL44\4::NN &*ZZ%A%A(M_L`cdbe%f"%-LLJK%?%B"::(()C)I)I!)LMU 445OQUQ[Q[]bcc ZZ11DFX

 LLST ::--.H.N.Nq.QR::--.H.N.Nq.QR::--.H.N.Nq.QR!!(+!!(+!!(+ggbhhqrl+ggbhhqrl+ggbhhqrl+ >>!
*x~~a/@J/NS[SaSabcSdhrSr,,99*R.:N:Nq:QRJ &*%G%G	%R"I{ !1 6 6q 9:##$4$:$:1$=>##L$6$6q$9:##L$6$6q$9:##L$6$6q$9:##$>$D$DQ$GH##$>$D$DQ$GH##$>$D$DQ$GH##$>$D$DQ$GH##$>$D$DQ$GH##$>$D$DQ$GHzz))%++a.99 ''A7 ''A7zz))%++a.99 ''A7 ''A7zz))%++a.99 ''A7 ''A7##K$5$5a$89##$7$=$=a$@A##N$8$8$;< % ''
3 ''+ #jj889S9Y9YZ[9\]O00A"jj889S9Y9YZ[9\]O00A"jj889S9Y9YZ[9\]O00A #'**"="=>Q"R#--#'$++A./(	N JJ)).:N:Nq:QSaShShijSklJJ))*;=T=TUV=WYgYnYnopYqr$$++V-B-B;PY-Z,[\$$++V-B-B=RS-T,UV$$++V-B-B>ST-U,VW$$++V-B-B>ST-U,VW$$++&&'9KVa;bcd %4N!$$^4@D@T@TD(()<)<=  ''\(^_  ''1  ''0  ''0  ''0  ''+-GIcd  $DE Ts/N*r   )r   r   r/   r   r-   r`   )__name__
__module____qualname____firstlineno__r
   intr   r   r   tupler4   r   __static_attributes____classcell__)r   s   @r   r   r      s\    SS S 	S
 &S1&y 1&U3PS8_ 1&fQ$ Q$r   r   )loggingr   numpyrQ   fusion_attentionr   fusion_baser   fusion_utilsr   r   onnxr   r	   
onnx_modelr
   r   r&   r    r   r   <module>r      s4      *  1 "  	8	R$f R$r   