
    hO                         S SK Jr  S SKJr  S SKJrJr  S SKJr  \" \	5      r
 " S S\5      r " S S\5      r " S	 S
\5      rg)    )	getLogger)Fusion)TensorProtohelper)	OnnxModelc                   N   ^  \ rS rSrS
S\S\S\4U 4S jjjrS\S\4S jrS	r	U =r
$ )FusionLayerNormalization   modelcheck_constant_and_dimensionforcec                 @   > [         TU ]  USS5        X l        X0l        g NLayerNormalization
ReduceMean)super__init__r   r   )selfr   r   r   	__class__s       c/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_layernorm.pyr   !FusionLayerNormalization.__init__   s      4lC,H)
    input_name_to_nodesoutput_name_to_nodec                  
   / nU R                   R                  X5      n[        U5      S:X  d  [        U5      S:  a  gUR                  S   nUS   R                  S:w  d  US   R                  S   U:w  a  g[        U5      S:X  a*  US   R                  S:w  d  US   R                  S   U:w  a  gSnU HN  nU R                   R                  USUSS9n	U	b  U	n  O,U R                   R                  US	S/5      n
U
c  MI  U
S
   n  O   Uc  gU R                   R                  U/ SQ/ SQ4/ SQ/ SQ4/U5      u  pnUc  gUS
   nX;  a  gUS   nU R                   R                  U5      u  nnUb  US::  d  US:  a  [        R                  SU 35        gUS   nU R                   R                  US5      S:w  a  gUR                  S   U;  a  gX'R                  S      nU GH  nUR                  S	:X  a<  UR                  U5        UR                  S   U;  a  M:  UUR                  S      S   nOUnUR                  S:w  a  Md  UR                  S   U;  a  My  UUR                  S      S   nUR                  S:w  a  M  UR                  U5        UR                  U5        UR                  USS
 5        UR                  UUU/5        UR                  S	:w  a  UOUnUR                  SU R                   R                  UR                  S   U5      -
     nU R                   (       a%  U R                   R#                  USS5      (       d  GMm  UR                  SU R                   R                  UR                  S   U5      -
     nU R                   (       a%  U R                   R#                  USS5      (       d  GM  UR                  S   nU R                   R%                  UUR                  UU5      (       d1  U R&                  (       a  SU l        O3[        R                  S5        GMI  U R*                  R                  U5        [,        R.                  " SUR                  S   UU/U/U R                   R1                  SSS9S9nUR2                  R                  [,        R4                  " S[7        U5      5      /5        U R8                  R                  U5        U R:                  U R<                  UR>                  '   GM     g)a=  
Fuse Layer Normalization subgraph into one node LayerNormalization:
      +----------------------+
      |                      |
      |                      v
  [Root] --> ReduceMean -->  Sub  --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Add
             (axis=2 or -1)  |      (Y=2)   (axis=2 or -1)  (B=E-6 or E-12)    ^
                             |                                                 |
                             +-------------------------------------------------+

 It also handles cases of duplicated sub nodes exported from older version of PyTorch:
      +----------------------+
      |                      v
      |           +-------> Sub-----------------------------------------------+
      |           |                                                           |
      |           |                                                           v
  [Root] --> ReduceMean -->  Sub  --> Pow --> ReduceMean --> Add --> Sqrt --> Div  --> Mul --> Add
      |                      ^
      |                      |
      +----------------------+
r      NSub   DivF	recursiveCastSqrtAddr   Powr   r   r   r   r   r   )r%   r&   r   r'   r"   r   )r   r   r   r   r   r   -C6?Hskip SkipLayerNormalization fusion since epsilon value is not expected:           @Mulr&   layernorm weightlayernorm biasT4It is not safe to fuse LayerNormalization node. Skipr   	LayerNormname_prefixinputsoutputsnameepsilon) r   get_childrenleninputop_typefind_first_child_by_typematch_child_pathmatch_parent_pathsget_constant_inputloggerdebugfind_constant_inputoutputappendextendinput_indexr   $is_constant_with_specified_dimensionis_safe_to_fuse_nodesr   prune_graphnodes_to_remover   	make_nodecreate_node_name	attributemake_attributefloatnodes_to_addthis_graph_namenode_name_to_graph_namer7   )r   noder   r   subgraph_nodeschildren
root_inputdiv_nodechild
div_node_1
div_node_2_path_idparent_nodes_sub_nodeadd_eps_nodeir8   pow_nodediv_children	temp_nodemul_nodelast_add_nodenode_before_weightweight_input
bias_inputlayer_norm_outputnormalize_nodes                               r   fuseFusionLayerNormalization.fuse   s   , ::**4Ex=AX!2ZZ]
A;%'8A;+<+<Q+?:+Mx=A{""e+x{/@/@/Cz/QE<<UEK^jo<pJ%% "ZZ88P
))"~H  $(JJ$A$A<oNDFXY  %
! ###AZZ22<@
7?glg.>LLcdkclmn?::))(C8A=??1%88 +??1+=>%I  F*%%i0##A&.AA.y/?/?/BCAF %5(q!)<</0BCAFM$$-!!$'!!(+!!,s"34!!=(H"EF-6->->&-Hi#>>!djj.D.DEWE^E^_`Eack.l*lmL009h9ha!3: : &,,Q1G1GXYHZ\i1j-jkJ009h9hA/: :  - 4 4Q 7::33$$##	  ::'+D$LL!WX$$++N;#--$

1|Z@*+ZZ001ES^0_	N $$++V-B-B9eT[n-],^_$$^4@D@T@TD(()<)<=A &r   )r   r   rJ   )TF)__name__
__module____qualname____firstlineno__r   boolr   dictrl   __static_attributes____classcell__r   s   @r   r	   r	      sC    i t [_  
SUd SU SU SUr   r	   c                   d   ^  \ rS rSrS\4U 4S jjrS rSS\S\\	   4S jjr
S\S	\4S
 jrSrU =r$ )FusionLayerNormalizationNCHW   r   c                 (   > [         TU ]  USS5        g r   r   r   r   r   r   s     r   r   %FusionLayerNormalizationNCHW.__init__   s     4lCr   c                    U R                   R                  U5      nUc  [        R                  U SU S35        g [	        UR
                  5      S:w  d&  UR
                  S   S:w  d  UR
                  S   S:w  a(  [        R                  U SU SUR
                   35        g UR                  UR
                  S   /5      $ )N z is not initializer.r+   r   r   z* shall have 3 dimensions Cx1x1. Got shape r   )r   get_constant_valuerA   rB   r:   shapereshape)r   output_namedescriptionvalues       r   get_weight_or_bias/FusionLayerNormalizationNCHW.get_weight_or_bias   s    

--k:=LLK=+6JKLu{{q EKKNa$75;;q>Q;NLLK=+6`afalal`mno}}ekk!n-..r   
input_namepermc                     U R                   R                  S5      nUc  US-   S-   U-   n[        R                  " SU/U/US9nUR                  R                  [        R                  " SU5      /5        U$ )z&Append a Transpose node after an input	Transpose_out-r4   r   )r   rM   r   rL   rN   rF   rO   )r   r   r   r   	node_nametranspose_nodes         r   create_transpose_node2FusionLayerNormalizationNCHW.create_transpose_node   sw    JJ//<	#f,s2Z?K))+zlU`Tahqr  '')>)>vt)L(MNr   r   r   c                 
   [         R                  " US5      n[        U[        5      (       a  US/:w  a  g/ nU R                  R                  X5      n[        U5      S:w  a  gUR                  S   nUS   R                  S:w  d  US   R                  S   U:w  a  gUS   nU R                  R                  USUSS9n	U	c  gU R                  R                  U	/ S	Q/ S
QU5      n
U
c  gU
u  ppnX:w  a  gU R                  R                  U5      u  nnUb  US::  d  US:  a  [        R                  SU 35        g[         R                  " US5      n[        U[        5      (       d   eUS/:w  a  gU R                  R                  US5      S:w  a  gX)R                  S      S   nUnUR                  S:w  a  gUUR                  S      S   nUR                  S:w  a  gUR!                  U5        UR#                  U
5        UR#                  UUU	/5        U R                  R%                  UUR                  UU5      (       d  [        R                  S5        gUR                  S:w  a  U	OUnUR                  SU R                  R'                  UR                  S   U5      -
     nU R)                  US5      nUc  gUR                  SU R                  R'                  UR                  S   U5      -
     nU R)                  US5      nUc  g[*        R,                  " US-   [.        R0                  UR2                  U5      n[*        R,                  " US-   [.        R0                  UR2                  U5      nU R                  R5                  UU R6                  5        U R                  R5                  UU R6                  5        U R8                  R#                  U5        U R;                  UR                  S   / SQ5      nU R                  R=                  SSS9nU R;                  US-   / SQUR                  S   5      n[*        R>                  " SUR                  S   US-   US-   /US-   /US9nUR@                  R#                  [*        RB                  " S[E        U5      5      /5        U RF                  R!                  U5        U RF                  R!                  U5        U RF                  R!                  U5        U R6                  U RH                  URJ                  '   U R6                  U RH                  URJ                  '   U R6                  U RH                  URJ                  '   Sn U RM                  U 5        g)a  
Fuse Layer Normalization subgraph into one node LayerNormalization:
      +----------------------+
      | NxCxHxW              |
      |                      v                                                     (Cx1x1)  (Cx1x1)
  [Root] --> ReduceMean -->  Sub --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Add -->
             (axes=1)        |      (Y=2)     (axes=1)     (E-6)             ^
                             |                                               |
                             +-----------------------------------------------+

Fused subgraph:
               (0,2,3,1)                            (0,3,1,2)
    [Root] --> Transpose --> LayerNormalization --> Transpose -->
axesr   Nr   r   r   Fr    r$   r(   r)   r*   r,   r-   r&   r0   r"   r.   r/   _NHWC)r   r   r+   r   r   r1   r2   	_out_nhwc)r   r+   r   r   r4   r8   zLayerNormalization(NHWC))'r   get_node_attribute
isinstancelistr   r9   r:   r;   r<   r=   match_parent_pathr@   rA   rB   rC   rD   rE   rF   rI   rG   r   r   make_tensorr   FLOATr   add_initializerrR   rK   r   rM   rL   rN   rO   rP   rQ   rS   r7   increase_counter)!r   rT   r   r   r   rU   rV   rW   subrX   r]   
_sqrt_nodesecond_add_nodereduce_mean_noderb   r_   ra   r8   rd   re   rf   rg   rh   weightri   biasweight_nhwc	bias_nhwctranspose_inputlayernorm_node_nametranspose_outputrk   counter_names!                                    r   rl   !FusionLayerNormalizationNCHW.fuse   s    ++D&94&&4A3;::**4Ex=AZZ]
A;%'8A;+<+<Q+?:+Mqk::66sECVbg6hzz337	
 LXI
%5?ZZ22?C
7?glg.>LLcdkclmn++,<fE$%%%%A3;::))(C8A='(:;A>	u$+HOOA,>?B  E)d#l+}hABzz//  	
 
 LLOP)2):):f)DX)~~a$***@*@ASAZAZ[\A]_g*h&hi((7IJ>"((TZZ-C-CHOOTUDVXe-f)fg
&&z3CD<(()?ARARTZT`T`bhi&&zG';[=N=NPVP\P\^de	

"";0D0DE

""9d.B.BC##N344TZZ]LQ"jj99:N\g9h55+-|]=Q=QRS=T
  )) #**1-|g/EzT[G[\(;67$	
 	  '')>)>y%PW.)Y(Z[  1  0  !12=A=Q=Q$$_%9%9:<@<P<P$$^%8%89>B>R>R$$%5%:%:;1l+r    )N)rn   ro   rp   rq   r   r   r   strr   intr   rs   rl   rt   ru   rv   s   @r   rx   rx      sI    Di D
/
 
49 
},d }, }, },r   rx   c                   B   ^  \ rS rSrS\4U 4S jjrS\S\4S jrSrU =r	$ )FusionLayerNormalizationTFiF  r   c                 *   > [         TU ]  USSS5        g )Nr   r&   TFr{   r|   s     r   r   #FusionLayerNormalizationTF.__init__G  s     4eTBr   r   r   c                 f   / nU R                   R                  U/ SQ/ SQ4/ SQ/ SQ4/U5      u  pVnUc  g[        U5      S:X  d   eUS   S;   a  US	   S;   a	  US
   S;   d  [        R	                  S5        gUSS u  nnn	n
nnUSS u  pnnSn[        U5      S:X  a  US   nUR
                  S:X  d   eU R                   R                  USSU5      nUc  [        R	                  S5        gU R                   R                  USU5      nUc  UOU R                   R                  USU5      nUc  [        R	                  S5        gU R                   R                  U5      u  nnUb  US::  d	  US:  a  Uc  [        R	                  S5        gUcP  UR                  S   UR                  ;  d  UR                  S   UR                  ;  a  [        R	                  S5        gUbP  UR                  S   UR                  ;  d  UR                  S   UR                  ;  a  [        R	                  S5        gUR                  S   UR                  S	   :w  a  [        R	                  S5        gUUUU	U
UUUUUUU/nUbK  U R                   R                  USSU5      nUc  [        R	                  S5        gUR                  UUU/5        U R                   R                  UUR                  U R                   R                  5       U R                   R                  5       5      (       d  [        R	                  S5        gU R                  R                  U5        U	R                  S	   nUR                  S   n[         R"                  " SUR                  S   UU/UR                  S   /U R                   R%                  SSS9S9nUR&                  R                  [         R(                  " S[+        U5      5      /5        U R,                  R/                  U5        U R0                  U R2                  UR4                  '   g)a  
 Layer Norm from Tensorflow model(using keras2onnx or tf2onnx):
  +------------------------------------+
  |                                    |
  |                                    |
(Cast_1)                               |
  |                                    |
  |                                    v                                           (B)                             (B)             (A)
 Add --> (Cast_1) --> ReduceMean -->  Sub  --> Mul --> ReduceMean --> (Cast_3) --> Add --> Sqrt --> Reciprocol --> Mul --> Mul --> Sub --> Add
  |                       |                                                                                         |       ^              ^
  |                       |                                                                                         |       |              |
  |                       +--------------------------------------------------(Cast_2)-------------------------------|-------+              |
  |                                                                                                                 v                      |
  +---------------------------------------------------------------------------------------------------------------> Mul--------------------+
)
r   r-   r-   
Reciprocalr%   r&   r   r-   r   r   )
r   r   Nr   r   r   Nr   r   N)r   r-   r-   r   r%   r&   r"   r   r-   r   r   )r   r   Nr   r   r   r   Nr   r   NNr+   r   )r   r   r   r   z=return indice is exepected in [0, 1], but got {return_indice}      r"   r-   zmul_node_3 not foundzroot node is nonegh㈵>zepsilon is not matchedz;reduce_mean_node_1 and mul_node_3 shall link from root nodez%mul_node_2 shall have two same inputszcast_node_2 not foundz$not safe to fuse layer normalizationr   r1   r2   r4   r8   )r   r?   r:   rA   rB   r<   match_parent
get_parentr@   r;   rF   rI   rD   r   r   rK   r   rL   rM   rN   rO   rP   rQ   rE   rR   rS   r7   )r   rT   r   r   return_indicer^   r]   
sub_node_0
mul_node_0
mul_node_1reciprocol_node	sqrt_node
add_node_0reduce_mean_node_0
mul_node_2
sub_node_1reduce_mean_node_1cast_node_3
mul_node_3node_before_reduce	root_nodera   r8   rU   cast_node_2rh   ri   
fused_nodes                               r   rl   FusionLayerNormalizationTF.fuseJ  sH     )-)F)F <  ?! B  G$*
&L =!Q&&&a F*}Q/?6/Im\]N^bhNhLLXY !	
IUVXVYIZF
4F|"&q/K&&&000ZZ,,T5!=PQ
LL/0!ZZ223EqJ]^ " &&'91>QR 	
 LL,-ZZ22:>
7?glw/?KDWLL12$$Q'z/?/??CUC[C[\]C^fpfvfvCvLLVW"$$Q'z/?/??CUC[C[\]C^fpfvfvCvLLVWA*"2"21"55LL@A 
 "**11*faI\]K"45!!#5{K"PQzz//KKJJ**,JJ**,	
 
 LL?@##N3!''*%%a(
 %% $$Q'zB[[^$,,-A{,[	

 	##V%:%:9eGn%U$VW  ,8<8L8L$$Z__5r   r   )
rn   ro   rp   rq   r   r   rs   rl   rt   ru   rv   s   @r   r   r   F  s1    Ci C_Md _M _M _Mr   r   N)loggingr   fusion_baser   onnxr   r   
onnx_modelr   rn   rA   r	   rx   r   r   r   r   <module>r      sM   
   $  	8	YUv YUxY,6 Y,xcM cMr   