
    hE                     Z    S SK Jr  S SKJr  S SKJr  S SKJr  \" \5      r	 " S S\5      r
g)    )	getLogger)Fusion)helper)	OnnxModelc                      ^  \ rS rSrS\4U 4S jjrS\S\4S jrS\S-  4S	 jr	S\S\S\S-  4S
 jr
S\S\S\S-  4S jrS\S\S\S-  4S jrSrU =r$ )FusionFastGelu   modelc                 (   > [         TU ]  USS5        g )NFastGeluTanh)super__init__)selfr
   	__class__s     b/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/fusion_fastgelu.pyr   FusionFastGelu.__init__   s    
F3    input_name_to_nodesoutput_name_to_nodec                     U R                  XU5      (       a  g U R                  XU5      (       a  g U R                  XU5      (       a  g U R                  XU5      (       a  g g )N)fuse_1fuse_2fuse_3fuse_4)r   	tanh_noder   r   s       r   fuseFusionFastGelu.fuse   s\    ;;y7JKK;;y7JKK;;y7JKK;;y7JKK Lr   returnNc                 `   UR                   S   U;  a  gX!R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nU R                  R	                  US5      (       d  gUR                   S   U;  a  gX%R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nU R                  R                  USSU5      nUc  gU R                  R                  US5      nUS:  a  gUR                  US:X  a  SOS   n	U R                  R                  XxS:X  a  SOSU5      n
U R                  R                  USSU5      nUc  gU R                  R                  USS	S
9nUS:  a  gU R                  R                  USUS:X  a  SOSU5      nUc  gU R                  R                  USSUU
(       a  U
/O/ S9nUc  gU R                  R                  USS	S
9nUS:  a  gU R                  R                  USUS:X  a  SOSU5      nUc  gU R                  R	                  US5      (       d  gUR                  S   U	:w  a  gUUUUUUUU/nU R                  R                  UUR                   S   /UU5      (       d  gU R                  R                  U5        [        R                  " SU	/UR                   U R                  R                  S5      S9nSUl        U R                   R#                  U5        U R$                  U R&                  UR(                  '   g)a  
Fuse Gelu with tanh into one node:
      +---------------------------+
      |                           |
      |                           v
    [root] --> Pow --> Mul -----> Add  --> Mul --> Tanh --> Add --> Mul
      |       (Y=3)   (B=0.0447...)       (B=0.7978...)    (B=1)     ^
      |                                                              |
      +------> Mul(B=0.5)--------------------------------------------+
Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
r   N   Add      ?Mul      ?+ݓ?-C6?deltaexclude,C?Pow      @r   inputsoutputsnamecom.microsoftT)outputlenop_typer
   has_constant_inputmatch_parentfind_constant_inputinput
get_parentis_safe_to_fuse_nodesnodes_to_removeextendr   	make_nodecreate_node_namedomainnodes_to_addappendthis_graph_namenode_name_to_graph_namer2   )r   r   r   r   childrenadd_after_tanhmul_after_tanhmul_halfi
root_input	root_nodemul_before_tanhadd_before_tanhmul_after_powpowsubgraph_nodes
fused_nodes                    r   r   FusionFastGelu.fuse_1   sG    A&99&'7'7':;x=A!!4!4!=!!zz,,^SAA  #+>>&'<'<Q'?@x=A!!4!4!=!!::**>5$H[\JJ**8S9q5^^aAQ7
 JJ))(aAQH[\	**11)UAGZ["JJ**?F&*Qq5**11/5qTUv![\^qr"

//#,YK" 0 
  JJ**=&*Oq5jj%%mUaAQPcd;zz,,S#6699Q<:% 	
 zz//""1%&	
 
 ##N3%%<")),,Z8	

 ,
  ,8<8L8L$$Z__5r   c                     UR                   S   U;  a  gX!R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nU R                  R	                  US5      (       d  gUR                   S   U;  a  gX%R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nU R                  R                  US5      nUS:  a  gUR                   S   U;  a  gX&R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nU R                  R                  UUR                  S   UR                   S   :X  a  SOSU5      n	U R                  R                  USSU5      n
U
c  gU R                  R                  U
SS	S
9nUS:  a  gU R                  R                  U
SUS:X  a  SOSU5      nUc  gU R                  R                  USSUU	(       a  U	/O/ S9nUc  gU R                  R                  USS	S
9nUS:  a  gU R                  R                  USUS:X  a  SOSU5      nUc  gU R                  R	                  US5      (       d  gUR                  UR                  S   UR                   S   :X  a  SOS   nUR                  S   U:w  a  gUUUUU
UUU/nU R                  R                  UUR                   S   /UU5      (       d  gU R                  R                  U5        [        R                  " SU/UR                   U R                  R                  S5      S9nSUl        U R                   R#                  U5        U R$                  U R&                  UR(                  '   g)ax  
This pattern is from Tensorflow model.
Fuse Gelu with tanh into one node:
      +---------------------------+
      |                           |
      |                           v
    [root] --> Pow --> Mul -----> Add  --> Mul --> Tanh --> Add --> Mul(B=0.5)-->Mul-->
      |       (Y=3)   (B=0.0447...)       (B=0.7978...)    (B=1)                  ^
      |                                                                           |
      +---------------------------------------------------------------------------+
Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
r   Nr!   r"   r#   r$   r%   r&   r'   r(   r*   r,   r-   r.   r   r/   r3   T)r4   r5   r6   r
   r7   r9   r;   r:   r8   r<   r=   r>   r   r?   r@   rA   rB   rC   rD   rE   r2   )r   r   r   r   rF   rG   rI   rJ   mul_after_mul_halfrL   rM   rN   rO   rP   rK   rQ   rR   s                    r   r   FusionFastGelu.fuse_2   s    A&99&'7'7':;x=A!!4!4!=!!zz,,^SAA  #+>>&'<'<Q'?@x=A!!4!4!=A;JJ**8S9q5??1%88&q'9:x=A!!4!4!=%a[ JJ))#))!,0BBA
	 **11)UAGZ["JJ**?F&*Qq5**11/5qTUv![\^qr"

//#,YK" 0 
  JJ**=&*Oq5jj%%mUaAQPcd;zz,,S#66'--3E3K3KA3NRZRaRabcRd3dajkl
99Q<:% 	
 zz//&&q)*	
 
 ##N3%%<&--,,Z8	

 ,
  ,8<8L8L$$Z__5r   c           	         UR                   S   U;  a  gX!R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nU R                  R	                  US5      (       d  gUR                   S   U;  a  gX%R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nU R                  R                  USSU5      nUc  gU R                  R                  US5      nUS:  a  gUR                  US:X  a  SOS   n	U R                  R                  USSU5      n
U
c  gU R                  R                  U
SSU5      nUc  gU R                  R                  US5      nUS:  a  gU R                  R                  U
SSU5      nUc  gU R                  R                  USS	S
9nUS:  a  gUR                  US:X  a  SOS   U	:w  a  gU R                  R                  USUS:X  a  SOSU5      nUc  gUR                  S   U	:X  a  SnOUR                  S   U	:X  a  SnOgU R                  R                  USUU5      nUc  gU R                  R                  USS	S
9nUS:  a  gUR                  US:X  a  SOS   U	:w  a  gUUUU
UUUUU/	nU R                  R                  UUR                   S   /UU5      (       d  gU R                  R                  U5        [        R                  " SU	/UR                   U R                  R                  S5      S9nSUl        U R                  R!                  U5        U R"                  U R$                  UR&                  '   g)a6  
OpenAI's gelu implementation, also used in Megatron:
   Gelu(x) = x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1.0 + 0.044715 * x * x)))

Fuse subgraph into a FastGelu node:
    +------------ Mul (B=0.79788456) -------------------+
    |                                                   |
    +-------------------------------+                   |
    |                               |                   |
    |                               v                   v
  [root] --> Mul (B=0.044715) --> Mul --> Add(B=1) --> Mul --> Tanh --> Add(B=1) --> Mul-->
    |                                                                                 ^
    |                                                                                 |
    +-----------> Mul (B=0.5) --------------------------------------------------------+
r   Nr!   r"   r#   r$   r%   r&   r'   r(   r,   r   r/   r3   T)r4   r5   r6   r
   r7   r8   r9   r:   r<   r=   r>   r   r?   r@   rA   rB   rC   rD   rE   r2   )r   r   r   r   rF   rG   mul_lastrI   rJ   rK   rM   add_1jmul_7978kmul_before_add_1anothermul_0447mrQ   rR   s                        r   r   FusionFastGelu.fuse_3   s     A&99&'7'7':;x=A!!4!4!=!!zz,,^SAA  #+>>&'<'<Q'?@x=A!!4!4!=A;::**8UDBUVJJ**8S9q5^^aAQ7
**11)UAGZ["

''FYZ=JJ**5#6q5::**?E4I\]JJ**8V6*Jq5>>qAv!1-;::225%a1fRSUhi#!!!$
2G##A&*4G::**+;UGM`aJJ**8V6*Jq5>>qAv!1-; 

 zz//__Q 	
 
 ##N3%%<OO,,Z8	

 ,
  ,8<8L8L$$Z__5r   c           	         UR                   S   U;  a  gX!R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nU R                  R	                  US5      (       d  gUR                   S   U;  a  gX%R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nUR                   S   U;  a  gX&R                   S      n[        U5      S:w  d  US   R                  S:w  a  gUS   nU R                  R	                  US5      (       d  gUR
                  UR
                  S   UR                   S   :X  a  SOS   nU R                  R                  USSU5      n	U	c  gU R                  R                  U	SS	S
9n
U
S:  a  gU R                  R                  U	SU
S:X  a  SOSU5      nUc  gUR
                  S   U:X  a  SnOUR
                  S   U:X  a  SnOgU R                  R                  USX5      nUc  gU R                  R                  USS	S
9nUS:  a  gU R                  R                  USUS:X  a  SOSU5      nUc  gUR
                  S   U:X  a  SnOUR
                  S   U:X  a  SnOgU R                  R                  USX5      nUc  gUR
                  S   U:w  d  UR
                  S   U:w  a  gUUUUU	UUUU/	nU R                  R                  UUR                   S   /UU5      (       d  gU R                  R                  U5        [        R                  " SU/UR                   U R                  R                  S5      S9nSUl        U R                  R!                  U5        U R"                  U R$                  UR&                  '   U R)                  S5        g)a  
PyTorch's gelu implementation with tanh approximation:
   Gelu(x) = 0.5 * x * (1 + torch.tanh(0.7978845834732056 * (x + 0.044714998453855515 * x * x * x)))

Fuse Gelu with tanh into one node:
      +-----------------+------------------+
      |                 |                  |
      |                 v                  v
    [root] ==> Mul --> Mul --> Mul -----> Add  --> Mul --> Tanh --> Add -----> Mul --> Mul -->
      |                       (A=0.0447)          (A=0.7978)        (A=1)       ^     (A=0.5)
      |                                                                         |
      +-------------------------------------------------------------------------+
Note that constant input for Add and Mul could be first or second input.
r   Nr!   r"   r#   r$   r%   r&   g{Gz?r(   r,   r   r/   r3   T)r4   r5   r6   r
   r7   r:   r8   r9   r<   r=   r>   r   r?   r@   rA   rB   rC   rD   rE   r2   increase_counter)r   r   r   r   rF   rG   rH   rI   rK   rM   r\   rN   r^   rO   r`   	mul_cubedmul_squaredrQ   rR   s                      r   r   FusionFastGelu.fuse_4s  s    A&99&'7'7':;x=A!!4!4!=!!zz,,^SAA  #+>>&'<'<Q'?@x=A!!4!4!=!!  #+>>&'<'<Q'?@x=A!!4!4!=A;zz,,Xs;;#))~/C/CA/F.J_J_`aJb/b!hij
**11)UAGZ["JJ**?F$*Oq5**11/5qTUv![\^qr"  #z1G""1%3G

//e JJ**=&*Mq5JJ++M5qAv!STVij	??1+G__Q:-Gjj--i]Q:-1B1B11E1S 

 zz//__Q 	
 
 ##N3%%<OO,,Z8	

 ,
  ,8<8L8L$$Z__5j)r    )__name__
__module____qualname____firstlineno__r   r   dictr   boolr   r   r   r   __static_attributes____classcell__)r   s   @r   r   r      s    4i 44 d jTTX[ jXrT rPT rY]`dYd rhrT rPT rY]`dYd rhyT yPT yY]`dYd y yr   r   N)loggingr   fusion_baser   onnxr   
onnx_modelr   rh   loggerr   rg   r   r   <module>ru      s+   
     	8	^V ^r   