
    hM                     ~   S SK Jr  S SKJr  S SKJrJr  S SKJr  S SK	J
r
  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJrJr  S SKJrJr  S SKJr  S SKJ r   S SK!J"r"  S SK#J$r$  S SK%J&r&  S SK'J(r(  S SK)J*r*  S SK+J,r,  S SK-J.r.J/r/  S SK0J1r1J2r2  S SK3J4r4  S SK5J6r6J7r7J8r8  S SK9J:r:  \" \;5      r< " S S\:5      r=g)    )	getLogger)PackingMode)AttentionMaskFusionAttention)FusionBartAttention)FusionBiasGelu)FusionConstantFold)FusionEmbedLayerNormalization)FusionFastGelu)
FusionGelu)FusionGeluApproximation)FusionGemmFastGelu)FusionLayerNormalizationFusionLayerNormalizationTF)AttentionMaskFormatFusionOptions)FusionQOrderedAttention)FusionQOrderedGelu) FusionQOrderedLayerNormalization)FusionQOrderedMatMul)FusionQuickGelu)FusionReshape)FusionRotaryEmbeddings)FusionShape)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)FusionUtils)
ModelProtoTensorProtohelper)	OnnxModelc                   0  ^  \ rS rSrS*S\S\S\4U 4S jjjrS rS rS r	S	 r
S
 rS rS rS rS rS rS rS rS+S jrS rS rS rS\S\\   S\4S jrS\4S jrS rS,S jrS rS rS r S  r!S-S"\"S!-  S#\4S$ jjr#S% r$S.S& jr%S/S'\4S( jjr&S)r'U =r($ )0BertOnnxModel%   model	num_headshidden_sizec                 t  > US:X  a  US:X  d  US:  a  X2-  S:X  d   e[         TU ]  U5        X l        X0l        [	        U 5      U l        [        X R                  U R                  U R
                  5      U l        [        X R                  U R                  U R
                  5      U l	        [        U 5      U l        g)a  Initialize BERT ONNX Model.

Args:
    model (ModelProto): the ONNX model
    num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
    hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
r   N)super__init__r(   r)   r   attention_maskr   attention_fusionr   qordered_attention_fusionr   utils)selfr'   r(   r)   	__class__s       b/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/onnx_model_bert.pyr,   BertOnnxModel.__init__&   s     Q;!#3Q;KbfgKghh"&+D1 /6F6FX\XkXk l)@""DNND4G4G*
& !&
    c                 :    [        U 5      nUR                  5         g N)r	   applyr1   fusions     r3   fuse_constant_fold BertOnnxModel.fuse_constant_fold;       #D)r5   c                 l    U R                   R                  5         U R                  R                  5         g r7   )r.   r8   r/   r1   s    r3   fuse_attentionBertOnnxModel.fuse_attention?   s&    ##%&&,,.r5   c                     [        U 5      nUR                  5         [        U 5      nUR                  5         [        U 5      nUR                  5         [	        U 5      nUR                  5         g r7   )r   r8   r   r   r   r9   s     r3   	fuse_geluBertOnnxModel.fuse_geluD   sN    D!% &#D)r5   c                 :    [        X5      nUR                  5         g r7   )r   r8   )r1   is_fastgelur:   s      r3   fuse_bias_geluBertOnnxModel.fuse_bias_geluO   s    2r5   c                 :    [        U 5      nUR                  5         g r7   )r   r8   r9   s     r3   gelu_approximation BertOnnxModel.gelu_approximationS   s    (.r5   c                 :    [        U 5      nUR                  5         g r7   )r   r8   r9   s     r3   fuse_gemm_fast_gelu!BertOnnxModel.fuse_gemm_fast_geluW   r=   r5   c                 :    [        U 5      nUR                  5         g r7   )r   r8   r9   s     r3   fuse_add_bias_skip_layer_norm+BertOnnxModel.fuse_add_bias_skip_layer_norm[   s    1$7r5   c                 :    [        U 5      nUR                  5         g r7   )r   r8   r9   s     r3   fuse_reshapeBertOnnxModel.fuse_reshape_   s    t$r5   c                 :    [        U 5      nUR                  5         g r7   )r   r8   r9   s     r3   
fuse_shapeBertOnnxModel.fuse_shapec   s    T"r5   c                 :    [        X5      nUR                  5         g r7   )r
   r8   )r1   use_mask_indexr:   s      r3   fuse_embed_layerBertOnnxModel.fuse_embed_layerg   s    .tDr5   c                     [        U 5      nUR                  5         [        U 5      nUR                  5         [        U 5      nUR                  5         g r7   )r   r8   r   r   r9   s     r3   fuse_layer_normBertOnnxModel.fuse_layer_normk   s=    )$/+D1 2$7r5   c                 :    [        U 5      nUR                  5         g r7   )r   r8   r9   s     r3   fuse_simplified_layer_norm(BertOnnxModel.fuse_simplified_layer_normv   s    3D9r5   c                 6    [        XS9nUR                  5         g )N)shape_infer)r   r8   )r1   rc   r:   s      r3   fuse_skip_layer_norm"BertOnnxModel.fuse_skip_layer_normz   s    -dLr5   c                 :    [        U 5      nUR                  5         g r7   )r   r8   r9   s     r3   fuse_skip_simplified_layer_norm-BertOnnxModel.fuse_skip_simplified_layer_norm~   s    7=r5   c                 @   [        U 5      nUR                  5         [        [        S U R                  R
                  R                  5      5      nU Vs1 s H  o3R                  iM     nnSnU[        U R                  R                  5      :  a  U R                  R                  U   nSUR                  ;   a6  UR                  U;  a&  U R                  R                  R                  U5        OUS-  nU[        U R                  R                  5      :  a  M  g g s  snf )Nc                 L    U R                   S:H  =(       a    U R                  S:g  $ )NRotaryEmbeddingcom.microsoft)op_typedomain)nodes    r3   <lambda>6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>   s     T\\->>a4;;RaCaar5   r   rk      )r   r8   listfilterr'   graphro   rn   len	functionsnameremove)r1   r:   rot_emb_nodesro   non_ms_domains_to_keepifns          r3   fuse_rotary_embeddings$BertOnnxModel.fuse_rotary_embeddings   s    '-a

  %%
 ;H!H-$++-!H#djj**++%%a(B BGG+		AW0W

$$++B/Q #djj**++ "Is   Dc                 :    [        U 5      nUR                  5         g r7   )r   r8   r9   s     r3   fuse_qordered_mamtul"BertOnnxModel.fuse_qordered_mamtul   s    %d+r5   rm   input_indicescastedc                    / nU R                  5       nU R                  U5      nU H  nU Vs/ s H,  o[        UR                  5      :  d  M  UR                  U   PM.     n	nU	 H  n
U R	                  U
5      (       a  U(       d  UR                  U
5        M3  M5  X;   d  M<  XZ   nUR                  S:X  d  MR  U R	                  UR                  S   5      c  Ms  U(       d  M|  UR                  UR                  S   5        M     M     U$ s  snf )z
Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention).
Returns a list of the graph input names based on the filter whether it is casted or not.
Castr   )output_name_to_nodeget_nodes_by_op_typerv   inputfind_graph_inputappendrm   )r1   rm   r   r   graph_inputsr   nodesro   r|   bert_inputs
bert_inputparents               r3   get_graph_inputs_from_node_type-BertOnnxModel.get_graph_inputs_from_node_type   s    
 "668))'2D2?W-Qs4::CV=4::a=-KW)
((44!$++J7 "60<F~~/D4I4I&,,WX/4Z4f!6(//Q@ *   Xs   DDc                 \    U R                  S/ SQU5      nX R                  SS/U5      -  nU$ )NEmbedLayerNormalization)r   rr      	Attention   )r   )r1   r   inputss      r3   !get_graph_inputs_from_fused_nodes/BertOnnxModel.get_graph_inputs_from_fused_nodes   s7    556OQZ\bc66{QCPPr5   c                    U R                  5       nSnSnUR                   H?  nU R                  U[        R                  5      u  pVU(       a  US-  nU[        U5      -  nMA     [        R                  SU SU S35        g)zPChange data type of all graph inputs to int32 type, and add Cast node if needed.r   rr   z)Graph inputs are changed to int32. Added z Cast nodes, and removed z Cast nodes.N)ru   r   change_graph_input_typer!   INT32rv   loggerinfo)r1   ru   add_cast_countremove_cast_countgraph_inputnew_noderemoved_nodess          r3   change_graph_inputs_to_int32*BertOnnxModel.change_graph_inputs_to_int32   s    

 ;;K&*&B&B;P[PaPa&b#H!#]!33	 '
 	77GG`ar`ss  A	
r5   c                 >   U R                  SS9U R                  SS9-   nU R                  R                  R                   H  nUR                  U;   d  M  UR
                  R                  R                  R                  S   nXl	        Uc  MM  UR
                  R                  R                  R                  S   nX%l	        M     U R                  R                  R                   H6  nUR
                  R                  R                  R                  S   nXl	        M8     g)z4
Update input and output shape to use dynamic axes.
T)r   Fr   Nrr   )r   r'   ru   r   rx   typetensor_typeshapedim	dim_paramoutput)r1   dynamic_batch_dimdynamic_seq_lenbert_graph_inputsr   	dim_protor   s          r3   use_dynamic_axesBertOnnxModel.use_dynamic_axes   s     !BB C 
22%2@A ZZ%%++Ezz..!JJ2288<<Q?	&7#". %

 6 6 < < @ @ CI*9' , jj&&--F//5599!<I"3 .r5   c                 $    U R                  5         g r7   )adjust_reshape_and_expandr?   s    r3   
preprocessBertOnnxModel.preprocess   s    &&(r5   c                 V   / nU R                  5        GHX  nUR                  S:X  d  M  U R                  UR                  S   5      nUbP  UR                  S:X  a@  UR                  U/5        U R                  UR                  S   UR                  S   5        M  U R                  U/ SQ/ SQU R                  5       5      nUc  M  US   nU R                  UR                  S   5      nUS   nU R                  UR                  S   5      nUS   n	Uc  GM  Uc  GM	  [        U5      S	:X  d  GM  [        U5      S:X  d  GM-  US   US   :X  d  GM<  U	R                  S   UR                  S'   GM[     U(       a3  U R                  U5        [        R                  S
[        U5       35        g g )NReshaperr   r   )Expandr   r   Slice)r   r   r   r      z"Removed Reshape and Expand count: )r   rm   get_constant_valuer   sizeextendreplace_input_of_all_nodesr   match_parent_pathr   rv   remove_nodesr   r   )
r1   nodes_to_removero   reshape_shapereshape_pathexpand_nodeexpand_shape_valuereshape_before_expandshape_value
slice_nodes
             r3   r   'BertOnnxModel.adjust_reshape_and_expand   s   JJLD||y( !% 7 7

1 F ,1C1Cq1H#**D6233DKKNDJJqMR  $55< ,,.	   +".r"2K)-)@)@ARARSTAU)V&,8,<)"&"9"9:O:U:UVW:X"YK!-b!1J*6'3 23q8,1.q1[^C(2(9(9!(<

1C !F o.KK<S=Q<RST r5   c                 R   U R                  5       n/ nU R                  5        GH  nSSSS.nUR                  U;   a  XCR                     nU R                  U/ SQUSSSSS/U5      nUbm  Uu  nnn	n
nnUR                  S   U R                  5       R                  S   R                  :X  a,  UR                  S   UR                  S'   U R                  5       nUR                  S:X  d  M  U R                  U/ SQ/ SQU5      nUc  M  US	   R                  S   U R                  5       R                  S   R                  :X  d  GM!  [        R                  " SUR                  S[        UR                  5      S-
   UR                  UR                  S
-   S9nSUl        UR                  R                  [        R                  " SU R                  5      /5        U R!                  XR#                  U5      R                  5        UR%                  U5        GM     U R'                  U5        g )Nrr   r   r   )r   	ReduceSumr   )r   ConstantOfShapeConcat	UnsqueezeGatherShaper   )r   r   r   r   )r   r   r   r   r   _remove_mask)r   outputsrx   rl   r(   )r   r   rm   r   r   ru   rx   r   r"   	make_noderv   rn   	attributer   make_attributer(   add_nodeget_graph_by_noder   r   )r1   r   r   ro   op_input_idr|   parent_nodescastconstantOfShapeconcat	unsqueezegatherr   attention_nodes                 r3   clean_graphBertOnnxModel.clean_graph  s   "668JJLD 78aVWXK||{*-#55 1aA&'   + %'!{{1~););A)>)C)CC38<<?--a0.2.F.F.H+||{*
  $55E '	   +#B'--a0DJJL4F4Fq4I4N4NN)/)9)9'#'::a#djj/A2E#F$(KK!%^!;	* 1@-&00779N9N{\`\j\j9k8lmn6L6L^6\6a6ab'..t4y !z 	/*r5   c                 D    U R                  5         U R                  5         g r7   )r   prune_graphr?   s    r3   postprocessBertOnnxModel.postprocessF  s    r5   Noptionsadd_dynamic_axesc                    Ub!  UR                   (       d  U R                  5         U R                  R                  5         U R                  R	                  5         U R                  5         Ub  UR                  (       a   U R                  5         U R                  5         Ub  UR                  (       a  U R                  5         U R                  5         U R                  5         Ub  UR                  (       a+  U R                  UR                   5        U R                  5         Ub  UR                   (       a  U R#                  5         Ub  U R$                  R'                  UR(                  5        UR*                  (       a[  [-        U R.                  [0        5      (       d<  [3        U U R4                  U R6                  U R$                  UR*                  5      U l        Ub  UR8                  (       a  U R;                  5         Ub  UR<                  (       a  U R?                  5         U RA                  5         Ub  URB                  (       a.  UR(                  [D        RF                  :H  nU RI                  U5        U R                  RK                  5         U RM                  5         Ub  URN                  (       a  U RQ                  SS9  U RQ                  SS9  Ub  URR                  (       a  U RU                  5         Ub!  URV                  (       a  U RY                  5         Ub!  URZ                  (       a  U R]                  5         U R_                  5         U(       a  U Ra                  5         [b        Re                  SU Rg                  5        35        g )NT)rF   Fzopset version: )4enable_shape_inferencedisable_shape_inferencer0   remove_identity_nodesremove_useless_cast_nodesr;   enable_layer_normr]   r`   enable_gelurC   r   rS   enable_skip_layer_normrd   rg   enable_rotary_embeddingsr~   r-   set_mask_formatattention_mask_formatuse_multi_head_attention
isinstancer.   r   r   r)   r(   enable_attentionr@   enable_qordered_matmulr   rV   enable_embed_layer_normr   MaskIndexEndrZ   remove_useless_reshape_nodesr   enable_bias_gelurG   enable_bias_skip_layer_normrP   enable_gelu_approximationrJ   enable_gemm_fast_gelurM   remove_unused_constantr   r   r   get_opset_version)r1   r   r   rY   s       r3   optimizeBertOnnxModel.optimizeJ  s   )G)G((*

((* 	

,,. 	!O 9 9  "++-O 3 3NNO > >%%g&D&DE002O @ @'')//0M0MN//
4CXCXZm8n8n(7$$NN''44)% O 8 8! O > >%%'O ? ?$::>Q>^>^^N!!.1 	

//1 O 8 8D1E2O C C..07#D#D##%7#@#@$$&##% !!#od&<&<&>%?@Ar5   c                     0 n/ SQn/ SQnX#-    H!  nU R                  U5      n[        U5      X'   M#     [        R                  SU 35        U$ )z(
Returns node count of fused operators.
)r   r   MultiHeadAttentionGeluFastGeluBiasGeluGemmFastGeluLayerNormalizationSimplifiedLayerNormalizationSkipLayerNormalization SkipSimplifiedLayerNormalizationrk   )QOrderedAttentionQOrderedGeluQOrderedLayerNormalizationQOrderedMatMulzOptimized operators: )r   rv   r   r   )r1   op_countopsq_opsopr   s         r3   get_fused_operator_statistics+BertOnnxModel.get_fused_operator_statistics  s[     

 +B--b1Eu:HL  	+H:67r5   c                   ^ Tc  U R                  5       mS[        4U4S jjnU" S5      nU" S5      U" S5      -   U" S5      -   nU" S5      U" S5      -   U" S	5      -   nU" S
5      U" S5      -   nU" S5      U" S5      -   nUS:  =(       a.    US:  =(       a"    XE:H  =(       a    USU-  :  =(       d    USU-  :  nUS:X  a  [        R                  S5        US:X  a  [        R                  S5        US:X  a  [        R                  S5        US:X  a  [        R                  S5        US:X  a  [        R	                  S5        U$ )z1
Returns True when the model is fully optimized.
op_namec                 8   > TR                  U 5      =(       d    S$ )Nr   )get)r  fused_op_counts    r3   r  2BertOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g.3!3r5   r   r   r  r  r	  r  r
  r  r  r  r  r   r   zLayer Normalization not fusedz$Simple Layer Normalization not fusedzGelu (or FastGelu) not fusedz!EmbedLayerNormalization not fusedz+Attention (or MultiHeadAttention) not fused)r  strr   debugwarning)	r1   r  r  embed	attentiongelu
layer_normsimple_layer_norm
is_perfects	    `       r3   is_fully_optimized BertOnnxModel.is_fully_optimized  sT    !!??AN	4c 	4 23[)H5I,JJXViMjj	(:"66*9MM23h?W6XX
$%CDxPrGss QY XQX"X I-V3DI3U	 	 ?LL89!LL?@19LL78A:LL<=>NNHIr5   use_symbolic_shape_inferc                 <    [        U 5      nUR                  U5        g r7   )r   convert)r1   r,  packing_modes      r3   convert_to_packing_mode%BertOnnxModel.convert_to_packing_mode  s    "4(56r5   )r.   r-   r)   r(   r/   r0   )r   r   )T)
batch_sizemax_seq_len)NFr7   )F))__name__
__module____qualname____firstlineno__r    intr,   r;   r@   rC   rG   rJ   rM   rP   rS   rV   rZ   r]   r`   rd   rg   r~   r   r!  rs   boolr   r   r   r   r   r   r   r   r   r  r  r*  r0  __static_attributes____classcell__)r2   s   @r3   r%   r%   %   s    'j 'S '3 ' '*/
		(s 4PS9 ^b , 

4('UR@+DRB 4 RBt RBh@&P7 7 7r5   r%   N)>loggingr   r0  r   fusion_attentionr   r   fusion_bart_attentionr   fusion_biasgelur   fusion_constant_foldr	   fusion_embedlayerr
   fusion_fastgelur   fusion_gelur   fusion_gelu_approximationr   fusion_gemmfastgelur   fusion_layernormr   r   fusion_optionsr   r   fusion_qordered_attentionr   fusion_qordered_gelur   fusion_qordered_layernormr   fusion_qordered_matmulr   fusion_quickgelur   fusion_reshaper   fusion_rotary_attentionr   fusion_shaper   fusion_simplified_layernormr   r   fusion_skiplayernormr   r   fusion_utilsr   onnxr    r!   r"   
onnx_modelr#   r4  r   r%    r5   r3   <module>rV     sp     / ; 5 * 3 ; * " = 2 Q = = 3 F 7 , ( : $ r _ $ 0 0  	8	C7I C7r5   