
    hR                        S SK Jr  S SKrS SKJr  S SKJr  S SKJ	r	J
r
  S SKJrJr  S SKJr  S SKJrJrJrJrJr  S S	KJr  \" \5      r " S
 S5      r " S S5      r " S S5      r " S S5      r " S S5      r " S S5      r " S S5      r  " S S5      r! " S S\5      r" " S S\5      r# " S S\"5      r$ " S  S!\"5      r% " S" S#\"5      r& " S$ S%\"5      r' " S& S'\5      r(g)(    )	getLoggerN)DynamoOnnxHelper)Fusion)AttentionOpTypeFusionOptions) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)NumpyHelper)
ModelProto	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                       \ rS rSrS rSrg)ProcessGemmWFunc   c                 0    [         R                  " US5      $ )N   r   )np	transposeselfxs     a/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/onnx_model_phi.py__call__ProcessGemmWFunc.__call__   s    ||Av&&     N__name__
__module____qualname____firstlineno__r   __static_attributes__r    r   r   r   r      s    'r   r   c                       \ rS rSrS rSrg)ProcessMatMulQFunc   c                 b    [         R                  " [         R                  " USS5      S   S5      $ )N   r   r   r   r   splitr   s     r   r   ProcessMatMulQFunc.__call__   %    ||BHHQ1-a0&99r   r    Nr!   r    r   r   r(   r(          :r   r(   c                       \ rS rSrS rSrg)ProcessMatMulKFunc   c                 b    [         R                  " [         R                  " USS5      S   S5      $ )Nr+   r   r   r   r,   r   s     r   r   ProcessMatMulKFunc.__call__   r/   r   r    Nr!   r    r   r   r2   r2      r0   r   r2   c                       \ rS rSrS rSrg)ProcessMatMulVFunc#   c                 b    [         R                  " [         R                  " USS5      S   S5      $ )Nr+   r      r   r,   r   s     r   r   ProcessMatMulVFunc.__call__$   r/   r   r    Nr!   r    r   r   r7   r7   #   r0   r   r7   c                       \ rS rSrS rSrg)ProcessBiasQFunc(   c                 <    [         R                  " USS5      S   nU$ )Nr+   r   r   r-   r   s     r   r   ProcessBiasQFunc.__call__)       HHQ2q!r   r    Nr!   r    r   r   r=   r=   (       r   r=   c                       \ rS rSrS rSrg)ProcessBiasKFunc.   c                 <    [         R                  " USS5      S   nU$ )Nr+   r@   r   rA   r   s     r   r   ProcessBiasKFunc.__call__/   rC   r   r    Nr!   r    r   r   rF   rF   .   rD   r   rF   c                       \ rS rSrS rSrg)ProcessBiasVFunc4   c                 <    [         R                  " USS5      S   nU$ )Nr+   r@   r:   rA   r   s     r   r   ProcessBiasVFunc.__call__5   rC   r   r    Nr!   r    r   r   rK   rK   4   rD   r   rK   c                       \ rS rSrS rSrg)ProcessRotCacheFunc:   c                 x    [        UR                  5      S:X  d   eUR                  S   S:X  a  US S 2SS24   $ U$ )Nr:   r       r      )lenshaper   s     r   r   ProcessRotCacheFunc.__call__;   s?    177|q   771:Q"W:r   r    Nr!   r    r   r   rP   rP   :   s    r   rP   c                     ^  \ rS rSrS\S\\   4U 4S jjrS\4S jr	S r
S rS	 rS
 rS S jrS rS rS rS\\   S\S\\   4S jrS!S\\   S\\   S\4S jjrS!S\\   S\\   S\4S jjrS"S\\   S\\   S\4S jjrS!S\\   S\\   S\4S jjrS!S\\   S\\   S\4S jjrS#S\\   S\\   S\4S jjrS#S\\   S\\   S\4S jjrS#S\\   S\\   S\4S jjr    S$S\\   S\\   S\4S jjrSrU =r$ )%FissionD   modelnodes_to_findc                 (   > [         TU ]  USU5        g )NDONOTUSEsuper__init__)r   r[   r\   	__class__s      r   ra   Fission.__init__E   s    
 	
M:r   attn_op_typec                     Xl         g Nrd   )r   rd   s     r   set_attention_op_typeFission.set_attention_op_typeL   s    (r   c                 $    US-   [        U5      -   $ )N_)str)r   layer_idnames      r   	get_unameFission.get_unameO   s    czCM))r   c                     U H8  nX2:X  d.  UR                  U5      (       d  UR                  U5      (       d  M6  Us  $    [        SU S35      e)NzEdge z
 not found)endswith
startswith
ValueError)r   edgesrn   edges       r   get_edge_by_nameFission.get_edge_by_nameR   sH    D|t}}T22dood6K6K  5j122r   c                 :    U R                  UR                  U5      $ rf   )rw   inputr   nodern   s      r   get_input_by_nameFission.get_input_by_nameX   s    $$TZZ66r   c                 :    U R                  UR                  U5      $ rf   )rw   outputr{   s      r   get_output_by_nameFission.get_output_by_name[   s    $$T[[$77r   c                 ~   U R                   R                  U5      n[        R                  " U5      nU" U5      n[        R
                  " Uc  US-   OU[        R                  UR                  UR                  5       R                  5       SS9nU R                   R                  XpR                  5        UR                  $ )N
_processedT	data_typedimsvalsraw)r[   get_initializerr
   to_arrayr   make_tensorr   FLOATrV   flattentobytesadd_initializerthis_graph_namern   )r   initializer_namefunctorcustom_namei
i_np_arrayprocessed_i_np_array
new_tensors           r   process_initializerFission.process_initializer^   s    JJ&&'78 ))!,
&z2''/:/B|+!''%++%--/779

 	

"":/C/CDr   c                     U R                   R                  5       R                  R                  5       nXl        [
        R                  UR                  R                  l	        g rf   )
r[   graph
value_infoaddrn   r   r   typetensor_type	elem_typer   rn   new_value_infos      r   add_fp32_value_infoFission.add_fp32_value_infol   C    ))+66::<"4?4E4E''1r   c                     U R                   R                  5       R                  R                  5       nXl        [
        R                  UR                  R                  l	        g rf   )
r[   r   r   r   rn   r   INT64r   r   r   r   s      r   add_int64_value_infoFission.add_int64_value_infoq   r   r   c                    U R                   R                  5       R                   HH  nUR                  U:X  d  M  U R                   R                  5       R                  R	                  U5          O   [
        R                  " U[        R                  US9nU R                   R                  5       R                  R                  U/5        g )Nr   rV   )
r[   r   r   rn   remover   make_tensor_value_infor   r   extend)r   rn   rV   r   r   s        r   replace_fp32_value_infoFission.replace_fp32_value_infov   s    ****,77J$&

  "--44Z@ 8  66!''

 	

%%,,n-=>r   subgraph_nodesrm   layer_known_edges_namesc                 z   U GH4  n[        UR                  5       HP  u  pVUS:X  a  M  Xc;  d  M  U R                  X&5      UR                  U'   U R                  UR                  U   5        MR     [        UR                  5       HP  u  pVUS:X  a  M  Xc;  d  M  U R                  X&5      UR                  U'   U R                  UR                  U   5        MR     U R                  X$R
                  5      Ul        U R                  R                  U5        U R                  U R                  UR
                  '   GM7     g )N )
	enumeraterz   ro   r   r   rn   nodes_to_addappendr   node_name_to_graph_name)r   r   rm   r   new_noder   rn   s          r   set_unique_name_and_add_nodes%Fission.set_unique_name_and_add_nodes   s     'H$X^^42:8(,x(FHNN1%,,X^^A->? 5 %X__52:8)-)GHOOA&,,X__Q-?@ 6 !NN8]]CHM$$X.:>:N:ND((7 'r   inputsoutputsprefixc                     [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " SUUUS-   SS9nU/$ )Nr+   r   LayerNormalization_LayerNormalizationg   >)r   r   rn   epsilonrU   r   	make_noder   r   r   r   r|   s        r   	layernormFission.layernorm   sT    6{a7|q    //)
 vr   c                     [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " SUS   US   /US-   /US-   S9n[        R                  " SUS-   US   /UUS	-   S9nXE/$ )
Nr+   r   MatMulr   
matmul_outr   r   rn   Addr:   Biasr   )r   r   r   r   matmulr   s         r   gemmFission.gemm   s    6{a7|q   !!1Ivay)l*+("	
 \)6!95&	
 }r   c           
          [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " SUUUS-   SUUS9nU/$ )N   r   RotaryEmbeddingcom.microsoft)r   r   rn   domainrotary_embedding_dim	num_headsr   )r   r   r   r   rot_dimr   r|   s          r   rotaryFission.rotary   sZ    6{a7|q   ++"!(
 vr   c                     [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " SUUUS-   SS9nU/$ )Nr   FastGelur   )r   r   rn   r   r   r   s        r   fastgeluFission.fastgelu   sS    6{a7|q   *$"
 vr   c                     [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " SUUUS-   S9nU/$ )Nr:   r   r   r   r   r   s        r   r   Fission.add   sO    6{a7|q   %	
 vr   c           
          [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " SUUUS-   SUSS9nU/$ )N   r+   MultiHeadAttentionr   r   )r   r   rn   r   r   unidirectionalr   r   r   r   r   r   r|   s         r   mhaFission.mha   sZ    6{a7|q    .."
 vr   c           
          [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " SUUUS-   SUUS9nU/$ )N   r+   GroupQueryAttentionr   )r   r   rn   r   r   kv_num_headsr   r   s         r   gqaFission.gqa   sZ    6{a7|q   !//""
 vr   c                     [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " SUUUS-   SUSSSS9	nU/$ )N   r:   	Attentionr   r   rS   )r   r   rn   r   r   r   	do_rotaryr   r   r   s         r   	attentionFission.attention   s_    6{a7|q   +%"!#

 vr   c                     [        U5      S:X  d   e[        U5      S:X  d   e[        R                  " SUUUS-   SUUUUS9	nU/$ )N   r   PagedAttentionzvllm.ort.ext)r   r   rn   r   r   num_kv_heads	head_sizescaler   )r   r   r   r   r   r   r   r|   s           r   
paged_attnFission.paged_attn  sb     6{a7|q   **!"

 vr   rg   rf   )r   )r   rS   rS   )r   rS   )r   rS   P   g   %?) r"   r#   r$   r%   r   listrl   ra   r   rh   ro   rw   r}   r   r   r   r   r   r   intr   r   r   r   r   r   r   r   r   r   r&   __classcell__rb   s   @r   rY   rY   D   s   ;; Cy;)/ )*378F
F

?O"9oO9<OW[\_W`O(
S	 
DI 
s 
49 tCy # "T#Y c C 
tCy 
49 
c 
	$s) 	d3i 	 	$s) d3i  $s) d3i  S	 DI s ( !S	 c 	 r   rY   c                   h   ^  \ rS rSrS\S\S\4U 4S jjrS\4S jrS r	S	\
4S
 jrS	\
4S jrSrU =r$ )Phi2PreProcessori  r[   r   hidden_sizec                 X   > [         TU ]  U5        SU l        X l        X0l        SU l        g )NrS   modeling_phi_PhiModel_model_1)r`   ra   num_hidden_layersnum_attention_headsr  	func_namer   r[   r   r  rb   s       r   ra   Phi2PreProcessor.__init__  s+    !##, &8r   returnc                    0 nSUS'   SUS'   SUS'   SUS'   [        S	U R                  S	5       H1  nS
U 3USU 3'   SU 3USU 3'   SU 3USU S3'   SU 3USU S3'   M3     U R                  R                  R                   Vs/ s H  o3R
                  PM     nnSU;   a  SU;   a  SUS'   SUS'   U$ SU;   a  SU;   d   eSUS'   SUS'   U$ s  snf )Nlogits	lm_head_1	input_idsl_input_ids_
past_key_0
key_statespast_value_0value_statesr   	past_key_key_states_past_value_value_states_present_key_model_layers__1present_value__1_1model_layers_0_1_1model_layers_0_1_2present_key_0present_value_0model_layers_0_1)ranger  r[   r   r   rn   )r   	edge_dictr   or   s        r   get_phi2_edge_dict#Phi2PreProcessor.get_phi2_edge_dict&  s<   	!)	+$/	.!".	,$2	.!q$00!4A-6qc?IA3'(/:1#->IaS)*1=aS/AIaS+,3A!1EIaS-.	 5 $(::#3#3#:#:;#:a66#:;7*/Cw/N.=I*+.?I*+
  &05IW5TTT,;I().?I*+ <s   Cc                     SnU R                   R                  R                   H:  nUR                  R	                  U5      nUS:w  d  M&  UR                  US  Ul        M<     g )N)modeling_phi_PhiDecoderLayer_model_layersr@   )r[   r   r|   op_typefind)r   phi2_transformer_layer_namer|   indexs       r   simplify_phi2_op_type&Phi2PreProcessor.simplify_phi2_op_type<  sP    &Q#JJ$$))DLL%%&ABE{#||EF3 *r   rd   c                 
   U[         R                  :H  U l        U[         R                  :H  U l        U R
                  R                  n/ nUR                   GHS  nSUR                  ;   Ga  [        R                  " UR                  U R                  (       d  [        R                  O[        R                  SS/S9n[        R                  " S[        R                  S/S9n[        R                  " S[        R                  SS/S9n[        R                  " S[        R                  SS/S9n[        R                  " S	[        R                  S/S9n	U R                  (       d  UR                  XVU/5      OUR                  XWU	/5        U R                  (       a  S
UR                  ;   a  [        R                  " UR                  R                  S
S5      UR                   R"                  R$                  SSU R&                  SU R(                  U R&                  -  /S9n
UR                  U
/5        GM  GM  U R                  (       a  S
UR                  ;   aR  [        R                  " UR                  UR                   R"                  R$                  / SQS9n
UR                  U
/5        SUR                  ;   aU  [        R                  " UR                  UR                   R"                  R$                  / SQS9n
UR                  U
/5        GM  GM  S
UR                  ;   d  SUR                  ;   d  GM  [        R                  " UR                  UR                   R"                  R$                  SU R&                  SU R(                  U R&                  -  /S9n
UR                  U
/5        GMV     UR+                  S5        UR                  R                  U5        / n[-        UR.                  5       GHS  u  pUS:X  a  UR                  U/5        M   U R                  (       a  SUR                  ;   a  [        R                  " UR                  R                  SS5      UR                   R"                  R$                  SSU R&                  SU R(                  U R&                  -  /S9n
UR                  U
/5        M  M  U R                  (       a  M  [        R                  " UR                  UR                   R"                  R$                  SU R&                  SU R(                  U R&                  -  /S9n
UR                  U
/5        GMV     UR+                  S5        UR.                  R                  U5        g )Nr  
batch_sizeseq_lenr   stepr   position_idsattention_maskinput_metadatapast_keypastr:   past_seq_len)
num_blocksr   head_size_x
block_sizeblock_x
past_value)r;  r   r   r=  rz   r   present_keypresenttotal_seq_lenr   )r   r   use_attnr   use_vllmr[   r   rz   rn   r   r   r   INT32r   r   replacer   r   r   r  r  
ClearFieldr   r   )r   rd   r   
new_inputsvivi_iidvi_stepvi_pidvi_maskvi_metavi_cachenew_outputsr   s                r   process_graph_io!Phi2PreProcessor.process_graph_ioC  sm   $(A(AA$(F(FF

  
++Bbgg%66GG7;}}k//+J[J['3
 !77)//#
  66")//'3
 !77$)//'3
 !77$)//#  == %%v&@A#**FG+DE}}(%<<
F;"$''"5"5"?"?( 44* ,,0H0HH
 H %%xj1 ) (%<<"$''"5"5"?"?a H
 %%xj1277*%<<"$''"5"5"?"?	 H %%xj1 + (LBGG,C%<<"$''"5"5"?"?( 44* ,,0H0HH		 H %%xj1] ` 	!:&u||,EAAv""B4(==$/#)#@#@GGOOM9E&(gg&9&9&C&C ! , $ 8 8 / $ 0 0D4L4L L#
$ $**H:6 0 ]]%<<"$''"5"5"?"?( 44+ ,,0H0HH		 H  &&z2? -B 	"K(r   c                    S nU R                   R                   H;  nUR                  R                  U R                  5      (       d  M/  UR                  n  O   Uc   eU R                  U5        U R                  U R                  5       5        U R                  5         U R                  5         U[        R                  :X  a  U R                  5         U R                  U5        g rf   )r[   	functionsrn   rr   r	  unroll_functionupdate_edgesr'  r/  remove_dropout_layerr   r   remove_lm_head_layerrQ  )r   rd   function_namefuncs       r   preprocess_onnx Phi2PreProcessor.preprocess_onnx  s    JJ((Dyy!!$..11 $		 ) (((]+$1134""$!!#?999%%'l+r   )r	  r  r  r  rC  rD  )r"   r#   r$   r%   r   r   ra   dictr'  r/  r   rQ  r[  r&   r   r  s   @r   r  r    sN    9j 9S 9s 9D ,4{)_ {)z,O , ,r   r  c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )FissionTransformerEmbeddingPhii  r[   c                 (   > [         TU ]  US/5        g )N6torch_nn_modules_sparse_Embedding_model_embed_tokens_1r_   r   r[   rb   s     r   ra   'FissionTransformerEmbeddingPhi.__init__  s     	!Y Z[r   c                    [         R                  SUR                  5        [        UR                  5      S:X  d   e[        UR
                  5      S:X  d   eUR                  S   nUR
                  S   nU R                  US5      nXEU/n[        R                  " SXd/U/SS9/nU R                  USU5        U R                  R                  U5        S	U l        g )
NOptimizing %s...r:   r   r   zembed_tokens.weightGatherEmbedding_Gatherr   T)loggerinforn   rU   rz   r   r}   r   r   r   nodes_to_remover   prune_graph)	r   r|   input_name_to_nodesoutput_name_to_noderz   r   	embeddingr   r   s	            r   fuse#FissionTransformerEmbeddingPhi.fuse  s    &		24::!###4;;1$$$

1Q**41FG	#()"< !)'	
 	**>1>UV##D)r   rk  	r"   r#   r$   r%   r   ra   ro  r&   r   r  s   @r   r_  r_    s    \\   r   r_  c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )FissionTransformerLayerNormPhii  r[   c                 (   > [         TU ]  US/5        g )N@torch_nn_modules_normalization_LayerNorm_model_final_layernorm_1r_   rb  s     r   ra   'FissionTransformerLayerNormPhi.__init__  s     	!c der   c                 D   [         R                  SUR                  5        [        UR                  5      S:X  d   e[        UR
                  5      S:X  d   eUR                  S   nUR
                  S   nU R                  US5      nU R                  US5      nXEXg/n/ n	U	R                  U R                  XFU/U/S5      5        U R                  U	SU5        U R                  U/ S	Q5        U R                  U/ S	Q5        U R                  R                  U5        S
U l        g )Nre  r+   r   r   zfinal_layernorm.weightzfinal_layernorm.biasFinalc   r2  r3  r  T)rh  ri  rn   rU   rz   r   r}   r   r   r   r   rj  r   rk  )
r   r|   rl  rm  rz   r   	ln_weightln_biasr   r   s
             r   ro  #FissionTransformerLayerNormPhi.fuse  s   &		24::!###4;;1$$$

1Q**41IJ	((/EF#()"Ednne-H6(T[\]**>2?VW$$U,TU$$V-UV##D)r   rq  rr  r  s   @r   rt  rt    s    ff   r   rt  c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )!FissionTransformerCausalLMHeadPhii  r[   c                 (   > [         TU ]  US/5        g )N(torch_nn_modules_linear_Linear_lm_head_1r_   rb  s     r   ra   *FissionTransformerCausalLMHeadPhi.__init__  s     	!K LMr   c                 t   [         R                  SUR                  5        [        UR                  5      S:X  d   e[        UR
                  5      S:X  d   eUR                  S   nUR
                  S   nU R                  U R                  US5      [        5       5      nU R                  US5      nXEXg/n/ n	U	R                  U R                  XFU/U/S5      5        U R                  U	S	U5        U R                  U/ S
Q5        U R                  U/ SQ5        U R                  R                  U5        SU l        g )Nre  r   r   r:   r   zlm_head.weightzlm_head.biasLMHead_rz  r{  )r2  r3  i   T)rh  ri  rn   rU   rz   r   r   r}   r   r   r   r   r   rj  r   rk  )
r   r|   rl  rm  rz   r   	fc_weightfc_biasr   r   s
             r   ro  &FissionTransformerCausalLMHeadPhi.fuse  s   &		24::!###4;;1$$$

1Q,,T-C-CDJZ-[]m]op	((~>#()"Edii7(CfXyYZ**>2?VW$$U,TU$$V-MN##D)r   rq  rr  r  s   @r   r  r    s    NN   r   r  c                   L   ^  \ rS rSrS\S\4U 4S jjrS rS rS r	S r
S	rU =r$ )
FissionTransformerBlockPhii4  r[   r   c                    > X l         Sn0 U l        / n[        U5       H(  nSU S3nUR                  U5        XPR                  U'   M*     [        TU ]  X5        g )NrS   *modeling_phi_PhiDecoderLayer_model_layers_r  )r   func_to_layer_idr$  r   r`   ra   )r   r[   r   max_num_layersr\   layerr	  rb   s          r   ra   #FissionTransformerBlockPhi.__init__5  sd    
 # ">*EDUG2NI  +/4!!), +
 	.r   c                 4    U R                   UR                     $ rf   )r  r+  )r   r|   s     r   get_layer_id'FissionTransformerBlockPhi.get_layer_idE  s    $$T\\22r   c                    [         R                  " SS/S/S[        R                  S9[         R                  " SSS/S/S	S
9[         R                  " SSS/S/SS
9[         R                  " SS/S/S[        R                  S9[         R                  " SS/S/SS
9[         R                  " SSS/S/SSS9[         R                  " SS/S/S[        R                  S9/nU$ )NCastr6  
mask_int64Cast_gqa_aux_0)r   r   rn   to	ReduceSumonemask_row_sumsReduceSum_gqa_auxr   Subseqlens_k_int64Sub_gqa_aux	seqlens_kCast_gqa_aux_1Shape
mask_shapeShape_gqa_aux_0rf  total_seq_len_int64Gather_gqa_aux_0r   )r   r   rn   axistotal_sequence_lengthCast_gqa_aux_2)r   r   r   r   rE  )r   gqa_aux_nodess     r   get_gqa_aux_nodes,FissionTransformerBlockPhi.get_gqa_aux_nodesH  s   ()%%$$ $e,()(	 '/*+"	 )*$%$$ Wl^l^Zkl$e,./' -.01%$$G*
V r   c	                 l   U R                   R                  U5      n	U R                   R                  U5      n
U R                   R                  U5      n[        R                  " [        R
                  " U	5      S5      n[        R                  " [        R
                  " U
5      S5      n[        R                  " [        R
                  " U5      S5      n[        R                  " XU4SS9nU R                   R                  U5      nU R                   R                  U5      nU R                   R                  U5      n[        R
                  " U5      n[        R
                  " U5      n[        R
                  " U5      n[        R                  " UUU4SS9nUR                  S   n[        R                  " U[        R                  UUS-  /UR                  5       R                  5       SS9nU R                   R                  UU R                  5        [        R                  " U[        R                  US-  /UR                  5       R                  5       SS9nU R                   R                  UU R                  5        U R!                  UR"                  5        U R!                  UR"                  5        Xx4$ )Nr   r   )r  r   r+   Tr   )r[   r   r   r   r
   r   stackrV   r   r   r   r   r   r   r   r   r   rn   )r   q_wk_wv_wq_bk_bv_bweight_name	bias_nameq_weightk_weightv_weightqwkwvw
qkv_weightq_biask_biasv_biasqbkbvbqkv_biasr  weightbiass                             r   pack_qkv_gemm(FissionTransformerBlockPhi.pack_qkv_gemmv  s   ::--c2::--c2::--c2\\+..x8&A\\+..x8&A\\+..x8&AXXrrl3
++C0++C0++C0!!&)!!&)!!&)88RRLq1 &&q)##!''{Q/##%--/
 	

""64+?+?@!!!''/"!!#++-
 	

""4)=)=>  -  +%%r   c                    [         R                  SUR                  5        [         R                  SU R                   35        U R	                  U5      nUR
                  S   nU R                  US5      nU R                  US5      nUR                  S   nU R                  US5      n	U R                  US5      n
U R                  US	5      nU R                  US
5      nSu  pnnnnSu  nnSu  nnU R                  [        R                  :w  Ga	  U R                  U R                  US5      [        5       5      nU R                  U R                  US5      [        5       5      nU R                  U R                  US5      [        5       5      nU R                  US5      nU R                  US5      nU R                  US5      nU R                  U R                  US5      [        5       5      nU R                  U R                  US5      [        5       5      nOU R                  U R                  US5      U R                  US5      U R                  US5      U R                  US5      U R                  US5      U R                  US5      U R                  US5      U R                  US5      5      u  nnU R                  U R                  US5      [        5       5      nU R                  US5      nU R                  U R                  US5      [        5       5      nU R                  U R                  US5      [        5       5      nU R                  US5      nU R                  US5      n/ nUR!                  XVU/5        UR!                  XU
/5        UR!                  X/5        U R                  [        R                  :w  a  UR!                  UUUUUUUU/5        OUR!                  UU/5        UR!                  UUUUUU/5        UR!                  / SQ5        / nUR!                  U R#                  X[U/S/5      5        UR!                  U R%                  SUU/S /S!5      5        UR!                  U R%                  SUU/S"/S#5      5        UR!                  U R'                  S"/S$/5      5        UR!                  U R%                  S$UU/S%/S&5      5        UR!                  U R)                  S S%/S'/S(5      5        UR!                  U R)                  US'/U/S)5      5        U R                  [        R                  :w  Gad  UR!                  U R%                  SX/S*/S+5      5        UR!                  U R%                  SUU/S,/S-5      5        UR!                  U R%                  SUU/S./S/5      5        U R                  [        R*                  :X  a  S0OS1nUR!                  U R-                  S*UUU/S2/S+5      5        UR!                  U R-                  S,UUU/S3/S-5      5        U R                  [        R.                  :X  a,  UR!                  U R1                  S2S3S.S4S5S4Xg/SX/5      5        GO~U R                  [        R2                  :X  a  UR!                  U R5                  S2S3S.UUS6S7/SX/5      5        US:X  a  U R7                  5       n U  HA  n!U R8                  R;                  U!5        U R<                  U R>                  U!R                  '   MC     U R@                  RC                  [D        RF                  " [H        RJ                  " S8/S9S:9S;S<9U R<                  5        OU R                  [        R*                  :X  a'  UR!                  U RM                  S2S3S.XgS=/S/5      5        OES>U 3n"S?U 3n#UR!                  U"U#/5        UR!                  U RO                  SUUS5U"/SU#/5      5        U RQ                  UUU5        U RS                  U/ S@Q5        U RS                  U/ S@Q5        U RT                  R;                  U5        SAU l+        g )BNre  zAttentionOpType: r   r8  r?  r@   r@  present_valuezinput_layernorm.weightzinput_layernorm.bias)NNNNNN)NNzself_attn.q_proj.weightzself_attn.k_proj.weightzself_attn.v_proj.weightzself_attn.q_proj.biaszself_attn.k_proj.biaszself_attn.v_proj.biaszrotary_emb.cos_cachedzrotary_emb.sin_cachedattn_qkv_weightattn_qkv_biaszself_attn.dense.weightzself_attn.dense.biaszmlp.fc1.weightzmlp.fc2.weightzmlp.fc1.biaszmlp.fc2.bias)r6  r4  r  r  r7  r5  ln_outattn_outattn_add_outOutProj_fc1_outFC1_gelu_outfc2_outFC2_residual_1_out
Residual_1
Residual_2queryQ_keyK_valueV_r5  r4  	query_rotkey_rotr   r6  r  r  r   int64)dtyper  )rn   r7  past_present_r{  T),rh  ri  rn   rd   r  rz   r}   r   r   r   r   r   r   rP   r  ro   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r[   r   r   
from_arrayr   arrayr   r   r   r   rj  rk  )$r   r|   rl  rm  rm   i_hidden_statesi_key_cachei_value_cacheo_hidden_stateso_key_cacheo_value_cacher|  r}  attn_q_weightattn_q_biasattn_k_weightattn_k_biasattn_v_weightattn_v_biasr  r  	cos_cache	sin_cacheattn_out_weightattn_out_biasmlp_fc1_weightmlp_fc2_weightmlp_fc1_biasmlp_fc2_biasr   r   pos_ids_namer  r   	past_namepresent_names$                                       r   ro  FissionTransformerBlockPhi.fuse  s.    	&		2'(9(9':;<$$T***Q-,,T:>..t\B++b/--dMB//oF**41IJ	((/EF^
ZM;{ *4&)	9 9 99 44&&t-FGIYI[M !44&&t-FGIYI[M !44&&t-FGIYI[M 007NOK007NOK007NOK00&&t-DEGZG\I 00&&t-DEGZG\I .2-?-?&&t-FG&&t-FG&&t-FG&&t-DE&&t-DE&&t-DEx):;x9	.*O] 22""4)ABDTDV
 ..t5KL11$2H2HO_2`brbtu11$2H2HO_2`brbtu--dNC--dNC"$&&m'TU&&m'TU&&	';< 9 99#**!!!	 $**O]+KL&&m^\>[gh	
 	 &&n	
 dnno'-RU]T^_`dii_m(TWeVfhrstdii><(PS\R]_efgdmmYK*FGdii^\(RU^T_aghidhh	'BEUDVXdefdhh9I'J_L]_klm 9 99!!$))X},RU\T]_c"de!!$))X}k,RUZT[]a"bc!!$))X}k,RU\T]_c"de-1->->/B`B`-`>flL!!$++wiQZ.[^i]jlp"qr!!$++ulIy.Y\e[fhl"mn  O$F$FF%%HH$i">NPRT_o#[@ ""o&I&II%%HH'%#')'3 $[@ q=$($:$:$<M$1))00:FJFZFZ44X]]C %2 JJ..$//!G0LSXY[_[o[o ""o&D&DD%%OO$i+Vfg#  z*I%hZ0L#**I|+DE!!?OQZ[^hjv]w 	**>8E\]$$_6^_$$_6^_##D)r   )r  r   rk  )r"   r#   r$   r%   r   r   ra   r  r  r  ro  r&   r   r  s   @r   r  r  4  s7    // / 3,\(&Tk  k r   r  c                   j   ^  \ rS rSrS\S\S\4U 4S jjrSS\S-  S\4U 4S	 jjjr	S
 r
SS jrSrU =r$ )PhiOnnxModeliN  r[   r   r  c                    > [         TU ]  U5        [        U R                  X#5      U l        [        X5      U l        [        U 5      U l        [        U 5      U l
        [        U 5      U l        g rf   )r`   ra   r  r[   phi2_preprocessorr  fission_transformer_blockr  fission_causal_lm_headrt  fission_transformer_layernormr_  fission_transformer_embeddingr
  s       r   ra   PhiOnnxModel.__init__O  sW    !1$**i!U)CD)T&&G&M#-KD-Q*-KD-Q*r   Noptionsadd_dynamic_axesc                 (  > Uc   eUR                   nU R                  R                  U5        U R                  R	                  U5        U R                  R                  5         U R                  R                  5         U R                  R                  5         U R                  R                  5         [        TU ])  5         [        U 5      U l        [        U 5      U l        U R                  R                  5         U R                  R                  5         g rf   )attention_op_typer  rh   r  r[  applyr  r  r  r`   rk  r	   fuse_slnr   fuse_bias_sln)r   r  r	  rd   rb   s       r   optimizePhiOnnxModel.optimizeW  s    """00&&<<\J..|<&&,,.**002##))+**002 5T:=dC  "r   c                     0 n/ SQnU H!  nU R                  U5      n[        U5      X'   M#     [        R                  SU 35        U$ )z(
Returns node count of fused operators.
)	r   r   r   r   GeluBiasGelur   r   SkipLayerNormalizationzOptimized operators: )get_nodes_by_op_typerU   rh  ri  )r   op_countopsopnodess        r   get_fused_operator_statistics*PhiOnnxModel.get_fused_operator_statisticsl  sR     

 B--b1Eu:HL  	+H:67r   c                   ^ Tc  U R                  5       mS[        4U4S jjnU" S5      U" S5      -   U" S5      -   U" S5      -   nU" S5      U" S5      -   U" S	5      -   nU" S
5      U" S5      -   nUS:  =(       a    X4:H  =(       a    XS:  nUS:X  a  [        R                  S5        US:X  a  [        R                  S5        US:X  a  [        R	                  S5        U$ )z1
Returns True when the model is fully optimized.
op_namec                 8   > TR                  U 5      =(       d    S$ )Nr   )get)r  fused_op_counts    r   r  1PhiOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g.3!3r   r   r   r   r   r  r  r   r   r  r   zLayer Normalization not fusedzGelu (or FastGelu) not fusedz+Attention (or MultiHeadAttention) not fused)r  rl   rh  debugwarning)r   r   r  r   gelu
layer_norm
is_perfects    `     r   is_fully_optimizedPhiOnnxModel.is_fully_optimized  s     !!??AN	4c 	4 [!+,-,-. '() 	 (:"66*9MM23h?W6XX
!mZ)*;Z*BY
?LL8919LL78>NNHIr   )r  r  r  r  r  r  r  )NFrf   )r"   r#   r$   r%   r   r   ra   r   boolr  r  r'  r&   r   r  s   @r   r   r   N  sM    Rj RS Rs R# 4 #t # #*. r   r   ))loggingr   numpyr   dynamo_onnx_helperr   fusion_baser   fusion_optionsr   r   fusion_skiplayernormr   r	   fusion_utilsr
   onnxr   r   r   r   r   
onnx_modelr   r"   rh  r   r(   r2   r7   r=   rF   rK   rP   rY   r  r_  rt  r  r  r   r    r   r   <module>r3     s      /  9 _ $ I I  	8	' '
: :
: :
: :
    Vf Vrp,' p,f W  D W  B   BW  W tS9 Sr   