
    h@                        S SK r S SKrS SKrS SKrS SKJrJrJrJrJ	r	  S SK
JrJr  S SKJrJr  S SKJr  \R$                  " \5      r " S S5      r " S S	\5      r " S
 S\5      r " S S5      rS rS rS r\S:X  a  \" 5         gg)    N)AttentionInputIDsAttentionOutputIDsMultiHeadAttentionInputIDsMultiHeadAttentionOutputIDs	Operators)helper
load_model)	NodeProto	OnnxModel)SymbolicShapeInferenceHelperc                       \ rS rSrS\S\4S jrS\S-  4S jrS\S-  4S jr	S\S-  4S	 jr
S\4S
 jrS\\   S\\   SS4S jrS\\   S\\   SS4S jrS\S\SS4S jrS\S-  4S jrSS\SS4S jjrSrg)PackingAttentionBase   modelattention_op_typec                     Xl         / U l        / U l        SU l        0 U l        U R                   R                   R
                  R                  U l        X l        U R                   R                  U5      U l
        g )NF)r   nodes_to_removenodes_to_addprune_graphnode_name_to_graph_namegraphnamethis_graph_namer   get_nodes_by_op_typeattention_nodes)selfr   r   s      j/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/convert_to_packing_mode.py__init__PackingAttentionBase.__init__   sc     %
%'"$!&-/$$(JJ$4$4$:$:$?$?!2#zz>>?PQ    returnNc                    U R                   [        R                  :X  a  [        R                  O[
        R                  nU R                  5       nU(       a  [        UR                  5      U::  a  g UR                  U   nU R                   H1  n[        UR                  5      U::  d  UR                  U   U:w  d  M1    g    U$ N)r   r   	ATTENTIONr   
MASK_INDEXr   KEY_PADDING_MASK_try_getting_first_attentionleninputr   )r   
mask_indexfirst_attention_nodeattention_masknodes        r   _try_getting_attention_mask0PackingAttentionBase._try_getting_attention_mask$   s     %%)<)<< ((+<< 	
  $@@B#s+?+E+E'F*'T-33J? ((D4::*,

:0F.0X ) r    c                 T    [        U R                  5      S::  a  g U R                  S   $ )Nr   )r(   r   r   s    r   r'   1PackingAttentionBase._try_getting_first_attention8   s)    t##$)##A&&r    c                     S nU R                   R                  5        HC  nUR                  [        R                  :X  d   UR                  [        R
                  :X  d  MA  UnME     U$ r#   )r   nodesop_typer   	LAYERNORMSKIPLAYERNORM)r   last_layernorm_noder-   s      r   _try_getting_last_layernorm0PackingAttentionBase._try_getting_last_layernorm>   sP    "JJ$$&D||y222dlliF]F]6]&*# ' #"r    c                     [        5       er#   NotImplementedErrorr1   s    r   _are_attentions_supported.PackingAttentionBase._are_attentions_supportedE       !##r    inputsoutputsc                 &   [         R                  " [        R                  UUU R                  R                  [        R                  5      S9nSUl        U R                  R                  U5        U R                  U R                  UR                  '   g NrA   rB   r   com.microsoft)r   	make_noder   REMOVEPADDINGr   create_node_namedomainr   appendr   r   r   r   rA   rB   new_nodes       r   _insert_removepadding_node/PackingAttentionBase._insert_removepadding_nodeH   sp    ####,,Y-D-DE	
 *  *6:6J6J$$X]]3r    c                 &   [         R                  " [        R                  UUU R                  R                  [        R                  5      S9nSUl        U R                  R                  U5        U R                  U R                  UR                  '   g rD   )r   rG   r   RESTOREPADDINGr   rI   rJ   r   rK   r   r   r   rL   s       r   _insert_restorepadding_node0PackingAttentionBase._insert_restorepadding_nodeT   sp    ##$$,,Y-E-EF	
 *  *6:6J6J$$X]]3r    token_offsetcumulative_sequence_lengthc                     [        5       er#   r<   )r   rT   rU   s      r   )_replace_attention_with_packing_attention>PackingAttentionBase._replace_attention_with_packing_attention`   r@   r    c                 z    U R                   [        R                  :X  a  UR                  [        R
                     $ g r#   )r   r   r$   r)   r   INPUT)r   r+   s     r   _get_input_to_remove_padding1PackingAttentionBase._get_input_to_remove_paddingc   s1    !!Y%8%88'--.?.E.EFFr    use_symbolic_shape_inferc                    [         R                  S5        U R                  5       (       d  g U R                  5       nU(       d  g U R	                  5       nU R                  5       nU(       d  g U R                  U5      nU(       d  g US-   nUS-   nUS-   nUS-   n	U R                  XR/XgX/5        U R                  R                  XV5        [         R                  S5        UR                  S   S-   n
U R                  X/UR                  S   /5        U R                  R                  UR                  S   U
5        [         R                  S	UR                   S
35        U R                  Xx5        [         R                  SU R                   SU R                   35        U R                  R!                  U R"                  5        U R                  R%                  U R&                  U R(                  5        U R*                  (       a  U R                  R+                  5         O<U R"                  (       d  U R&                  (       a  U R                  R-                  5         U R                  R/                  5         U(       a\  [1        U R                  R                  SS9nUR3                  U R                  R                  SSS9nU(       a  XR                  l        g g g )Nz$start converting to packing model..._no_padding_token_offset_cumulated_seq_len_max_seq_lenz'inserted RemovePadding before Attentionr   _restore_inputz#inserted RestorePadding after last z layerz	replaced z with PackedverboseTF)
auto_mergeguess_output_rank)loggerdebugr>   r.   r'   r9   r[   rN   r   replace_input_of_all_nodesoutputrR   replace_output_of_all_nodesr5   rW   r   remove_nodesr   	add_nodesr   r   r   update_graphclean_shape_inferr   infer_shapes)r   r]   r,   r+   r8   input_to_remove_paddingoutput_without_paddingrT   cumulated_seq_lenmax_seq_lenrestorepadding_inputshape_infer_helperinferred_models                r   convertPackingAttentionBase.converth   sj   ;<--//99;#@@B">>@" #'"C"CDX"Y&!8=!H.@36JJ->''$5#3DR	
 	

--.E^>?  399!<?OO((*>)MPcPjPjklPmOno

../B/I/I!/LNbc:;N;V;V:WW]^_ 	66|Wy!7!7 8TE[E[D\]^

 4 45

T..0L0LMJJ""$!!T%6%6JJ##%

$$&# ">djj>N>NXY!Z/<<TZZ=M=MZ^rw<xN#1

   $r    )r   r   r   r   r   r   r   r   T)__name__
__module____qualname____firstlineno__r   strr   r.   r
   r'   r9   boolr>   listrN   rR   rW   r[   ry   __static_attributes__ r    r   r   r      s    Ri RC RS4Z ('i$.> '#Y-= #$4 $
Kc 
KT#Y 
KSW 
K
K$s) 
Kd3i 
KTX 
K$c $gj $os $C$J 
72 72 72 72r    r   c                   T   ^  \ rS rSrS\4U 4S jjrS\4S jrS\S\SS4S	 jr	S
r
U =r$ )PackingAttention   r   c                 B   > [         TU ]  U[        R                  5        g r#   )superr   r   r$   r   r   	__class__s     r   r   PackingAttention.__init__   s    	 3 34r    r!   c                    U R                    H  n[        R                  " US5      b    g[        R                  " US5      b    g[        R                  " US5      nUb  US:w  a    g[        UR                  5      [
        R                  :  a$  UR                  [
        R                     (       d    g[        UR                  5      [
        R                  :  d  M  UR                  [
        R                     (       a  M    g   g)Npast_present_share_bufferF	do_rotaryunidirectionalr   T)r   r   get_node_attributer(   r)   r   PASTPAST_SEQUENCE_LENGTH)r   r-   unidirection_attrs      r   r>   *PackingAttention._are_attentions_supported   s    ((D++D2MNZ++D+>J ) < <TCS T ,1Ba1G4::!2!7!77

K\KaKa@bDJJ"3"H"HH

#4#I#IJJ ) r    rT   rU   Nc           
         U R                    GH  n[        UR                  5      [        R                  :  a  UR                  [        R                     OSn[
        R                  " [        R                  UR                  [        R                     UR                  [        R                     UR                  [        R                     UUU/UR                  [        R                     /U R                  R!                  [        R                  5      S9n/ nUR"                   H&  nUR$                  S;   d  M  UR'                  U5        M(     UR"                  R)                  U5        SUl        U R,                  R'                  U5        U R.                  R'                  U5        U R0                  U R2                  UR$                  '   GM     [4        R7                  S[        U R                   5      5        g )N rE   )	num_headsqkv_hidden_sizesscalerF   z0Converted %d Attention nodes to PackedAttention.)r   r(   r)   r   ATTENTION_BIASr   rG   r   PACKEDATTENTIONrZ   WEIGHTSBIASrk   r   OUTPUTr   rI   	attributer   rK   extendrJ   r   r   r   r   rh   info)r   rT   rU   	attentionattention_biaspacked_attention
attributesattrs           r   rW   :PackingAttention._replace_attention_with_packing_attention   s   --I y'*;*J*JJ  1 @ @A 
  &//))OO$5$;$;<OO$5$=$=>OO$5$:$:; ." #))*<*C*CDEZZ001J1JK  J!++99 JJ%%d+ , &&--j9&5#$$%56  ''	2BFBVBVD(()9)>)>?; .> 	FDL`L`Habr    r   )r|   r}   r~   r   r   r   r   r>   r   rW   r   __classcell__r   s   @r   r   r      sA    5i 54 $ cc  cgj  cos  c  cr    r   c                      ^  \ rS rSrS\4U 4S jjrS\S\4S jrS\S\4S jr	S\
4S	 jrS
\S\SS4S jrS\S-  4S jrSrU =r$ )PackingMultiHeadAttention   r   c                 B   > [         TU ]  U[        R                  5        g r#   )r   r   r   MULTI_HEAD_ATTENTIONr   s     r   r   "PackingMultiHeadAttention.__init__   s    	 > >?r    indexr   c                     [        UR                  5      U:  a;  [        UR                  U   5      S:  a  [        R                  SU SU SU 35        gg)'Check a node does not have given input.r   znode input  (0) is not supported in PackedMultiHeadAttention: FT)r(   r)   rh   errorr   r-   r   r   s       r   _check_empty_input,PackingMultiHeadAttention._check_empty_input   sP    tzz?U"4::e$%){5'D69ijniopqr    c                     [        UR                  5      U:  a;  [        UR                  U   5      S:  a  [        R                  SU SU SU 35        gg)r   r   znode output r   r   FT)r(   rk   rh   r   r   s       r   _check_empty_output-PackingMultiHeadAttention._check_empty_output   sQ    t{{e#4;;u%&*|E7"TF:jkojpqrr    r!   c                    U R                    GHD  nUR                   H;  nUR                  S;  d  M  [        R	                  SUR                   SU 35            g   UR
                  [        R                     (       a9  UR
                  [        R                     (       d  [        R	                  S5          gU R                  U[        R                  S5      (       au  U R                  U[        R                  S5      (       aO  U R                  U[        R                  S5      (       a)  U R                  U[        R                  S5      (       a  GME    g   g)	Nr   mask_filter_valuer   znode attribute z/ is not supported in PackedMultiHeadAttention: Fz=packed kv format is not supported in PackedMultiHeadAttentionpast_keypresent_keyT)r   r   r   rh   r   r)   r   KEYVALUEr   PAST_KEY
PAST_VALUEr   r   PRESENT_KEYPRESENT_VALUE)r   r-   r   s      r   r>   3PackingMultiHeadAttention._are_attentions_supported   s   ((D99$OOLL?499+=lmqlr!st  '
 zz4889$**MgMmMmBn\] ''.H.Q.QS]^^++D2L2W2WYcdd,,T3N3Z3Z\ijj,,T3N3\3\^kll! )$ r    rT   rU   Nc                 r   SnU R                    GHe  n[        UR                  5      [        R                  :  a  UR                  [        R                     OSn[
        R                  " [        R                  UR                  [        R                     UR                  [        R                     UR                  [        R                     UR                  [        R                     UUU/UR                  [        R                     /U R                   R#                  [        R                  5      S9n/ nUR$                   H&  nUR&                  S;   d  M  UR)                  U5        M(     UR$                  R+                  U5        SUl        U R.                  R)                  U5        U R0                  R)                  U5        U R2                  U R4                  UR&                  '   U(       d  GM  U R                   R7                  U[        R                  5      n	U	(       d  GM  U	R8                  S:X  d  GM)  [        U	R                  5      S:X  d  GME  U	R                  R)                  U5        US-  nGMh     [:        R=                  S	[        U R                   5      5        [:        R=                  S
U5        g )Nr   r   rE   r   rF   GatedRelativePositionBias      zBConverted %d MultiHeadAttention nodes to PackedMultiHeadAttention.z=Converted %d GatedRelativePositionBias nodes to packing mode.)r   r(   r)   r   r   r   rG   r   PACKED_MULTI_HEAD_ATTENTIONQUERYr   r   r   rk   r   r   r   rI   r   r   rK   r   rJ   r   r   r   r   
get_parentr5   rh   r   )
r   rT   rU   gated_relative_pos_bias_countmhar   
packed_mhar   r   rel_pos_bias_nodes
             r   rW   CPackingMultiHeadAttention._replace_attention_with_packing_attention  s    ()%''C syy>$>$M$MM 		4CCD 
  ))55II8>>?II8<<=II8>>?II8==> ." $?$F$FGHZZ001V1VWJ J99 KK%%d+ &   ''
3 /J$$Z0  '',<@<P<PD((9 ~$(JJ$9$9#?Y?h?h$i!%%)115PP-3349%++22<@1Q61S (V 	XZ]^b^r^rZstSUrsr    c                     U R                   R                  US5      nU(       a  UR                  S:X  a  UR                  S   $ g )Nr   MatMul)r   r   r5   r)   )r   r+   matmuls      r   r[   6PackingMultiHeadAttention._get_input_to_remove_padding4  s8    &&';Q?fnn0<<?"r    r   )r|   r}   r~   r   r   r   intr   r   r   r   r>   rW   r[   r   r   r   s   @r   r   r      sy    @i @c  s # 4 *.tc .tgj .tos .t`C$J  r    r   c                   8    \ rS rSrS\4S jrS	S\SS4S jjrSrg)
PackingModei<  r   c                     Xl         g r#   r   )r   r   s     r   r   PackingMode.__init__=  s    
r    r]   r!   Nc                    U R                   R                  [        R                  5      (       aj  U R                   R                  [        R                  5      (       a  [
        R                  S5        g [        U R                   5      nUR                  U5      $ U R                   R                  [        R                  5      (       a&  [        U R                   5      nUR                  U5      $ [
        R                  S5        g )NzRPacking mode does not support both Attention and MultiHeadAttention in same graph.zPPacking mode requires either Attention or MultiHeadAttention node in onnx graph.)
r   r   r   r$   r   rh   r   r   ry   r   )r   r]   packings      r   ry   PackingMode.convert@  s    ::**9+>+>??zz..y/M/MNNqr&tzz2G??#;<<ZZ,,Y-K-KLL/

;G??#;<<LLklr    r   r{   )	r|   r}   r~   r   r   r   r   ry   r   r   r    r   r   r   <  s&    i    r    r   c                  *   [         R                  " SS9n U R                  SS[        SS9  U R                  SS[        SS9  U R                  S	S
SSS9  U R	                  S
S9  U R                  SS
SSS9  U R	                  S
S9  U R                  5       nU$ )Nz_Convert to packing mode tool for ONNX Runtime. It converts BERT like model to use packing mode.)descriptionz--inputTzinput onnx model path)requiredtypehelpz--outputzoptimized onnx model pathz	--verboseF
store_truezshow debug information.)r   actionr   rd   z--use_external_data_formatz4use external data format to store large model (>2GB)use_external_data_format)argparseArgumentParseradd_argumentr   set_defaults
parse_args)parserargss     r   _parse_argumentsr   O  s    $$uF 	DsAXY

TB]^
eLOhi
&
$C	   7DKr    c                 f    U (       a  [         R                  " SSS9  g [         R                  " SS9  g )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(funcName)20s: %(message)s)r   )coloredlogsinstallrd   s    r   _setup_loggerr   g  s*    J	

 	 =>r    c                     [        5       n [        U R                  5        [        R	                  SU  35        [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      :X  a  [        R                  S5        [        U R                  5      n[        [        U5      5      nUR                  5         UR                  R!                  U R                  U R"                  S9  g )Nz
arguments:zYSpecified the same input and output path. Note that this may overwrite the original modelr   )r   r   re   rh   ri   ospathrealpathr)   rk   warningr	   r   r   ry   r   save_model_to_filer   )r   r   packing_modes      r   mainr  q  s    D$,,
LL:dV$%	ww

#rww'7'7'DDrstzz"Ey/0L))$++PTPmPm)nr    __main__)r   loggingr   r   	constantsr   r   r   r   r   onnxr   r	   
onnx_modelr
   r   rw   r   	getLoggerr|   rh   r   r   r   r   r   r   r  r   r    r   <module>r     s      	   $ + ;			8	$F2 F2R6c+ 6cr^ 4 ^B &0?o  zF r    