
    h3                         S r SSKrSSKrSSKrSSKrSSKJr  SSKJr  SSK	r	SSK
r	SSKJrJr  \R                  " S5      r " S S	5      rg)
zClass for ONNX model.    N)deque)Path   )MAXIMUM_PROTOBUFfind_by_nameneural_compressorc                      \ rS rSrSrS rS r\S 5       r\S 5       r	\	R                  S 5       r	S r\S	 5       r\R                  S
 5       r\S 5       r\S 5       r\R                  S 5       rS rS rS r\S 5       rS rS rS rS rS rS rS rS rS rS rS rS rS rS r S  r!S! r"S" r#S# r$SNS$ jr%\S% 5       r&S& r'\S' 5       r(S( r)S) r*SOS+ jr+SOS, jr,SOS- jr-S. r.S/ r/S0 r0SNS1 jr1\2S2 5       r3SPS3 jr4\2S4 5       r5SPS5 jr6S6 r7SNS7 jr8SOS8 jr9S9 r:SNS: jr;S; r<S< r=S= r>S> r?SOS? jr@    SQS@ jrA  SPSA jrBSB rCSC rD SRSD jrESE rFSF rGSG rHSH rISOSI jrJSSSJ jrKSK rLSL rMSMrNg*)T	ONNXModel&   zBuild ONNX model.c                    [        U[        5      (       d  UO[        R                  " USS9U l        [        U[        5      (       d  SOUU l        U R                  5         U R                  (       a9  U R
                  c,  UR                  SS5      (       d  [        R                  S5        U R                  (       a}  [        U[        5      (       ah  UR                  SS5      (       aQ  [        R                  R                  U R                  [        R                  R                  U R
                  5      5        SU l        [        U[        5      (       a  [        R                  R#                  [%        U5      R&                  R)                  S5      R+                  5       5      (       a=  S	S
KJn  UR1                  [%        U5      R&                  R+                  5       5      U l        0 U l        0 U l        0 U l        U R9                  U R                  R:                  R<                  5        U R?                  U R                  R:                  R<                  5        0 U l         U RC                  5         SU l"        g)a  Initialize an ONNX model.

Args:
    model (str or ModelProto): path to onnx model or loaded ModelProto model object.
    ignore_warning (bool): ignore large model warning. Default is False.
    load_external_data (bool): load external data for large model. Default is True.
F)load_external_dataNignore_warningzPModel size > 2GB. Please use model path instead of onnx model object to quantizer   Tconfig.jsonr   )
AutoConfig)#
isinstancestronnxload_model_model_pathcheck_is_large_model_is_large_modelgetloggerwarningexternal_data_helperload_external_data_for_modelospathdirname_configexistsr   parentjoinpathas_posixtransformersr   from_pretrainednode_name_counter_output_name_to_node_input_name_to_nodes_get_input_name_to_nodesgraphnode_get_output_name_to_node_graph_info_get_graph_info	_q_config)selfmodelkwargsr   s       o/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/quantization/neural_compressor/onnx_model.py__init__ONNXModel.__init__)   s    $.eS#9#9etyych?i'1%'='=45!!#D$4$4$<VZZP`bgEhEhNNmnJuc$:$:vzzJ^`d?e?e%%BB4;;PRPWPWP_P_`d`p`pPqreS!!bggnnT%[5G5G5P5PQ^5_5h5h5j&k&k/%55d5k6H6H6Q6Q6STDL!#$&!$&!%%dkk&7&7&<&<=%%dkk&7&7&<&<=    c                    SnU R                   R                  R                   H  nUR                  S5      (       a1  UR                  [
        R                  R                  :X  a	  SU l          g UR                  5       nU[        R                  " U5      -  nU[        :  d  M  SU l          g   SU l        g! [         a#  nS[        U5      ;   a  SU l         SnA  gUeSnAff = f)zCheck model > 2GB.r   data_locationTNz$exceeds maximum protobuf size of 2GBF)r   r,   initializerHasFieldr:   r   TensorProtoEXTERNALr   SerializeToStringsys	getsizeof	Exceptionr   r   )r2   	init_sizeinit
init_byteses        r5   r   ONNXModel.check_is_large_modelJ   s    	KK%%11D}}_--$2D2DHXHXHaHa2a'+$!335
S]]:66	 ++'+$# 2$  %  9SVC+/D(Gs   .)B66
C# CCC#c                     U R                   $ )z!Check the onnx model is over 2GB.)r   r2   s    r5   is_large_modelONNXModel.is_large_modela   s     ###r8   c                     U R                   $ )zReturn model path.r   rI   s    r5   
model_pathONNXModel.model_pathf        r8   c                     Xl         g)zSet model path.NrM   )r2   r   s     r5   rN   rO   k   s
      r8   c                     g)zReturn framework.onnxruntime rI   s    r5   	frameworkONNXModel.frameworkp   s    r8   c                     U R                   $ )zReturn q_config.r1   rI   s    r5   q_configONNXModel.q_configt   s     ~~r8   c                     Xl         g)zSet q_config.NrX   )r2   rY   s     r5   rY   rZ   y   s	     "r8   c                     U R                   $ )z8Return huggingface config if model is Transformer-based.)r!   rI   s    r5   	hf_configONNXModel.hf_config~   s     ||r8   c                     U R                   $ )zReturn model itself.)r   rI   s    r5   r3   ONNXModel.model   s     {{r8   c                    Xl         0 U l        U R                  5         0 U l        0 U l        U R                  U R                   R                  R                  5        U R                  U R                   R                  R                  5        g)zSet model itself.N)	r   r/   r0   r)   r*   r+   r,   r-   r.   )r2   r3   s     r5   r3   r`      si     $&!$&!%%dkk&7&7&<&<=%%dkk&7&7&<&<=r8   c                     U R                   R                  R                   Vs/ s H  oR                  PM     sn$ s  snf )zReturn input of model.)r   r,   inputnamer2   is     r5   rc   ONNXModel.input   s.     $ 1 1 7 78 71 7888   ;c                     U R                   R                  R                   Vs/ s H  oR                  PM     sn$ s  snf )zReturn output of model.)r   r,   outputrd   re   s     r5   rj   ONNXModel.output   s.     $ 1 1 8 89 81 8999rh   c                 
   0 U l         U R                  5         0 U l        0 U l        U R	                  U R
                  R                  R                  5        U R                  U R
                  R                  R                  5        g)zUpdate model info.N)	r/   r0   r)   r*   r+   r   r,   r-   r.   rI   s    r5   updateONNXModel.update   sb    $&!$&!%%dkk&7&7&<&<=%%dkk&7&7&<&<=r8   c                     U R                   $ )zEReturn ORT Graph Info object holding information about backend graph.)r/   rI   s    r5   
graph_infoONNXModel.graph_info   rP   r8   c                     U R                   R                  R                   H4  nU R                  R	                  UR
                  UR                  05        M6     g)zUpdate graph info.N)r   r,   r-   rp   rm   rd   op_typer2   r-   s     r5   r0   ONNXModel._get_graph_info   s;    KK%%**DOO""DIIt||#<= +r8   c           
         [         R                  R                  U5      S   S:w  aO  [         R                  R                  [         R                  R                  U5      S   5      (       d  [	        S5      eU R
                  (       a  [        R                  R                  U R                  [         R                  R                  U R                  5      S   5        [        R                  " U R                  USSUR                  S5      S   S-   SS	S
9  O![        R                  " U R                  U5        U R                  b  [        U R                  S5      (       d  SOU R                  R                  nX R                  R                   l        [#        U5      R$                  R'                  S5      R)                  5       nU R                  R+                  US	S9  gg)zSave ONNX model.r    z!"root" directory does not exists.T/_data   Fsave_as_external_dataall_tensors_to_one_filelocationsize_thresholdconvert_attributeN
model_typer   )use_diff)r   r   splitr"   
ValueErrorrJ   r   r   r   r   r   
save_modelsaver!   hasattrr   	__class__r   r#   r$   r%   to_json_file)r2   rootr   output_config_files       r5   r   ONNXModel.save   sV   77==q!R'rww}}T?RST?U0V0V@AA%%BB4;;PRPWPWP]P]^b^n^nPopqPrsOO&*(,C,w6#"' IIdkk4(<<##*4<<#F#FDLLLcLcJ0:LL""-!%d!2!2!;!;M!J!S!S!ULL%%&85%I	 $r8   c                 B    U R                   R                  R                  $ )zReturn model nodes.)r   r,   r-   rI   s    r5   nodesONNXModel.nodes   s    {{  %%%r8   c                 B    U R                   R                  R                  $ )zReturn model initializer.)r   r,   r;   rI   s    r5   r;   ONNXModel.initializer   s    {{  ,,,r8   c                 .    U R                   R                  $ )zReturn model graph.)r   r,   rI   s    r5   r,   ONNXModel.graph   s    {{   r8   c                 .    U R                   R                  $ )zReturn model ir_version.)r   
ir_versionrI   s    r5   r   ONNXModel.ir_version   s    {{%%%r8   c                 .    U R                   R                  $ )zReturn model opset_import.)r   opset_importrI   s    r5   r   ONNXModel.opset_import   s    {{'''r8   c                     XR                   R                  R                  ;   a0  U R                   R                  R                  R                  U5        gg)zRemove a node from model.N)r   r,   r-   removert   s     r5   remove_nodeONNXModel.remove_node   s;    ;;$$)))KK""))$/ *r8   c                 8    U H  nU R                  U5        M     g)zRemove nodes from model.N)r   )r2   nodes_to_remover-   s      r5   remove_nodesONNXModel.remove_nodes   s    #DT" $r8   c                 d    U R                   R                  R                  R                  U/5        g)zAdd a node to model.Nr   r,   r-   extendrt   s     r5   add_nodeONNXModel.add_node   s"    %%tf-r8   c                 b    U R                   R                  R                  R                  U5        g)zAdd nodes to model.Nr   )r2   nodes_to_adds     r5   	add_nodesONNXModel.add_nodes   s     %%l3r8   c                     [        UR                  U R                  R                  R                  5      c1  U R                  R                  R                  R                  U/5        gg)zAdd a initializer to model.N)r   rd   r   r,   r;   r   r2   tensors     r5   add_initializerONNXModel.add_initializer   sI    T[[%6%6%B%BCKKK))00&: Lr8   c                 8    U H  nU R                  U5        M     g)zAdd initializers to model.N)r   )r2   tensorsr   s      r5   add_initializersONNXModel.add_initializers   s    F  ( r8   c                 z    U R                   R                  R                   H  nUR                  U:X  d  M  Us  $    g)zGet an initializer by name.N)r   r,   r;   rd   )r2   rd   r   s      r5   get_initializerONNXModel.get_initializer   s2    kk''33F{{d" 4 r8   c                     SnU R                  U5      c  U$ U R                  5        H  nXR                  ;   d  M  US-  nM     U$ )z(Get the number of shares of initializer.r   r   )r   r   rc   )r2   rd   numr-   s       r5   get_initializer_share_num#ONNXModel.get_initializer_share_num   sG    %-JJJLDzz!q ! 
r8   c                 z    U R                   R                  R                   H  nUR                  U:X  d  M  Us  $    g)zGet a node by name.N)r   r,   r-   rd   )r2   rd   r-   s      r5   get_nodeONNXModel.get_node  s2    KK%%**DyyD  + r8   c                     XR                   R                  R                  ;   a0  U R                   R                  R                  R                  U5        gg)z!Remove an initializer from model.N)r   r,   r;   r   r   s     r5   remove_initializerONNXModel.remove_initializer  s;    [[&&222KK))008 3r8   c                 8    U H  nU R                  U5        M     g)zRemove initializers from model.N)r   )r2   init_to_remover;   s      r5   remove_initializersONNXModel.remove_initializers  s    )K##K0 *r8   c                 z   U R                  U5      nU R                  U5        UR                  nUR                  nU(       d<  [        R
                  R                  XXRR                  5       R                  5       5      O,[        R
                  R                  XXRR                  5       US9nU R                  U5        g)zUpdate initializer.)rawN)r   r   dims	data_typer   helpermake_tensorflattentolisttostringr   )r2   r   arrayr   
old_tensorr   r   
new_tensors           r5   set_initializerONNXModel.set_initializer  s    ))&1

+((	  KK##Ft]]_=S=S=UV((D..BRX[(\ 	
 	Z(r8   c                     U R                   $ )zReturn input names of nodes.)r*   rI   s    r5   input_name_to_nodesONNXModel.input_name_to_nodes&       (((r8   c                 \   U GH   nUR                    Vs/ s HW  nUR                  [        R                  R                  :X  d*  UR                  [        R                  R
                  :X  d  MU  UPMY     nn[        U5      S:  a.  U H(  nU R                  UR                  R                  5        M*     UR                   Ha  n[        UR                  5       5      S:w  d  M"  XPR                  ;  a  U/U R                  U'   MC  U R                  U   R                  U5        Mc     GM#     gs  snf )zGet input names of nodes.r   N)	attributetyper   AttributeProtoGRAPHGRAPHSlenr+   gr-   rc   stripr*   append)r2   r   r-   attrattrs
input_names         r5   r+   "ONNXModel._get_input_name_to_nodes+  s    D !NN*D99 3 3 9 99TYY$J]J]JdJd=d *  
 5zA~!D11$&&++> ""jj
z'')*a/!)B)BBAE11*=11*=DDTJ ) s   AD)-D)c                     U R                   $ )zReturn output names of nodes.)r)   rI   s    r5   output_name_to_nodeONNXModel.output_name_to_node=  r   r8   c                    U H  nUR                    Vs/ s HW  nUR                  [        R                  R                  :X  d*  UR                  [        R                  R
                  :X  d  MU  UPMY     nn[        U5      S:  a.  U H(  nU R                  UR                  R                  5        M*     UR                   H0  n[        UR                  5       5      S:w  d  M"  X R                  U'   M2     M     gs  snf )zGet output names of nodes.r   N)r   r   r   r   r   r   r   r.   r   r-   rj   r   r)   )r2   r   r-   r   r   output_names         r5   r.   "ONNXModel._get_output_name_to_nodeB  s    D !NN*D99 3 3 9 99TYY$J]J]JdJd=d *  
 5zA~!D11$&&++> "#{{{((*+q0=A--k:  + s   AC6,C6c                     / nU R                  U5       HH  nU R                  U5       H0  nUR                  UR                  :w  d  M  UR                  U5        M2     MJ     U$ )zGet siblings nodes.)get_parentsget_childrenrd   r   )r2   r-   siblingsr#   childs        r5   get_siblingsONNXModel.get_siblingsQ  sT    &&t,F**62::*OOE* 3 - r8   Nc                     Uc  U R                   n/ nUR                   H&  nXB;   d  M
  X$    H  nUR                  U5        M     M(     U$ )zGet children nodes.)r*   rj   r   )r2   r-   r   childrenrj   r   s         r5   r   ONNXModel.get_childrenZ  sM    &"&";";kkF,08EOOE* 9 " r8   c                     Uc  U R                   n/ nUR                   H  nXB;   d  M
  UR                  X$   5        M     U$ )zGet parents nodes.)r)   rc   r   )r2   r-   r   parentsrc   s        r5   r   ONNXModel.get_parentsf  sD    &"&";";ZZE+29:   r8   c                     Uc  U R                   n[        UR                  5      U::  a  gUR                  U   nXC;  a  gX4   $ )zGet parent node by idx.N)r)   r   rc   )r2   r-   idxr   rc   s        r5   
get_parentONNXModel.get_parentq  sF    &"&";";tzz?c!

3+"))r8   c                 h    [        UR                  5      nUR                  U5        [        X5      nU$ )zFind out node by name.)listr-   r   r   )r2   	node_namenew_nodes_listr,   graph_nodes_listr-   s         r5   find_node_by_nameONNXModel.find_node_by_name  s.    

+/I8r8   c                     / nUR                    H8  nUR                   H%  nXRR                  :X  d  M  UR                  U5        M'     M:     U$ )z2Find all nodes with given initializer as an input.)r-   rc   rd   r   )r2   r,   r;   r   r-   
node_inputs         r5   find_nodes_by_initializer#ONNXModel.find_nodes_by_initializer  sB    JJD"jj
!1!11LL& )  r8   c                   ^ ^ UR                  S5      (       d  [        R                  SU S35        gUU 4S jmT R                  U   S   nUR                  S:X  a  XR
                  S   :X  d"  UR                  S	:X  a  XR
                  S
   :X  a  gT" U5      u  p4U(       d
   SU 35       eU(       d
   SU 35       eX44$ )z*Help function to get scale and zero_point.
_quantizedzFind z) in the quantized graph is not quantized.NNc                   > T
R                   U    S   nT
R                  R                  U S5      n/ SQnUbP  UR                  U;   a@  UR                  S   R                  SS5      R                  SS5      R                  SS5      nOUR                  S;   a@  UR                  S   R                  SS5      R                  SS5      R                  SS5      nO2U R                  SS5      R                  SS5      R                  SS5      nUS	-   nT
R                  U5      nUS
-   nT
R                  U5      nUb  Uc  Ub  T	" UR                  S   5      u  phXh4$ )z/Search scale and zero point tensor recursively.r   N)Reshape	TransposeSqueeze	UnsqueezeMaxPoolPadSplitr	  rw   _QuantizeLinear_QuantizeInput)Gather_scale_zero_point)r*   r)   r   rs   rc   replacerj   r   )tensor_namer-   r#   direct_int8fp32_tensor_namescalescale_tensorzo	zo_tensor	_searcherr2   s            r5   r   +ONNXModel.get_scale_zero.<locals>._searcher  sZ   ,,[9!<D..22;EFeK!fnn&CLLOW\2.W.3W-r2	 ! +KKNW\2.W.3W-r2	 !  ''b9AABSUWX``aqsuv ! %x/E//6L!M1B,,R0I#y'8%.7Q.H+L**r8   r   QLinearConvry   QGemmzmissing scale for tensor zmissing zero point for tensor )endswithr   debugr*   rs   rc   )r2   r   r-   r  r  r   s   `    @r5   get_scale_zeroONNXModel.get_scale_zero  s    |,,LL5(QRS	+B ((03 LLM)f

2.FLLG#**R.(@&/&7#LE#<VH!EE<G >vhGG9**r8   c                     U(       a?  [         R                  R                  U R                  S[	        U5      R
                  S-   S9  [         R                  " U R                  U5        g)zBSave model to external data, which is needed for model size > 2GB.Tz.data)r~   r   N)r   r   convert_model_to_external_datar   r   rd   r   )r2   output_pathuse_external_data_formats      r5   save_model_to_fileONNXModel.save_model_to_file  sO    #%%DDTDDUDZDZ]dDd E  	[1r8   c                     [        U[        5      (       a  [        U[        5      (       d   e[        [        U R                  5      5       H&  nU R                  U   U:X  d  M  X R                  U'   M(     g)zReplace input of a node.N)r   r   ranger   rc   )r-   old_input_namenew_input_namejs       r5   replace_node_inputONNXModel.replace_node_input  sU     .#..:nc3R3RRRs4::'Azz!}. .

1 (r8   c                 t   Uc  / nUc  / n[        U5      S:  aP  U R                  R                  R                   H+  nUR                  U;   d  M  [
        R                  XQU5        M-     gU R                  R                  R                   H+  nUR                  U;  d  M  [
        R                  XQU5        M-     g)zReplace inputs of all nodes.Nr   )r   r3   r,   r-   rs   r
   r4  )r2   r1  r2  white_optypeblack_optyper-   s         r5   replace_input_of_all_nodes$ONNXModel.replace_input_of_all_nodes  s    LL|q 

((--<<</00~V . 

((--<<|300~V .r8   c                     [        U[        5      (       a  [        U[        5      (       d   e[        [        U R                  5      5       H&  nU R                  U   U:X  d  M  X R                  U'   M(     g)zReplace output of a node.N)r   r   r0  r   rj   )r-   old_output_namenew_output_namer3  s       r5   replace_node_outputONNXModel.replace_node_output  sW     /3//JPS4T4TTTs4;;'(A{{1~0!0A )r8   c                 t   Uc  / nUc  / n[        U5      S:  aP  U R                  R                  R                   H+  nUR                  U;   d  M  [
        R                  XQU5        M-     gU R                  R                  R                   H+  nUR                  U;  d  M  [
        R                  XQU5        M-     g)zReplace outputs of all nodes.Nr   )r   r3   r,   r-   rs   r
   r>  )r2   r<  r=  r7  r8  r-   s         r5   replace_output_of_all_nodes%ONNXModel.replace_output_of_all_nodes  s    LL|q 

((--<<</11$Y . 

((--<<|311$Y .r8   c                 B   / nU R                  5       nU GH  nUR                  S:X  aa  UR                  S   U R                  R                  R                  ;  a0  UR                  S   U R
                  ;  a  UR                  U5        Mu  UR                  S:X  a  [        U R                  U5      5      S:X  a  U R                  U5      S   R                  S:X  a  UR                  S   U R                  ;  ac  U R                  U5      S   R                  S   U R
                  ;  a4  UR                  U5        UR                  U R                  U5      5        GME  SnUR                   H)  nXPR
                  ;   d  XPR                  5       ;   d  M'  Sn  O   UR                   H=  nU R                  U5      b  M  X`R                  ;   d  X`R                  5       ;   d  M;  Sn  O   U(       d  GM  UR                  U5        GM     U R                  U5        / nU R                  R                  R                   H  nUR                  U R
                  ;  d  M  UR                  U R                  R                  R                  ;  d  MO  UR                  U5        U R	                  5       R                   HH  n	U	R                  UR                  :X  d  M  U R	                  5       R                  R!                  U	5        MJ     M     U R#                  U5        U R%                  5         g)	zRemove unused nodes.Constantr   QuantizeLinearr   DequantizeLinearTFN)r   rs   rj   r   r,   r*   r   r   r   rc   r)   r   r   r   r;   rd   r   r   rm   )
r2   unused_nodesr   r-   unusedrj   rc   ununsed_weightswgraph_inputs
             r5   remove_unused_nodesONNXModel.remove_unused_nodes  sg   

D
*KKN$++*;*;*B*BBKKN$*C*CC##D) 00))$/0A5%%d+A.66:LLJJqM)B)BB%%d+A.55a8@Y@YY##D)##D$5$5d$;< "kkF!:!::f>U!& * "ZZE++E2> ";";;u

?T!& ( 6 ''-= > 	,'""..AvvT666166IZIZIaIa;a&&q)#'::<#5#5K"''1661

**11+> $6	 / 	  1r8   c           	      >  ^ ^^ U(       d  0 n0 mT R                   R                  R                   H  nUR                   HA  n[	        UR                  5       5      S:w  d  M"  XB;  a  U/X$'   M.  X$   R                  U5        MC     UR                   H'  n[	        UR                  5       5      S:w  d  M"  UTU'   M)     M     OT R                  nT R                  m0 m[        5       n[        5       nT R                   R                  R                   H   nUR                  X(R                     5        M"     T R                   R                  R                   H;  n	[        UU 4S jU	R                   5       5      (       d  M*  UR                  U	5        M=     U(       a  UR                  5       n	[        UU4S jU	R                   5       5      (       d  X;  a  UR                  U	5        MT  U	TU	R                  '   U	R                   HF  n
X;   d  M
  UR                  X*    Vs/ s H  oR                  T;  d  M  X;  d  M  UPM     sn5        MH     [	        U5      S:X  a5  [	        U5      S:w  a&  [        R                   " U5      nUR#                  5         U(       a  M  TR%                  5        Vs/ s H  oS   PM	     nn[	        ['        U V	s1 s H  oR                  iM     sn	5      5      [	        ['        T R                   R                  R                   V	s1 s H  oR                  iM     sn	5      5      :X  d   eT R                   R                  R)                  S5        T R                   R                  R                  R                  U5        gs  snf s  snf s  sn	f s  sn	f )zTopological sort the model.r   c              3   `   >#    U  H#  oT;  =(       a    UTR                  5       ;  v   M%     g 7fN)rc   ).0rf   r   r2   s     r5   	<genexpr>-ONNXModel.topological_sort.<locals>.<genexpr>D  s*     [SZa//IATZZ\4IISZs   +.c              3   V   >#    U  H  oT;   d  M
  TU   R                   T;   v   M      g 7frP  )rd   )rQ  rf   	all_nodesr   s     r5   rR  rS  I  s+     mAYlTl?*1-22i?s   	))r   r-   N)r3   r,   r-   rc   r   r   r   rj   r*   r)   r   r   rd   allpopleftcopydeepcopyclearitemsr   
ClearField)r2   enable_subgraphr   r-   r   r   qwaitinpnoutrf   r   rU  r   s   `            @@r5   topological_sortONNXModel.topological_sort*  s    "$"$

((--"&**J:++-.!3%@?Cf/;/;BB4H #- $(;;K;,,./14;?+K8 $/ . #'";";"&";";	Gw::##))CHH(23 *!!&&A[STSZSZ[[[ ' 		Ammmm=KKN !Iaffxx-HH)<)Al)AAVVS\E\aabaka)Alm   1v{s4yA~MM$'

 a  )010!1014/A/01StzzO_O_OdOd>eOd!vvOd>e9f5gggg

##F+

$$U+ m 2/>es$   N
1N
8N
(NNNc                 <   Uc  / n[        5       nU Hl  n[        U[        5      (       a  UR                  U5        M+  [        U[        R
                  5      (       a  UR                  UR                  5        Mg   S5       e   / nU Hl  n[        U[        5      (       a  UR                  U5        M+  [        U[        R
                  5      (       a  UR                  UR                  5        Mg   S5       e   U(       a  UR                  5       nXv;   a  M  Xs;  a  UR                  U5        OM7  [        U[        U R                  R                  R                  5      5      nU R                  U5       H  nUR                  UR                  5        M      U(       a  M  U$ )z4Get nodes chain with given start node and stop node.zM'get_nodes_chain' function only support list[string]or list[NodeProto] params)r   r   r   r   r   	NodeProtord   rW  r   r   r3   r,   r-   r   )	r2   startstopresult_chain
start_noder-   	stop_noder   r#   s	            r5   get_nodes_chainONNXModel.get_nodes_chainZ  sK   LW
D$$$!!$'D$..11!!$)),mmmu  	D$$$  &D$..11  +mmmu  "**,I%,##I.	4

0@0@0E0E+FGD**40!!&++. 1 j r8   c                    / nU R                   R                  R                   GH.  nSu  p4UR                  S:X  a0  UnU R	                  U/ SQ/ SQ5      U R	                  U/ SQ/ SQ5      /nUR                  S:X  a  UnU R	                  U/ SQ/ SQ5      U R	                  U/ SQ/ S	Q5      U R	                  U/ S
Q/ SQU R
                  / S9U R	                  U/ SQ/ SQ5      U R	                  U/ SQ/ SQ5      U R	                  U/ SQ/ SQ5      U R	                  U/ SQ/ SQ5      /nU(       d  GM
  [        U5      (       d  GM  UR                  U5        GM1     U$ )z,Find split node for layer wise quantization.r
  SkipLayerNormalizationMatMulr  r  r  rq  Nr   r   r   r   Addrq  r  r  rq  r   r   r   r   r   rt  r   Nr   r   r   r   Nr   r   r   r  Gemmr  r  r  rq  Nr   r   r   r   r   r   return_indicert  rq  r  r  r  rq  r   Nr   r   r   r   r   Nr   r   r   r   )rq  Mulrq  r  Divrt  )Nr   Nr   Nr   )rq  r  rq  SimplifiedLayerNormalizationrt  )Nr   Nr   r   )r   r,   r-   rs   match_parent_pathr   anyr   )r2   start_nodesr-   rj  qkv_nodes_lists        r5   +find_split_node_for_layer_wise_quantization5ONNXModel.find_split_node_for_layer_wise_quantization  s~    KK%%**D)3&J||77!
**"O*
 **"K'" ||u$!
 **"K*
 **"$WYk **"X-,0,D,D&( +  **"V-
 **"V-
 **"H3
 **"Z-E'"P ~&&z*} +~ r8   c                    / nU R                   R                  R                   GH(  nUR                  S:X  a  WR	                  UR
                  /5        M2  Su  pEUR                  S:X  a0  UnU R                  U/ SQ/ SQ5      U R                  U/ SQ/ SQ5      /nUR                  S:X  a|  UnU R                  U/ SQ/ S	Q5      U R                  U/ SQ/ S
Q5      U R                  U/ SQ/ SQU R                  / S9U R                  U/ SQ/ SQ5      U R                  U/ SQ/ SQ5      /nU(       d  GM  [        U5      (       d  GM  U Vs/ s H	  o"c  M  UPM     snS   n/ nUR                   H<  nXR                  ;  a  M  XS   R                  S   :X  a  M+  UR	                  U5        M>     [        U5      S:w  a  GM  US   n	U R                  n
X   nU Vs/ s H  oR                  PM     nnUR                  S5      S:X  d  GM  WR	                  U Vs/ s H   oR                  S:X  d  M  UR
                  PM"     sn5        U(       a  GM(    U$    W$ s  snf s  snf s  snf )zFind qkv MatMul in Attention.

Args:
    find_all (bool, optional): find all qkv MatMul. Defaults to False

Returns:
    qkv (list): qkv MatMul list
	Attentionr
  ro  rp  rr  rs  ru  rt  rv  rw  rx  rz  r{  r}  r~  r  ry   r   r   rq     )r   r,   r-   rs   r   rd   r  r   r  rc   rj   r   r   count)r2   find_allqkvr-   rj  r  	qkv_nodesother_inputsrc   
root_inputr   r   r   children_typess                 r5   find_qkv_in_attentionONNXModel.find_qkv_in_attention  s^    KK%%**D||{*

DII;')3&J||77!
**"O*
 **"K'" ||u$!
 **"K*
 **"$WYk **"X-,0,D,D&( +  **"V-
 **"V-1"< ~&&(6JJ2NIL#)) 8 88aL//22##E* * < A%%aJ"&":":*6H9ABmmNB##H-2

HZH5QY@YJEJJHZ[x
S +R 
% K CZs   IIII
*I
c                 @   / n[        [        U5      5       H  nU[        U5      S-
  :w  a1  XS-      nUS-
  S:  a  UR                  X&S-
     X&S-
     /5        MD  MF  X   nXc-   S-
  [        U5      :  d  M`  UR                  X&U-   S-
     X&U-   S-
     /5        M     U$ )zFind MatMul in FFN.

Args:
    attention_index (list): index of Attention
    attention_matmul_list (list): list of Attention and MatMul nodes
    block_len (int): block length

Returns:
    list: list of MatMul in FFN
r      r   )r0  r   r   )r2   attention_indexattention_matmul_list	block_len
ffn_matmulr   indexs          r5   find_ffn_matmulONNXModel.find_ffn_matmul"  s     
_-.Cc/*Q..'a019>%%'<QY'GI^gh_hIi&jk " (,$q(3/D+EE%%.y/@1/DEG\en]nqr]rGst / r8   c                    SSK Jn  SSKJn  [	        X#5      (       a  U" U R
                  U R                  5      u  pVnU R                  U5        U R                  U5        U R                  U5        U R                  5         U R                  5         U R                  5         U R                  U5        g[        R                  S5        [!        S5        g)zExport Qlinear to QDQ model.r   )ONNXQlinear2QDQConfig)onnx_qlinear_to_qdqzGUnsupported config for export, only ONNXQlinear2QDQConfig is supported!N)neural_compressor.configr  neural_compressor.utils.exportr  r   r   r*   r   r   r   rm   rL  rc  r   r   r   exit)r2   	save_pathconfr  r  r   r   initss           r5   exportONNXModel.export;  s    BFd22-@dNgNg-h*IUNN9%l+!!%(KKM$$&!!#IIi NNdeGr8   c                    / nU HM  nX0R                  5       ;  d  M  [        R                  R                  5       nX4l        UR                  U5        MO     U R                  R                  R                   R                  U5        g)zwAdd the tensors to the model outputs to gets their values.

Args:
    tensor_names: The names of tensors to be dumped.
N)	rj   r   r   ValueInfoProtord   r   r   r,   r   )r2   tensor_namesadded_outputsr   added_tensors        r5   add_tensors_to_outputs ONNXModel.add_tensors_to_outputsM  se     "F[[]*#{{99;$*!$$\2	 #
 	  ''6r8   c                 R   / nU Hg  nX0R                  5       ;   d  M  UR                  U R                  R                  R                   U R                  5       R	                  U5         5        Mi     U H2  nU R                  R                  R                   R                  U5        M4     g)zhRemove the tensors from the model outputs.

Args:
    tensor_names: The names of tensors to be removed.
N)rj   r   r   r,   r  r   )r2   r  removed_outputsr   rj   s        r5   remove_tensors_from_outputs%ONNXModel.remove_tensors_from_outputs[  s~     "F&&&t{{'8'8'?'?@S@STZ@['\] # &FKK$$++F3 &r8   c                     Uc  / n[        UR                  5       H,  u  pVXc;   d  M  X6   nUR                  U:X  d  M"  Xt;  d  M)  Xu4s  $    g)a  Find parent node based on constraints on op_type.

Args:
    node (str): current node name.
    parent_op_type (str): constraint of parent node op_type.
    output_name_to_node (dict): dictionary with output name as key, and node as value.
    exclude (list): list of nodes that are excluded (not allowed to match as parent).

Returns:
    parent: The matched parent node. None if not found.
    index: The input index of matched parent node. None if not found.
r
  )	enumeraterc   rs   )r2   r-   parent_op_typer   excluderf   rc   r#   s           r5   match_first_parentONNXModel.match_first_parenth  sO     ?G!$**-HA+,3>>^38M!9$	 .
 r8   c                 2   Uc   eUb  US:  d   eUc  / nUc  U R                   nUc*  U R                  XXE5      u  pxUb  UR                  U5        U$ U[        UR                  5      :  a  gU R                  XU5      nUb  UR                  U:X  a  Xu;  a  U$ g)a,  Find parent node based on constraints on op_type and index.

Args:
    node (str): current node name.
    parent_op_type (str): constraint of parent node op_type.
    input_index (int or None): only check the parent given input index of current node.
    output_name_to_node (dict): dictionary with output name as key, and node as value.
    exclude (list): list of nodes that are excluded (not allowed to match as parent).
    return_indice (list): a list to append the input index when input_index is None.

Returns:
    parent: The matched parent node.
Nr   )r)   r  r   r   rc   r   rs   )	r2   r-   r  input_indexr   r  r|  r#   r  s	            r5   match_parentONNXModel.match_parent~  s    , "kQ&666?G&"&";"; 33DJ]gMF($$U+M#djj/)4GH&..N"BvG\Mr8   c           
          [        U5      [        U5      :X  d   eUc  U R                  nUn/ n[        U5       H3  u  pU R                  UU	X8   U/ US9n
U
c    gUR	                  U
5        U
nM5     U$ )ak  Find a sequence of input edges based on constraints on parent op_type and index.

Args:
    node (str): current node name.
    parent_op_types (str): constraint of parent node op_type of each input edge.
    parent_input_index (list): constraint of input index of each input edge.
                               None means no constraint.
    output_name_to_node (dict): dictionary with output name as key, and node as value.
    return_indice (list): a list to append the input index when there is
                          no constraint on input index of an edge.

Returns:
    parents: a list of matched parent node.
N)r  r|  )r   r)   r  r  r   )r2   r-   parent_op_typesparent_input_indexr   r|  current_nodematched_parentsrf   rs   matched_parents              r5   r  ONNXModel.match_parent_path  s    , %&#o*>>>>&"&";";#O4JA!.."%#+ / N %"">2)L 5 r8   c                 v    U R                   R                  R                   H  nSUR                  ;   d  M    g   g)zfCheck the model is smooth quantized or not.

Returns:
    bool: the model is smooth quantized or not.
_smooth_scaleTF)r3   r,   r;   rd   )r2   rD   s     r5   is_smoothquant_modelONNXModel.is_smoothquant_model  s2     JJ$$00D$))+ 1 r8   c                 &    U R                  5       nU$ )z-Find split nodes for layer-wise quantization.)r  )r2   split_nodess     r5   find_split_nodesONNXModel.find_split_nodes  s    FFHr8   c                 P
   [         R                  " 5       nUR                  U R                  5        UR                  R                  S5        [         R                  " 5       nUR                  U R                  5        UR                  R                  S5        SnSnU R                  R                  R                   Hz  n	US:X  a&  UR                  R                  R                  U	5        O+US:X  a%  UR                  R                  R                  U	5        U	R                  U:X  d  Ml  U	R                  nSnM|     [        U5      S:X  d   SU S[        U5       S35       eUS   n
U(       aE   SS	KJn  U" U R                  S
[        R                  R                  U R                   5      S9U l        U R)                  U
5      u  p[         R*                  R-                  XU5      n[/        US
S9n[/        US
S9nUR1                  5         UR1                  5         UR2                  R                  R                  R                  U5        UR2                  R                  R4                  R                  U5        / n/ nUR6                   H  nUUR8                  ;   d  M  U R)                  U5      u  nn[         R*                  R-                  UUU5      nUUR2                  R                  R                  ;  a  UR                  U5        UUR2                  R                  R4                  ;  d  M  UR                  U5        M     U H2  nUR2                  R                  R                  R                  U5        M4     U H2  nUR2                  R                  R4                  R                  U5        M4     UR;                  5         UR;                  5         UR=                  5         UR=                  5         [        R                  R                  U5      nUR?                  U5        [        R                  RA                  US5      nUUl!        URE                  U5        URG                  5         [$        RI                  SU S35        U(       au  UR?                  U5        [        R                  RA                  US5      nUUl!        URE                  U5        URG                  5         [$        RI                  SU S35        XV4$ XV4$ ! ["         a  n[$        R'                  S5        UeSnAff = f)a  Split model into two parts at a given node.

Args:
    split_node_name (str): name of the node where the model is split at>
    path_of_model_to_split (str): path of model to be split.
    shape_infer (bool): do shape inference. Default is True.
    save_both_split_models (bool): whether to save the two split models.
        False means only save the first split model.
        True means save both the two split models.
        Default id True.

Returns:
    tuple: the first split model, the second split model
r-   Nr   r  zJOnly support split at node with 1 output tensor, while current split node z has z output tensorsr   )infer_shapesT)
auto_mergebase_dirzShape infer fails for layer-wise quantization. We would recommend checking the graph optimization level of your model and setting it to 'DISABLE_ALL' or 'ENABLE_BASIC', as this may help avoid this error.)r   zsplit_model_part_1.onnxzsave split model part 1 to z for layer wise quantizationzsplit_model_part_2.onnxzsave split model part 2 to )%r   
ModelProtoCopyFromr   r,   r\  r-   r   rd   rj   r   'neural_compressor.adaptor.ox_utils.utilr  r   r   r    r   rB   r   error%_get_output_type_shape_by_tensor_namer   make_tensor_value_infor
   _remove_unused_input_outputr3   rc   r   r   remove_unused_initrm    load_model_initializer_by_tensorjoinrN   _save_split_modelr   r&  )r2   split_node_namepath_of_model_to_splitshape_infersave_both_split_modelssplit_model_part_1split_model_part_2split_node_outputpart_idxr-   split_tensor_namer  rF   split_tensor_typesplit_tensor_shapesplit_tensorinsert_output_for_model_1insert_input_for_model_2rj   output_typeoutput_shapeoutput_tensorrc   dir_of_model_to_splitsplit_model_part_1_pathsplit_model_part_2_paths                             r5   split_model_with_nodeONNXModel.split_model_with_node  sv   * "__.##DKK0  ++F3!__.##DKK0  ++F3 KK%%**D1}"((--44T:Q"((--44T:yyO+$(KK! + $%* 	
XYhXiinor  tE  pF  oG  GV  W	
* .a0 P*4;;4RTRYRYRaRabfbrbrRst 150Z0Z[l0m-{{99:K`rs&'9$O&'9$O 	668668  &&--44\B  &&,,33LA$&!#% (<<F+???,0,V,VW],^)\ $ B B6;Xd e (:(@(@(F(F(M(MM-44]C (:(@(@(F(F(L(LL,33MB = 0F$$**1188@ 0 .E$$**0077> . 	--/--/!!#!!# "0F G;;<QR"$'',,/DF_"`(?%,,-DE//123J2KKghi!??@UV&(ggll3HJc&d#,C)001HI335LL67N6OOklm%99%99E  9 s   ,AS? ?
T%	T  T%c           
          [         R                  R                  US-   5      (       a  [         R                  " US-   5        [        R
                  " U R                  USSUR                  S5      S   S-   SSS9  g)	z|Save split model as external data for layer wise quantization.

Args:
    save_path (str): the path to save the split model
rz   Trx   ry   r{   Fr|   N)r   r   r"   r   r   r   r   r   )r2   r  s     r5   r  ONNXModel._save_split_model`  se     77>>)g-..IIi')*KK"&$(__S)"-7#	
r8   c                    [         R                  R                  nSnU R                  R                  R
                   H  nUR                  U:X  d  M  UR                  R                  R                  nUR                  R                  R                  R                   Vs/ s H&  oUR                  S5      (       a  UR                  OSPM(     nn  X#4$    X#4$ s  snf )zGet output type and shape with a tensor name.

Args:
    tensor_name (str): name of a tensor

Returns:
    tuple: output type and shape
N	dim_valuery   )r   r=   FLOATr   r,   
value_inford   r   tensor_type	elem_typeshapedimr<   r  )r2   r  r  r  rj   r  s         r5   r  /ONNXModel._get_output_type_shape_by_tensor_namer  s     $$**	kk''22F{{k)"KK33==	RXR]R]RiRiRoRoRsRsRs3\\+%>%>CMMBFRs    3 	s   -Cc                 <   / n/ nU R                   R                  R                   H0  nUR                  U R                  ;  d  M  UR                  U5        M2     U R                   R                  R                   H0  nUR                  U R                  ;  d  M  UR                  U5        M2     U H2  nU R                   R                  R                  R                  U5        M4     U H2  nU R                   R                  R                  R                  U5        M4     g)z-Remove unused input & output for split model.N)	r   r,   rj   rd   r   r   rc   r   r   )r2   remove_outputsremove_inputsrj   rc   s        r5   r  %ONNXModel._remove_unused_input_output  s    kk''..F{{$":"::%%f- / [[&&,,Ezz!9!99$$U+ - %FKK$$++F3 %"EKK##**51 #r8   c                     / nU R                   R                  R                   H0  nUR                  U R                  ;  d  M  UR                  U5        M2     U R                  U5        g)zRemove unused init.N)r   r,   r;   rd   r   r   r   )r2   remov_initsrD   s      r5   r  ONNXModel.remove_unused_init  sQ    KK%%11Dyy 8 88""4( 2 	  -r8   c                 l   Uc)  [         R                  R                  U R                  5      nU R                  R
                  R                   Hd  nUR                  S5      (       d  M  UR                  [        R                  R                  :X  d  ME  [        R                  R                  X!5        Mf     g)z~Load model initializer by tensor.

Args:
    data_path (str, optional): the directory of saved initializer. Defaults to None.
Nr:   )r   r   r    r   r   r,   r;   r<   r:   r   r=   r>   r   load_external_data_for_tensor)r2   	data_pathrD   s      r5   r  *ONNXModel.load_model_initializer_by_tensor  sx     (8(89IKK%%11D}}_--$2D2DHXHXHaHa2a))GGX 2r8   c                    U(       a  [         R                  R                  [         R                  R                  [         R                  R	                  U R
                  5      U5      5      (       a[  [         R                  " [         R                  R                  [         R                  R	                  U R
                  5      U5      5        U R                  5         [        R                  R                  U R                  US9  [        R                  R                  U R                  [         R                  R	                  U R
                  5      S9  g)aU  Write external data of merged quantized model to new location to save memory.

Args:
    external_data_location (str, optional): external data location of merged quantized model.
                                            Defaults to "external.data".
    overwrite (bool, optional): if True, remove existed externa data. Defaults to False.
)r   )filepathN)r   r   r"   r  r    r   r   r  r   r   r*  r   write_external_data_tensors)r2   external_data_location	overwrites      r5   #write_external_data_to_new_location-ONNXModel.write_external_data_to_new_location  s     RWW__TEUEU5VXn(oppIIbggll277??43C3C#DF\]^--/!!@@Wm@n!!==dkkTVT[T[TcTcdhdtdtTu=vr8   c                 @   UR                  5         U R                  [        UR                  5       5      5        U R	                  [        UR                  5       5      5        U R                  5         UR                  5       R                   HR  nUR                  U R                  5       ;  d  M#  U R                  R                  R                  R                  U5        MT     / nU R                  R                  R                   H4  nUR                  UR                  5       ;   d  M#  UR                  U5        M6     U H2  nU R                  R                  R                  R                  U5        M4     UR                  5       R                   H  nUR                  U R                  5       ;  d  M#  UR                  U R                  5       ;  d  MC  UR                  U R                  ;  d  M_  U R                  R                  R                  R                  U5        M     g)z'Merge two split model into final model.N)r  r   r   r   r   r;   rm   r,   rj   rd   r   r   rc   r   r   )r2   to_merge_modelrj   remove_outputrc   s        r5   merge_split_modelsONNXModel.merge_split_models  su   ::<tN00234d>#=#=#?@A %**,33F{{$++-/!!((//7 4
 kk''..F{{n2244$$V, / $FKK$$++F3 $ $))+11E

$**,.JJdkkm3JJd&>&>>!!''..u5 2r8   c                 |   0 n/ nU R                   R                  R                   H"  nXBUR                  '   UR	                  U5        M$     U H2  nU R                   R                  R                  R                  U5        M4     U H4  nU R                   R                  R                  R	                  X%   5        M6     g)z:Re-org output of merged model for layer-wise quantization.N)r   r,   rj   rd   r   r   )r2   origin_outputoutputs
tmp_removerj   out_names         r5   re_org_outputONNXModel.re_org_output  s    
kk''..F#)FKK f% / !FKK$$++F3 ! &HKK$$++G,=> &r8   )	r!   r/   r*   r   r   r   r)   r1   r(   )FrP  r
  )NNNN)TT)zexternal.dataF)O__name__
__module____qualname____firstlineno____doc__r6   r   propertyrJ   rN   setterrU   rY   r]   r3   rc   rj   rm   rp   r0   r   r   r;   r,   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+   r   r.   r   r   r   r   r  r  r'  r-  staticmethodr4  r9  r>  rA  rL  rc  rl  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  __static_attributes__rT   r8   r5   r
   r
   &   st   B%. $ $           __" "     \\> >9:>    >
J0&-!&(0
#
.4;
)
	9
1
) ) )K$ ) )B
	*2+h2 / /W 1 1Z/b.,`%NJXSj2$744  *b !,\	 aev:p
$ (2".
Yw6:?r8   r
   )r  rX  loggingr   r@   collectionsr   pathlibr   r   onnx.external_data_helperutilr   r   	getLoggerr   r
   rT   r8   r5   <module>r*     sB   &    	 
      0			.	/
}? }?r8   