
    hF                         S SK r S SKrS SKrS SKrS SKJr  SSKJrJ	r	  SSK
Jr  SSKJr  SSKJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SSK J!r!   " S	 S
\5      r"g)    N)onnx_pb   )BaseQuantizerQuantizationParams)
TensorData)	ONNXModel)TENSOR_NAME_QUANT_SUFFIXQuantizationModeQuantizedValueQuantizedValueType__producer____version__add_infer_metadataattribute_to_kwargcompute_scale_zpcompute_scale_zp_float8find_by_nameget_qmin_qmax_for_qTypeget_qrange_for_qType	ms_domainquantize_onnx_initializer&save_and_reload_model_with_shape_infertensor_proto_to_array)CreateOpQuantizerc                      \ rS rSr S(S jrS rS rS rS rS r	S	 r
S
 rS)S jrS rS rS rS rS*S jr S+S jrS r S(S jrS\R,                  S\R,                  S\S\R2                  S\S\\\R,                  S-  4   4S jrS\S\R,                  SS4S jrS,S jrS rS)S jr     S-S  jr!     S.S! jr"S/S" jr#  S0S# jr$S$ r%S% r&S& r'S'r(g)1ONNXQuantizer&   Nc                 l   [         R                  " U UUUUUUU	U
UU5        U(       Gd  U R                  R                  5         [	        U R                  R                  5      nUR
                  R                   Vs0 s H  oR                  U_M     snU l        U R                  R                  UR
                  R                   Vs0 s H  oR                  U_M     sn5        U R                  R                  UR
                  R                   Vs0 s H  oR                  U_M     sn5        [        U5      U l        X@l        XPl        U R                  S:  U l        SU R"                  ;   =(       a    U R"                  S   U l        / U l        SU l        0 U l        U R*                  R                  UR
                  R                   Vs0 s H  oR                  S_M     sn5        U R*                  R                  UR
                  R                   Vs0 s H  oR                  S_M     sn5        U R                  R                  R
                  R,                   H<  nU R*                  R                  [.        R1                  UR                  S5      5        M>     U R                  [2        ;  a  [5        SU R                   35      eU R7                  5       U l        SU l        SU l        SU l        S	U l         0 U l!        U R                  RE                  5       U l#        g s  snf s  snf s  snf s  snf s  snf )
N
   MatMulConstBOnly/r   zunsupported quantization mode fixed_quantization_range_uint8fixed_quantization_range_int8
fixed_zerofixed_zero_zp)$r   __init__modelreplace_gemm_with_matmulr   graph
value_infonamevalue_infosupdateoutputinputr   modestaticopset_versionfuse_dynamic_quantextra_optionsq_matmul_const_b_only	new_nodesgraph_scopetensor_namesnodedictfromkeysr
   
ValueErrorcalculate_quantization_paramsquantization_paramsfixed_qrange_uint8_namefixed_qrange_int8_namefixed_zero_namefixed_zero_zp_namequantized_value_mapget_non_initializer_inputsgenerated_value_names)selfr'   per_channelreduce_ranger0   r1   weight_qTypeactivation_qTypetensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizer4   viotitr9   s                    a/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/quantization/onnx_quantizer.pyr&   ONNXQuantizer.__init__'   s    	 	
 JJ//1:4::;K;KLE6;kk6L6LM6L6LMD##5;;;M;M$N;MRWWb[;M$NO##5;;;L;L$M;LRWWb[;L$MN"5)DJ	"&"4"4r"9%74;M;M%M%xRVRdRdewRx"  u{{7I7I!J7I''1*7I!JK  u{{7H7H!I7H''1*7H!IJJJ$$**//D$$T]]4;;%BC 0 99,,=dii[IJJ#'#E#E#G  (H$&E#+"1 $&  &*ZZ%J%J%L"K  N$N$M "K!Is   :LL"L'L,L1c                 2   [         R                  R                  USU R                  R                  R                  S9n[        U5        [        UU R                  U R                  U R                  U R                  U R                  U R                  U R                  U R                  U R                  U R                   U R"                  5      nXl        U R&                   U S3Ul        UR)                  5         UR                  R                  R*                  $ )z
generate submodel for the subgraph, so that we re-utilize current quantization implementation.
quantize the submodel
update subgraph and set it back to node
onnx-quantizer)producer_nameopset_importsr!   )onnxhelper
make_modelr'   opset_importr   r   rG   rH   r0   r1   rI   rJ   rK   rL   rM   rN   r4   parentr7   quantize_modelr)   )rF   subgraph	graph_keywarped_modelsub_quantizers        rR   quantize_subgraphONNXQuantizer.quantize_subgraphp   s     {{--*****77 . 

 	<(%IIKK!!""!!%%
  $'+'7'7&81$E!$$&""((...    c                 ^   UR                    Vs/ s HW  nUR                  [        R                  R                  :X  d*  UR                  [        R                  R
                  :X  d  MU  UPMY     nn[        U5      S:X  a  U$ UR                  (       a  UR                  O#UR                   S[        U R                  5       3n0 nUR                    GH	  nUR                  [        R                  R                  :X  a8  UR                  U R                  UR                  U SUR                   35      0nOUR                  [        R                  R
                  :X  aa  / nUR                   H@  nUR                  U R                  UU SUR                   S[        U5       35      /5        MB     UR                  U0nO[        U5      nUR                  U5        GM     [        R                   R"                  " UR                  UR$                  UR&                  4SUR                  0UD6$ s  snf )zd
Check subgraph, if any, quantize it and replace it.
return new_nodes added for quantizing subgraph
r   _node_count_:r+   )	attributetyperX   AttributeProtoGRAPHGRAPHSlenr+   op_typer6   rb   ggraphsextendr   r-   rY   	make_noder/   r.   )	rF   r9   attrgraph_attrs	node_namekwargskvvaluer^   s	            rR   quantize_node_with_sub_graph*ONNXQuantizer.quantize_node_with_sub_graph   s    
&yyD//555dFYFYF`F`9` & 	 

 {q K!%DII4<<.SQUQ_Q_M`La0b	NNDyyD//555ii!7!79+Qtyyk@Z![\d11888 $HLL 22 (#,+Qtyyk3u:, G !, ii''-MM"# #$ {{$$T\\4::t{{eQUQZQZe^dee7
s   AH*'H*c                 V    [        S U R                  R                  5        5       5      $ )zA
Detect if model already has QuantizeLinear or DequantizeLinear.
c              3   j   #    U  H)  oR                   S :H  =(       d    UR                   S:H  v   M+     g7f)QuantizeLinearDequantizeLinearN)rn   ).0r9   s     rR   	<genexpr>.ONNXQuantizer.has_QDQ_nodes.<locals>.<genexpr>   s/      
_qW[LL,,R@R0RR_qs   13)anyr'   nodes)rF   s    rR   has_QDQ_nodesONNXQuantizer.has_QDQ_nodes   s-      
_c_i_i_o_o_q
 
 	
rd   c                     [        XR                  R                  5       5      b  gU R                  b  U R                  R	                  U5      $ g)NTF)r   r'   initializerr\   find_initializer_in_path)rF   initializer_names     rR   r   &ONNXQuantizer.find_initializer_in_path   sA    (***@*@*BCO;;";;778HIIrd   c                     U R                   R                  U5        U H1  nUR                   H  nU R                  R	                  U5        M      M3     g N)r6   rq   r.   rE   add)rF   r   r9   output_names       rR   add_new_nodesONNXQuantizer.add_new_nodes   s@    e$D#{{**..{;  + rd   c                    U R                  5       (       a  [        R                  " S5        U R                  R	                  5        H  nU R
                  (       a  U R                  U5      n[        U R                  5      n[        X5      nUR                  5         [        U[        U R                  5      5       H>  nU R                  U   R                   H  nU R                  R                  U5        M      M@     M     U R                  5         U R                  R!                  5       R#                  S5        U R                  R!                  5       R$                  R'                  U R                  5        U R(                  cB  U R                  R+                  5       u  pg[        U5      S:  a  [-        S[/        U5      -   5      e[0        U R                  R                  l        [4        U R                  R                  l        U R                  R                  R8                   Vs/ s H  oR:                  [<        :X  d  M  UPM     n	nU	(       dt  U R                   Vs/ s H  oR:                  S:X  d  M  UPM     n
nU
(       a@  U R                  R                  R8                  R                  5       nSUl        [<        Ul        U R                  R                  $ s  snf s  snf )NzPlease check if the model is already quantized. Note you don't need to quantize a QAT model. OnnxRuntime support to run QAT model directly.r9   r   z0Invalid model with unknown initializers/tensors.zcom.microsoftr   ) r   loggingwarningr'   r   enable_subgraph_quantizationry   rm   r6   r   quantizeranger.   rE   r   _dequantize_outputsr)   
ClearFieldr9   rq   r\   clean_initializersRuntimeErrorstrr   rV   r   producer_versionr[   domainr   version)rF   r9   number_of_existing_new_nodesop_quantizerir   _initializers_not_foundopsetms_opsetms_nodess              rR   r]   ONNXQuantizer.quantize_model   s   OOn
 JJ$$&D0088>+.t~~+>(,T8L!!#7T^^9LM#'>>!#4#;#;K..22;? $< N ' 	  " 	

%%f-

&&t~~6 ;;(,

(E(E(G%A)*Q."#UX[\rXs#stt)5

&,7

)'+zz'7'7'D'Db'DeXaHaE'Db)-Z;;/;YHZ

((5599; !(zz cZs   %K KK4Kc                     SU R                   ;   a4  [        R                  " SUU R                   S   5        U R                   S   $ [        SU< S35      e)NDefaultTensorTypezDget_tensor_type returns DefaultTensorType for tensor name %r, use %dz)Unable to find data type for weight_name=a7  . shape_inference failed to return a type probably this node is from a different domain or using an input produced by such an operator. This may happen if you quantize a model already quantized. You may use extra_options `DefaultTensorType` to indicate the default weight type, usually `onnx.TensorProto.FLOAT`.)r4   r   infor   rF   tensor_names     rR   _get_default_tensor_type&ONNXQuantizer._get_default_tensor_type   sf    $"4"44LLV""#67
 %%&9::7 GI J
 	
rd   c                    [        XR                  R                  5       5      nUb  UR                  $ XR                  ;   a  U R                  U   nUR
                  R                  S5      (       a\  U(       a5  UR
                  R                  R                  S:X  a  U R                  U5      $ UR
                  R                  R                  $ U R                  (       a  U R                  c  U(       a  U R                  U5      $ g U R                  R                  U5      nUb  U$ U R                  (       a1  U R                  (       a   U R                  R                  U5      nUb  U$ U(       a  U R                  U5      $ g )Ntensor_typer   )r   r'   r   	data_typer,   ri   HasFieldr   	elem_typer   r   r\   is_valid_quantize_weightget_tensor_type)rF   r   	mandatoryweightrO   otyperess          rR   r   ONNXQuantizer.get_tensor_type  s)   k::+A+A+CD###***!!+.Bww..!4!4!>!>!!C88EEww**44411t{{7J44[AA44[AL,,++--k:C
00==rd   c                 ~   U R                  U5      (       a  U R                  U5      $ XR                  ;   a  U R                  U   nUR                  R	                  S5      (       aW  UR                  R
                  R                  [        R                  R                  [        R                  R                  4;   a  g[        R                  " SU< SUR                   S35        gU R                  (       a,  U R                  (       a  U R                  R                  U5      $ [        R                  " SU< S35        g)	Nr   Tz<Inference failed or unsupported type to quantize for tensor z
, type is .Fz%Failed to infer data type of tensor: zS. Please add data type info for this tensor if your model has customized operators.)is_input_a_initializerr   r,   ri   r   r   r   
onnx_protoTensorProtoFLOATFLOAT16r   r   r   r\   is_float_tensor)rF   r   rO   s      rR   r   ONNXQuantizer.is_float_tensor  s	   &&{3300==***!!+.Bww..2773F3F3P3P&&,,&&..U 4 OON{o]ghjhohogppqr ,,;;..{;;3K? C6 7	
 rd   c                     U[         R                  R                  :X  a  U R                  XU5      $ U[         R                  R                  :X  a  U R                  XU5      $ [        SU S35      e)a\  
Create nodes for dynamic quantization of input and add them to nodes_list.
    parameter input_name: Name of the input.
    parameter nodes_list: new nodes are appended to this list.
    parameter qType: type to quantize to.
    parameter initial_type: type to quantize from
    return: scale_name, zero_point_name, scale_shape, zero_point_shape.
zUnexpected value for qType=r   )r   r   INT8+_get_dynamic_input_quantization_params_int8UINT8,_get_dynamic_input_quantization_params_uint8r<   )rF   
input_name
nodes_listqTypeinitial_types        rR   &_get_dynamic_input_quantization_params4ONNXQuantizer._get_dynamic_input_quantization_params6  si     J**///CCJ\hiiJ**000DDZ]ijj6ugQ?@@rd   c                    [         R                  R                  nUS-   nUS-   n[        R                  R                  SU/US-   /USS9nUR                  U5        US-   n[        R                  R                  SU/US-   /USS9n	UR                  U	5        US	-   n
[        R                  R                  S
UR                  S   /U
S-   /U
5      nUR                  U5        US	-   n[        R                  R                  S
U	R                  S   /US-   /U5      nUR                  U5        US-   n[        R                  R                  SUR                  S   UR                  S   /US-   /U5      nUR                  U5        [        R                  R                  U R                  U/ [        U5      S-  /5      nU R                  R                  U5        US-   n[        R                  R                  SUR                  S   U R                  /U/U5      nUR                  U5        [        R                  R                  U R                  U/ S/5      nU R                  R                  U5        XPR                  / / 4$ )aJ  
Create nodes for dynamic quantization of input to int8 and add them to nodes_list
    parameter input_name: Name of the input.
    parameter nodes_list: new nodes are appended to this list.
    parameter initial_type: initial weight type (FLOAT or FLOAT16)
    return: scale_name, zero_point_name, scale_shape, zero_point_shape.
_scale
_ReduceMin	ReduceMin:0r   keepdims
_ReduceMax	ReduceMax_AbsAbs_Abs_MaxMax       @	scale_DivDiv)r   r   r   rX   rY   rr   appendr.   make_tensorr@   r   r'   add_initializerrB   )rF   r   r   r   r   input_scale_namereduce_min_namereduce_min_nodereduce_max_namereduce_max_nodereduce_min_abs_namereduce_min_abs_nodereduce_max_abs_namereduce_max_abs_nodeabs_max_nameabs_max_nodeinitializer_divscale_div_namescale_div_nodeinitializer_zps                       rR   r   9ONNXQuantizer._get_dynamic_input_quantization_params_int8E  s    &&++ &0$|3++//Lt#$ 0 
 	/*$|3++//Lt#$ 0 
 	/* .6"kk33##A&' 4'(	
 	-.-6"kk33##A&' 4'(	
 	-.!J.{{,, ''*,?,F,Fq,IJD !	
 	,'++11''!%(3./	
 	

""?3#k1..  #T%@%@A	
 	.) 001H1H%QSVWUXY

"">2!8!8"b@@rd   c                    [         R                  R                  nUS-   nUS-   nUS-   n[        R                  R                  SU/US-   /USS9nUR                  U5        US-   n	[        R                  R                  S	U/U	S-   /U	SS9n
UR                  U
5        [        R                  R                  U R                  U/ [        U5      /5      nU R                  R                  U5        [        R                  R                  U R                  U/ S
/5      nU R                  R                  U5        US-   n[        R                  R                  SU
R                  S   UR                  S   /US-   /U5      nUR                  U5        US-   n[        R                  R                  SUR                  S   U R                  /U/U5      nUR                  U5        US-   n[        R                  R                  SU R                  UR                  S   /US-   /U5      nUR                  U5        US-   n[        R                  R                  SUR                  S   U/US-   /U5      nUR                  U5        US-   n[        R                  R                  SUR                  US-   /U5      nUR                  U5        US-   n[        R                  R                  SUR                  U/UUS9nUR                  U5        XV/ / 4$ )aK  
Create nodes for dynamic quantization of input to uint8 and add them to nodes_list
    parameter input_name: Name of the input.
    parameter nodes_list: new nodes are appended to this list.
    parameter initial_type: initial weight type (FLAOT or FLOAT16)
    return: scale_name, zero_point_name, scale_shape, zero_point_shape.
r   _zero_pointr   r   r   r   r   r   r           
_scale_SubSub
_scale_Divr   _zero_point_Sub_zero_point_Div_zero_point_FloorFloor_zero_point_CastCast)to)r   r   r   rX   rY   rr   r   r   r?   r   r'   r   rA   r.   )rF   r   r   r   r   r   input_zp_namer   r   r   r   initializer_qrangeinitializer_qvaluescale_sub_namescale_sub_noder   r   zp_sub_namezp_sub_nodezp_div_namezp_div_nodezp_floor_namezp_floor_nodezp_cast_namezp_cast_nodes                            rR   r   :ONNXQuantizer._get_dynamic_input_quantization_params_uint8  s4    &&,,%0"]2$|3++//Lt#$ 0 
 	/*$|3++//Lt#$ 0 
 	/* "[[44((!%()	
 	

""#56![[44T5I5I<Y[^a]bc

""#56 $l2..##A&(>(>q(ABd"#	
 	.)#l2..""1%t'C'CD	
 	.) !#44kk++!!?#9#9!#<=4 	
 	+& #44kk++"$454 	
 	+&"%88--g{7I7IM\`L`Kacpq-(!$66{{,,V]5I5IM?\hmr,s,'B66rd   c                 |   U R                   nUb  UGcW  U R                  b  XR                  ;  a  [        R                  " SU S35        gU R                  U   n[	        U[
        5      (       d  [        S[        U5       SU< S35      eUb  [        U5      S:w  a  [        SU S	U 35      e[        R                  " US
   /5      n[        US   S5      (       a1  US   R                  [        R                  [        R                  4;  a  [        S[        US   5       SU< 35      e[        R                  " US   /5      nUR                  [        R                   :w  d   eUS   nO[        R                  " U/5      n[        R                  " U/5      nU R                  U   nSU;   a   US   R                  nUR#                  U5      nUR                  [        R                   :w  d   e/ n	US-   n
/ nUS-   n[$        R&                  R)                  XXR+                  5       R-                  5       5      nU R.                  R1                  U5        UR                  [        R                  :X  a  [2        R4                  R6                  nOUUR                  [        R                  :X  a  [2        R4                  R8                  nO[        SUR                   SU< 35      e[$        R&                  R)                  XXR;                  S5      R-                  5       5      nU R.                  R1                  U5        SXX4$ )a4  
Create initializers and inputs in the graph for zero point and scale of output.
Zero point and scale values are obtained from self.quantization_params if specified.
    parameter param_name: Name of the quantization parameter.
    return: result, scale_name, zero_point_name, scale_shape, zero_point_shape.
z$Quantization parameters for tensor:"z" not specified)F r  r  r  Unexpected type  for r      zbQuantization parameters should contain zero point, scale, quant type. Specified values for output z: 
zero_pointscaledtypez and param_name=
quant_typer   r   zUnexpected dtype=z for param_name=)T)rJ   r>   r   r   
isinstancer   	TypeErrorri   rm   r<   nparrayhasattrr  float32float16float64astyperX   rY   r   raveltolistr'   r   r   r   r   r   reshape)rF   
param_name	use_scaleuse_zeropointzero_point_typeparamszero_point_valuesscale_valuesr  zero_point_shapezero_point_namescale_shape
scale_nameinit_zp
scale_type
init_scales                   rR   _get_quantization_params&ONNXQuantizer._get_quantization_params  s    // 5''/:E]E]3]CJ<_`,--j9Ff&899"24<.j^ST UVV~V!1 33=,bJ 
 !#&*>)? @6'?G44w8M8MVXV`V`bdblblUm8m #3D4I3JJZ[eZh!ijj88VG_$56L%%333$\2O "- 988YK0L--j9F& w--+2259%%333$}4(*
 ++)).>@W@W@Y@`@`@b
 	

""7++#//55J2::-#//77J01C1C0DDTU_Tbcdd[[,,Z[RfRfglRmRtRtRvw


"":.Z+OOrd   c           	         UR                   U   nUS:w  d   S5       eU[        -   nUS-   n	Ub  Ub  SXEpn
OU R                  U5      u  pn  n/ nU
(       a&  [        R                  R                  SX{U/U/U	5      nOU R                  (       a  gU R                  (       aN  U[        R                  R                  :X  a0  US-   nUS-   n[        R                  R                  S	U/XU/U	5      nOQUc   S
U< SU SU SU 35       eU R                  X~X6S9u  nnnn[        R                  R                  SX{U/U/U	5      n[        XxXU5      U R                  U'   / UQUP$ )ar  
Given an input for a node (which is not a initializer), this function

- add nodes to compute zero point and scale for this input if they don't exist.
- add new QuantizeLinear node to quantize the input.

:param node: node being quantized in NodeProto format.
:param input_index: index of input in node.input.
:param qType: type to quantize to.
:param given_scale_name: if those inputs need to be quanitzed using this scale tensor.
:param given_zp_name: if those inputs to be quantized using this zeropoint tensor.
:param initial_type: type of the weight to quantize
:return: List of newly created nodes in NodeProto format.
r  z*Cannot access undefined variable in graph._QuantizeLinearNTr}   r   r   DynamicQuantizeLinearzCCannot quantize input without knowing the initial type, input_name=z, input_index=z, qType=z, node=r   )r/   r	   r1  rX   rY   rr   r1   r3   r   r   r   r   r   rC   )rF   r9   input_indexr   given_scale_namegiven_zp_namer   r   r   ql_node_name
data_foundr-  zp_namer   r   qlinear_noder,  zp_shapes                     rR   _get_quantize_input_nodes'ONNXQuantizer._get_quantize_input_nodes2  s   " ZZ,
RM!MM #;;!$55(}/H/35EGJG484Q4QR\4]1JGQ;;00 1	L {{ &&5J4J4J4P4P+P'(2
$}4#{{44+L g6 	  $/ "",~k](SXRYY`ae`fh/ ??
SX?t#{{44$W5 M 	  0>jWalq/r  ,%%%%rd   c                     XR                   ;   a  U R                   U   $ U R                  b  U R                  R                  U5      $ g r   )rC   r\   find_quantized_value)rF   r   s     rR   rB  "ONNXQuantizer.find_quantized_valuex  sA    111++J77;;";;33J??rd   c
                    [         R                  " U5      n
USU
-  -  U-  n[         R                  " UR                  5       [         R                  S9n[         R                  " UR                  5       [         R                  S9nX-  nX:  a~  US:  ax  X-  nX-  nU	c7  [
        R                  " SU SU SU S35        S[         R                  " UUS94$ [
        R                  " S	U	 S
U SU SU S3	5        SUR                  U5      4$ SU4$ )zHAdjust a single weight scale to ensure the int32 bias does not overflow.r   r  r   zIncreasing scale for weight `z` by the ratio z to ensure bias `z` has a valid scale.TzIncreased scale[z] for weight `z` by ratio F)r  absr  itemr  r   r   r  )rF   bias_valinput_scaleweight_scaleweight_scale_dtypeweight_name	bias_nameqrangemultiplicative_epsilonidxabsmaxbias_smallest_valid_scaleinput_scale_fp64weight_scale_fp64bias_candidate_scaleratio	new_scales                    rR   $adjust_single_weight_scale_if_needed2ONNXQuantizer.adjust_single_weight_scale_if_needed  s/    !$:cFl$Kf$T!88K$4$4$6bjjIHH\%6%6%8

K/C <CWZ]C]-DE)1I{3K=PUw W$$-;.BD RXXi7IJJJ&se>+kRWQX Y''0k1EG Y--.@AAAl""rd   rI  rJ  rL  bias_tpis_per_channelreturnc                 v   UR                   (       d  g[        U5      n[        R                  " [        R                  5      nSn[        R
                  " UR                  [        R                  S9[        R
                  " UR                  S-   [        R                  S9-
  n	UR                  n
SnU(       d  [        R                  " UR                  5       [        R
                  " S[        R                  S95      n[        R                  " UR                  5       [        R
                  " S[        R                  S95      n[        R                  " [        R                  " U5      [        R                  " U5      5      nU R                  UUUU
UUR                  U	U5      u  nnU(       a  UnSnX4$ UR                  (       ar  [!        UR                  5      S:X  aY  [#        UR                  S   5       H=  nU R                  UU   UUU   U
UUR                  U	UUS9	u  nnU(       d  M6  UUU'   SnM?     X4$ )	zOChecks if the bias scale is too small and increases the weight scale if needed.)FNgqh ?rE  r   Fr   T)rP  )sizer   r  iinfoint32r  maxr  minr  minimummaximumrF  rX  r+   shaperm   r   )rF   rI  rJ  rL  rZ  r[  bias_float_data
int32_inforO  rN  rK  updatedrminrmaxrQ  changedrW  r   s                     rR   #_adjust_weight_scale_for_int32_bias1ONNXQuantizer._adjust_weight_scale_for_int32_bias  s      /8XXbhh'
!'*..

;bhhz~~XYGYacakak>ll)//::o113RXXarzz5RSD::o113RXXarzz5RSDZZtbffTl;F!%!J!J"&	"GY ($ $$# C(:(:$;q$@<--a01%)%N%N#A& O&LL* &O 
&" 7&/LO"G 2  $$rd   rW  c           	      &   XR                   ;  a  gU R                   U   n[        XR                  R                  5       5      n[        UR                  U R                  R                  5       5      n[        UR
                  U R                  R                  5       5      n[        UR                  U R                  R                  5       5      nUb	  Ub  Ub  Uc  gU R                  R                  U5        U R                  R                  U5        [        R                  R                  U5      nUR                  n	[        R                  " U[        R                  R                  UR                   5      S9n
[        R                  R#                  U
R%                  UR&                  5      UR                  5      nU R                  R)                  U5        [+        UU R,                  UU
U	UR                  S9nU R                  R)                  U5        g)zCRe-quantizes the given weight initializer using the provided scale.NrE  )quant_weight_name)rC   r   r'   r   r-  r<  q_nameremove_initializerrX   numpy_helperto_arrayaxisr  asarrayrY   tensor_dtype_to_np_dtyper   
from_arrayr"  dimsr   r   rI   )rF   rL  rW  qv	weight_tp
scale_initzp_initq_weight_initweight_zero_pointrt  scale_npnew_scale_initnew_q_weights                rR   _requantize_weight ONNXQuantizer._requantize_weight  s    666%%k2 jj.D.D.FG	!"--1G1G1IJ
rzz4::+A+A+CD$RYY

0F0F0HI
 2goI^

%%j1

%%m4 --66w?ww ::it{{/S/ST]TgTg/hi**55h6F6Fz6WY[YfYfg

"">2 1 ii
 	

""<0rd   c                    XR                   ;   a  U R                   U   R                  $ U R                   U   R                  n[        XPR                  R                  5       5      n[        U5      nX R                   ;   a  U R                   U   R                  nO5X R                  ;   a  U R                  U5      u  p    n	O[        SU S35      e[        XR                  R                  5       5      n
[        U
5      nU R                   U   R                  n[        XR                  R                  5       5      nUb  [        R                  R                  U5      OSnU R                  nUb  UR                  (       a  UR!                  5       (       d  U R"                  [$        R&                  R(                  4;   aV  [        XR                  R                  5       5      nU R+                  UUUUU5      u  nnU(       a  U R-                  UU5        UnU R/                  XXt5      u  nnnnnnXR                   ;  d   e[1        UUUU[2        R4                  UR                  S:  a  SOSUUS9nUU R                   U'   U$ )zM
Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
z	Expected z5 to be in quantized value map for static quantizationNr   r   )	node_type
node_qtype)rC   rp  r-  r   r'   r   r   r>   r1  r<   r<  rX   rr  rs  rG   r^  r   rI   r   r   r   rl  r  quantize_bias_static_implr   r   Initializer)rF   rM  r   rL  betaweight_scale_nameweight_initializerrJ  r   r   inputscale_initializerrI  weight_zp_nameweight_zp_initr~  r[  bias_initializer
did_updatenew_weight_scalequantized_bias_namequantized_bias_scale_namequantized_bias_zp_namebias_scale_datar  r  quantized_values                             rR   quantize_bias_static"ONNXQuantizer.quantize_bias_static  s_    000++I6=== !44[ALL)*;ZZ=S=S=UV,-?@ 111#77
CNN333+/+H+H+T(AAqy4ijkk!-.>

@V@V@X!Y+,BC 11+>FF%njj6L6L6NOJXJdD--66~Fjn)))!&&%))++!!j&<&<&A&A%CC+Izz7M7M7OP+/+S+S ,(J( ''5EF/ **9<V	
%"  8 8888(%"** %%)At!	
 />  +""rd   c                 r    XR                   ;   =(       d#    XR                  ;   =(       d    XR                  ;   $ )za
only check for value info and newly generated tensor names, initializers are checked separately
)r,   r8   rE   r   s     rR   contains_tensorONNXQuantizer.contains_tensorJ  s7    
 ,,, ;000;999	
rd   c           
      ,    U R                  UUSSSSUS9$ )NFr  r9   indicesinitializer_use_weight_qTyperH   op_level_per_channelrt  from_subgraph_ONNXQuantizer__quantize_inputs)rF   r9   r  r  s       rR   quantize_activation!ONNXQuantizer.quantize_activationT  s/    %%).!&' & 
 	
rd   c           
      ,    U R                  UUSUUUUS9$ )NTr  r  )rF   r9   r  rH   r  rt  r  s          rR   quantize_weightONNXQuantizer.quantize_weighta  s1     %%)-%!5' & 
 	
rd   c                    / n/ n	/ n
/ nU GHj  nUR                   U   nXR                  ;   ab  U R                  U   nUR                  UR                  5        U	R                  UR                  5        U
R                  UR
                  5        M  U(       d5  U
R                  S5        UR                  S5        U	R                  S5        M  [        XR                  R                  5       5      nUb  U R                  (       aG  U(       a@  U R                  UR                  U(       a  U R                  OU R                  UU5      u  nnnO4U R                  UU(       a  U R                  OU R                  U5      u  nnnU
R                  U5        U	R                  U5        UR                  U5        GM  U R                  U5      (       Ga"  U R                  R!                  US-   U R"                  U R                  R%                  5       5      nUGc  UR                   U   nUU R&                  ;   a|  U R&                  U   nUR)                  S5      (       d   SU S35       eUR*                  R)                  S5      (       d   SU S35       eUR*                  R,                  R.                  nO+UU R0                  ;   d   SU< S	35       eU R0                  U   nU R3                  XU R                  US
9nUc    gU(       a  U R5                  U5        OUR7                  U5        US   nUR8                  S:X  aZ  U
R7                  UR:                  5        UR                  UR                   S   5        U	R                  UR                   S   5        GM  U
R                  UR:                  S   5        UR                  UR:                  S   5        U	R                  UR:                  S   5        GM  U R<                  bd  U R<                  R?                  UU/UUUUSS9u  nnnnU
R                  US   5        UR                  US   5        U	R                  US   5        GMR  [A        SU SU RB                   35      e   XX4$ )aC  
Given a node, this function quantizes the inputs as follows:
    - If input is an initializer, quantize the initializer data, replace old initializer
      with new initializer
    - Else, add QuantizeLinear nodes to perform quantization
    parameter node: node being quantized in NodeProto format.
    parameter indices: input indices to quantize.
    return: (List of quantized input names,
             List of zero point names used for input quantization,
             List of scale names used for input quantization,
             List of new QuantizeLinear nodes created)
r  r4  ri   zvalue_info=z has no type.r   z is not a tensor.zshape inference failed for zF and attribute 'tensor_names' does not have any value for this tensor.r6  )NNNNr  r}   r      r   T)r  rH   r  rt  r  z!Invalid tensor name to quantize: z @graph scope)"r/   rC   r   r-  r<  rp  r   r'   r   rG   quantize_weight_per_channelr+   rI   rJ   quantize_initializerr  find_node_by_namer6   r)   r,   r   ri   r   r   r8   r?  r   rq   rn   r.   r\   r  r<   r7   )rF   r9   r  r  rH   r  rt  r  scale_nameszero_point_namesquantized_input_namesr   r7  
node_inputr  r   q_weight_namer<  r-  r=  r   r*   r   quantize_input_nodesparent_quantized_input_namesparent_zero_point_namesparent_scale_namesr   s                               rR   __quantize_inputsONNXQuantizer.__quantize_inputst  sY   .  ""KK0J 555"&":"::"F""?#=#=> ''(?(?@%,,_-C-CD%,,R0""2& ''+&z::3I3I3KLK&##(<
 88#((-I))tOdOd$		%" :>9R9R#-I))tOdOd$:6M7J &,,]; ''0"":.%%j11#zz;;!22DNNDJJDTDTDV   '!%K!8J!T%5%55%)%5%5j%A
)226::ck*Ub<cc:)77FFs+V`UaarHssF'1'B'B'L'L  *T->->> 9* H+ ,>
 (,'8'8'D+/+I+I4+@+@| ,J ,( ,37$**+?@%9:#7#;L''+;;)001D1DE&&|'9'9!'<=$++L,>,>q,AB)001D1DQ1GH&&|':':1'=>$++L,?,?,BC( KK11 M1M!-)="& 2 0+& &,,-I!-LM""#5a#89 ''(?(BC !#DZLP]^b^n^n]o!pqqG #J %JJrd   c                 `   UR                   U R                  ;   a<  U R                  UR                      nUR                  UR                  UR                  4$ U R                  XX45      u  pgn[        UR                   UUU[        R                  S5      nXPR                  UR                   '   XgU4$ )aj  
:param weight: TensorProto initializer
:param qType: type to quantize to
:param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                          If keep_float_weight is False, quantize the weight, or don't quantize the weight.
:return: quantized weight name, zero point name, scale name
N)	r+   rC   rp  r<  r-  quantize_initializer_implr   r   r  )	rF   r   r   rH   keep_float_weightr  r  r<  r-  s	            rR   r  "ONNXQuantizer.quantize_initializer  s     ;;$222"66v{{CO&&''**  .2-K-K<.
*

 )KK**
 1@  -z11rd   c                    XR                   ;   a2  U R                   U   nUR                  UR                  UR                  4$ U R	                  XX4U5      u  pxn	[        UUU	U[        R                  S 5      nX`R                   U'   XxU	4$ r   )rC   rp  r<  r-   quantize_weight_per_channel_implr   r   r  )
rF   rL  rI   channel_axisrH   r  r  r  r<  r-  s
             rR   r  )ONNXQuantizer.quantize_weight_per_channel  s     222"66{CO&&''**  .2-R-R|CT.
*
 )**
 1@  -z11rd   c                    XR                   ;   Gak  XR                  ;  Ga[  U R                   U   n[        UR                  U R                  R                  5       5      nU R                  R                  R                  S:w  d'  U R                  R                  R                  S:X  a5  Ub2  Ub/  [        R                  R                  U5      R                  S:X  d   eUS-   nU R                  R                  X@R                  U R                  R                  5       5      nUcG  UR                  UR                  UR                  /n[        R                   R#                  SXa/U5      nU$ XR$                  S   :X  d   eg)a~  
Given a value (input/output) which is quantized, add a DequantizeLinear node to dequantize
it back to float32 or float16
    parameter value_name: value to dequantize
    parameter new_nodes_list: List of new nodes created before processing current node
    return: None if there is already a DequantizeLinear node that dequantizes it
            A DequantizeLinear node otherwise
rU   Nr   _DequantizeLinearr~   r   )rC   rE   r   r-  r'   r   rV   rX   rr  rs  r^  r  r6   r)   rp  r<  rY   rr   r.   )rF   
value_namer  r{  dqlinear_namedqlinear_nodedqlinear_inputsdequantize_nodes           rR   _dequantize_valueONNXQuantizer._dequantize_value8  sR    222KeKe9e"66zBO &o&@&@$**BXBXBZ[J zz--1AA

  ..2BBzG] ")T->->-G-G
-S-X-X\]-]]]&)<<M JJ88X\XbXbXhXhXjkM$#**#..#++#
 #'++"7"7&}# '& "%9%9!%<<<<rd   c                     U R                   R                  5       R                   H>  nU R                  UR                  5      nUc  M#  U R
                  R                  U5        M@     g)z
Dequantize output if it is quantized
    parameter new_nodes_list: List of new nodes created before processing current node
    return: List of new nodes created
N)r'   r)   r.   r  r+   r6   r   )rF   r.   r  s      rR   r   !ONNXQuantizer._dequantize_outputs_  sM     jj&&(//F"44V[[AO*%%o6 0rd   c           	      >   U R                   c  g U R                  5         0 nU R                    GHk  nU R                   U   n[        U[        5      (       d  [	        S[        U5       SU< S35      eU R                  R                  U0 S9nU R                  nSU;   a  US   R                  nSU;   a  SU;   a
  US   US   pvOU[        R                  R                  :X  a  [        XSR                  S   5      u  pgOUR                  S	UR                   S
   5      nUR                  SUR                   S   5      n	UR                  SU R"                  5      n
UR                  SS5      n[%        X[U
S9u  p['        XXXR(                  5      u  pg[+        XgUS9X'   GMn     U$ )Nr  r  r   )default_valr  r  r  r   ri  r   rj  	symmetricrH   F)rH   r  )r  r  r  )rK   adjust_tensor_rangesr  r   r  ri   tensor_quant_overridesget_per_tensor_overridesrJ   r   rX   r   FLOAT8E4M3FNr   avg_stdgetrange_valueis_activation_symmetricr   r   min_real_ranger   )rF   r>   r   tdquant_overridesr  zeror  ri  rj  r  rH   qminqmaxs                 rR   r=   +ONNXQuantizer.calculate_quantization_paramsk  s   %!!# --K##K0Bb*--"248*E+PQ RSS"99RRS^lnRoO..J.,\:FF
/)lo.M-l;_W=Uet//<<<5j**Q-Pe&**62>>!3DE&**62>>!3DE+//T=Y=YZ	.22>5I4Zfop
.t4yReRef/ATku/v,/ .2 #"rd   )r@   r?   rA   rB   r3   rE   r7   r0   r'   r6   r5   r>   rC   r1   r8   r,   r   )F)NN)NNN)g      ?)FFr  F)TFFr  F)FF)TF))__name__
__module____qualname____firstlineno__r&   rb   ry   r   r   r   r]   r   r   r   r   r   r   r1  r?  rB  rX  r  ndarrayr   rX   r   booltuplerl  r  r  r  r  r  r  r  r  r  r   r=   __static_attributes__ rd   rR   r   r   &   sZ    FMR/> fD
<+ Z
"22ARAh\7|9Px aeD&L" ##J6%ZZ6% jj6% 	6%
 !!6% 6% 
tRZZ$&&	'6%p$1c $1bjj $1T $1LF#P
	
" "
. &*"AKF2L 2@%N
7 #rd   r   )#r   numpyr  rX   onnx.numpy_helperr   r   base_quantizerr   r   	calibrater   
onnx_modelr   quant_utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   registryr   r   r  rd   rR   <module>r     sK        & = ! !    & (e#M e#rd   