U
    h                     @   s   d dl Z d dlmZmZ d dlZd dlZd dlZd dlmZ	 zd dl
mZ W n ek
rd   dZY nX ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ dd	l%m&Z& G d
d dZ'G dd dZ(dS )    N)AnyDict)onnx_pb)to_array_extended   )
TensorData)	ONNXModel)TENSOR_NAME_QUANT_SUFFIXQuantizationModeQuantizedValueQuantizedValueType	QuantType__producer____version__add_infer_metadataattribute_to_kwargcompute_scale_zpcompute_scale_zp_float8find_by_nameget_qmin_qmax_for_qTypeget_qrange_for_qTypemodel_has_infer_metadata	ms_domainquantize_data&save_and_reload_model_with_shape_infertensor_proto_to_array)CreateOpQuantizerc                   @   s:   e Zd Zeeef dddZdd Zdd Zdd	 Z	d
S )QuantizationParamsdatac                 K   sh   i | _ | D ]T\}}t|ts4tdt| dt|tttfsXtdt| d|| j |< qd S )NzKeys must be strings not .z#Values must be int, float, str not )r   items
isinstancestr	TypeErrortypeintfloat)selfr   kv r+   K/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/quantization/onnx_quantizer.py__init__/   s    
zQuantizationParams.__init__c                 c   s   | j E d H  d S Nr   r(   r+   r+   r,   __iter__8   s    zQuantizationParams.__iter__c                 C   s
   | j | S r.   r   )r(   keyr+   r+   r,   __getitem__;   s    zQuantizationParams.__getitem__c                 C   s
   t | jS r.   )lenr   r/   r+   r+   r,   __len__>   s    zQuantizationParams.__len__N)
__name__
__module____qualname__r   r#   r   r-   r0   r2   r4   r+   r+   r+   r,   r   .   s   	r   c                   @   s  e Zd ZdDddZdd Zdd Zdd	 Zd
d Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! ZdEd"d#ZdFd$d%Zd&d' Zd(d) Zd*d+ ZdGd-d.Zd/d0 ZdHd2d3ZdId5d6ZdJd8d9ZdKd:d;ZdLd<d=Zd>d? Zd@dA Z dBdC Z!dS )MONNXQuantizerNc                 C   st  t |st|}dd |jjD | _| jdd |jjD  | jdd |jjD  t|| _	|sp| j	
  || _|| _|| _|| _d| _|r|ni | _d| jko| jd | _d| jko| jd | _d| jko| jd | _d	| jkr|tjtjfkn| jd	 | _d
| jkrdn| jd
 | _t|d|| _t|d|| _|d k	rttdd | rtdt dd | D  d|| _!|	| _"|
| _#|| _$g | _%d | _&d| _'i | _(| j(dd |jjD  | j(dd |jjD  | j	j	jj)D ]}| j(dd |jD  q| * | _+| jt,kr6t-d| j | . | _/d| _0d| _1d| _2d| _3i | _4| j	5 | _6i | _7d S )Nc                 S   s   i | ]}|j |qS r+   name).0vir+   r+   r,   
<dictcomp>T   s      z*ONNXQuantizer.__init__.<locals>.<dictcomp>c                 S   s   i | ]}|j |qS r+   r9   r;   Zotr+   r+   r,   r=   U   s      c                 S   s   i | ]}|j |qS r+   r9   r;   itr+   r+   r,   r=   V   s      FZEnableSubgraphZForceQuantizeNoInputCheckZMatMulConstBOnlyZWeightSymmetricZActivationSymmetrictensor_typec                 S   s   t | t S r.   )r"   r   )tr+   r+   r,   <lambda>       z(ONNXQuantizer.__init__.<locals>.<lambda>z(tensors_range contains unexpected types c                 s   s   | ]}t |V  qd S r.   )r%   )r;   r*   r+   r+   r,   	<genexpr>   s     z)ONNXQuantizer.__init__.<locals>.<genexpr>z, not TensorData./c                 S   s   i | ]}|j d qS r   r9   r>   r+   r+   r,   r=      s      c                 S   s   i | ]}|j d qS rG   r9   r?   r+   r+   r,   r=      s      c                 S   s   i | ]
}|d qS rG   r+   )r;   output_namer+   r+   r,   r=      s      zunsupported quantization mode Zfixed_quantization_range_uint8Zfixed_quantization_range_int8Z
fixed_zeroZfixed_zero_zp)8r   r   graphZ
value_infovalue_infosupdateoutputinputr   modelZreplace_gemm_with_matmulper_channelreduce_rangemodestaticfuse_dynamic_quantextra_optionsenable_subgraph_quantizationZforce_quantize_no_input_checkZq_matmul_const_b_onlyr   ZQInt8ZQFLOAT8E4M3FNis_weight_symmetricis_activation_symmetricgetattractivation_qTypeweight_qTypeanymapvaluesr$   settensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantize	new_nodesparentgraph_scopetensor_namesnodecheck_opset_versionopset_versionr
   
ValueErrorcalculate_quantization_paramsquantization_paramsfixed_qrange_uint8_namefixed_qrange_int8_namefixed_zero_namefixed_zero_zp_namequantized_value_mapZget_non_initializer_inputsgenerated_value_namesused_scale_zp_map)r(   rN   rO   rP   rQ   rR   rZ   rY   r_   r`   ra   rb   rT   rg   r+   r+   r,   r-   C   sl    

"

zONNXQuantizer.__init__c                 C   s~   t jj|d| jjjd}t| t|| j| j| j	| j
| j| j| j| j| j| j| j}| |_| j | d|_|  |jjjS )z
        generate submodel for the subgraph, so that we re-utilize current quantization implementation.
        quantize the submodel
        update subgraph and set it back to node
        zonnx-quantizer)producer_nameZopset_importsrF   )onnxhelperZ
make_modelrN   opset_importr   r8   rO   rP   rQ   rR   rZ   rY   r_   r`   ra   rb   rT   rd   re   quantize_modelrI   )r(   subgraphZ	graph_keyZwarped_modelZsub_quantizerr+   r+   r,   quantize_subgraph   s0    zONNXQuantizer.quantize_subgraphc           	      C   s  dd |j D }t|dkr |S |jr,|jn|j dt| j }i }|j D ]}|jtjjkr|j| 	|j
| d|j i}n\|jtjjkrg }|jD ].}|| 	|| d|j dt| g q|j|i}nt|}|| qLtjj|j|j|jfd|ji|S )z|
        Check subgraph, if any, quantize it and replace it.
        return new_nodes added for quantizing subgraph
        c                 S   s,   g | ]$}|j tjjks$|j tjjkr|qS r+   )r%   ru   AttributeProtoGRAPHGRAPHS)r;   attrr+   r+   r,   
<listcomp>   s    z>ONNXQuantizer.quantize_node_with_sub_graph.<locals>.<listcomp>r   Z_node_count_:r:   )	attributer3   r:   op_typerc   r%   ru   r{   r|   rz   gr}   Zgraphsextendr   rK   rv   	make_noderM   rL   )	r(   rg   Zgraph_attrsZ	node_namekwargsr~   kvvaluery   r+   r+   r,   quantize_node_with_sub_graph   s0    "
"
z*ONNXQuantizer.quantize_node_with_sub_graphc                 C   s  dd | j j jD }t|dkr(td|d j}|dkrNtd| dS |dk rtd| | j j j|d  | j j j	t
jd	d
g d
}|dk r| jtjjkrtd| | j j j|d  | j j j	t
jd	dg d| j j _d}d| _|S )Nc                 S   s    g | ]}|j r|j d kr|qS )zai.onnxdomainr;   opsetr+   r+   r,   r      s     
 z5ONNXQuantizer.check_opset_version.<locals>.<listcomp>r   z$Failed to find proper ai.onnx domainr   
   zThe original model opset version is {}, which does not support node fusions. Please update the model to opset >= 11 for better performance.zThe original model opset version is {}, which does not support quantization. Please update the model to opset >= 11. Updating the model automatically to opset 11. Please verify the quantized model.       zThe original model opset version is {}, which does not support quantization to float 8. Please update the model to opset >= 19. Updating the model automatically to opset 19. Please verify the quantized model.	   T)rN   rw   r3   rj   versionloggingwarningformatremover   ru   rv   Zmake_opsetidrZ   
onnx_protoTensorProtoFLOAT8E4M3FNZ
ir_versionrS   )r(   Zai_onnx_domainri   r+   r+   r,   rh      sD    

z!ONNXQuantizer.check_opset_versionc                 C   s   t dd | j D S )zQ
        Detect if model already has QuantizeLinear or DequantizeLinear.
        c                 s   s"   | ]}|j d kp|j dkV  qdS )QuantizeLinearDequantizeLinearN)r   r;   rg   r+   r+   r,   rE     s    z.ONNXQuantizer.has_QDQ_nodes.<locals>.<genexpr>)r[   rN   nodesr/   r+   r+   r,   has_QDQ_nodes  s    zONNXQuantizer.has_QDQ_nodesc                 C   s2   t || j d k	rdS | jd k	r.| j|S dS )NTF)r   rN   initializerrd   find_initializer_in_path)r(   Zinitializer_namer+   r+   r,   r     s
    
z&ONNXQuantizer.find_initializer_in_pathc                 C   s2   | j | |D ]}|jD ]}| j| qqd S r.   )rc   r   rL   rr   add)r(   r   rg   rH   r+   r+   r,   add_new_nodes"  s    
zONNXQuantizer.add_new_nodesc                 C   sH  |   rtd | j D ]d}| jr0| |}t| j}t	| |}|
  t|t| jD ]"}| j| jD ]}| j| qlq\q|   | j d | j j| j | jd kr| j \}}t|dkrtdt| t| jj_t| jj_dd | jjjD }|s@dd | jD }	|	r@| jjj }
d|
_t|
_| jjS )	NzPlease check if the model is already quantized.Note you don't need to quantize a QAT model. OnnxRuntime support to run QAT model directly.rg   r   z0Invalid model with unknown initializers/tensors.c                 S   s   g | ]}|j tkr|qS r+   )r   r   r   r+   r+   r,   r   K  s     
 z0ONNXQuantizer.quantize_model.<locals>.<listcomp>c                 S   s   g | ]}|j d kr|qS )zcom.microsoftr   r   r+   r+   r,   r   M  s     
 r   ) r   r   r   rN   r   rU   r   r3   rc   r   quantizerangerL   rr   r   _dequantize_outputsrI   Z
ClearFieldrg   r   rd   Zclean_initializersRuntimeErrorr#   r   rt   r   Zproducer_versionrw   r   r   r   )r(   rg   Znumber_of_existing_new_nodesZop_quantizerirH   _Zinitializers_not_foundZms_opsetZms_nodesr   r+   r+   r,   rx   (  s<    





zONNXQuantizer.quantize_modelc                 C   s   t || j }|d k	S r.   )r   rN   r   )r(   
input_namer   r+   r+   r,   is_input_a_initializerU  s    z$ONNXQuantizer.is_input_a_initializerc                 C   s   | j S r.   )rO   r/   r+   r+   r,   is_per_channelY  s    zONNXQuantizer.is_per_channelc                 C   sF   t || j }|d k	r&|jtjjkS | jr6| jd kr:dS | j	|S )NF)
r   rN   r   	data_typer   r   FLOATrU   rd   is_valid_quantize_weight)r(   weight_nameweightr+   r+   r,   r   \  s    z&ONNXQuantizer.is_valid_quantize_weightc                 C   sz   |  |r| |S || jkrN| j| }|jdrv|jjjtjj	fkrvdS n(| j
rf| jrf| j|S td| dS )NrA   TzzFailed to infer data type of tensor: {}. Please add data type info for this tensor if your model has customized operators.F)r   r   rJ   r%   ZHasFieldrA   Z	elem_typer   r   r   rU   rd   is_float_tensorr   r   r   )r(   tensor_namer<   r+   r+   r,   r   d  s    



 zONNXQuantizer.is_float_tensorc                 C   sV   | j d k	r(t| j dkr(|j| j kr(dS |j| jkr8dS | jd k	rR|j| jkrRdS dS )Nr   FT)r`   r3   r:   r   rb   ra   )r(   rg   r+   r+   r,   should_quantize_nodev  s    
z"ONNXQuantizer.should_quantize_nodec                 C   s\   |t jjkr| ||S |t jjkr0| ||S |t jjkrH| ||S td| ddS )aZ  
        Create nodes for dynamic quantization of input and add them to nodes_list.
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter qType: type to quantize to.
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        zUnexpected value for qType=r    N)	r   r   INT8+_get_dynamic_input_quantization_params_int8UINT8,_get_dynamic_input_quantization_params_uint8r   Z3_get_dynamic_input_quantization_params_float8e4m3fnrj   )r(   r   
nodes_listqTyper+   r+   r,   &_get_dynamic_input_quantization_params  s    z4ONNXQuantizer._get_dynamic_input_quantization_paramsc                 C   s  t jj}|d }|d }tjjd|g|d g|dd}|| |d }tjjd|g|d g|dd}|| |d	 }	tjd
|jd g|	d g|	}
||
 |d	 }tjd
|jd g|d g|}|| |d }tjd|
jd |jd g|d g|}|| tj| j	t jj
g t|d g}| j| |d }tjd|jd | j	g|g|}|| tj| j|g dg}| j| || jg g fS )a/  
        Create nodes for dynamic quantization of input to int8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        _scale
_ReduceMin	ReduceMin:0r   Zkeepdims
_ReduceMax	ReduceMaxZ_AbsZAbsZ_Abs_MaxZMaxg       @Z	scale_DivDiv)r   r   r   ru   rv   r   appendrL   make_tensorrn   r   r   rN   add_initializerrp   )r(   r   r   r   input_scale_namereduce_min_namereduce_min_nodereduce_max_namereduce_max_nodeZreduce_min_abs_nameZreduce_min_abs_nodeZreduce_max_abs_nameZreduce_max_abs_nodeZabs_max_nameZabs_max_nodeZinitializer_divscale_div_namescale_div_nodeZinitializer_zpr+   r+   r,   r     s|    







z9ONNXQuantizer._get_dynamic_input_quantization_params_int8c                 C   s  t jj}|d }|d }|d }tjjd|g|d g|dd}|| |d }tjjd	|g|d g|dd}	||	 tj| jt jj	g t
|g}
| j|
 tj| jt jj	g d
g}| j| |d }tjd|	jd |jd g|d g|}|| |d }tjd|jd | jg|g|}|| |d }tjd| j|jd g|d g|}|| |d }tjd|jd |g|d g|}|| |d }tjd|j|d g|}|| |d }tjjd|j|g||d}|| ||g g fS )a0  
        Create nodes for dynamic quantization of input to uint8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        r   _zero_pointr   r   r   r   r   r   r           Z
_scale_SubZSubZ
_scale_Divr   Z_zero_point_SubZ_zero_point_DivZ_zero_point_FloorZFloorZ_zero_point_CastCast)to)r   r   r   ru   rv   r   r   r   rm   r   r   rN   r   ro   rL   )r(   r   r   r   r   Zinput_zp_namer   r   r   r   Zinitializer_qrangeZinitializer_qvalueZscale_sub_nameZscale_sub_noder   r   Zzp_sub_nameZzp_sub_nodeZzp_div_nameZzp_div_nodeZzp_floor_nameZzp_floor_nodeZzp_cast_nameZzp_cast_noder+   r+   r,   r     s    







z:ONNXQuantizer._get_dynamic_input_quantization_params_uint8c                 C   sN  |dks|dkr| j dks$|| j kr:td| d dS | j | }t|tshtdt| d|d|dks|t|dkrtd	| d
| |d g}|d g}n|g}|g}g }|d }| j	}	g }
|d }t
j||	||}| j| |	tjjtjjtjjtjjhkrtjj}ntjj}t
j|||
|}| j| d|||
|fS )a\  
        Create initializers and inputs in the graph for zero point and scale of output.
        Zero point and scale values are obtained from self.quantization_params if specified.
            parameter param_name: Name of the quantization parameter.
            return: result, scale_name, zero_point_name, scale_shape, zero_point_shape.
        Nz$Quantization parameters for tensor:"z" not specified)Fr   r   r   r   Unexpected type  for r       zYQuantization parameters should contain zero point and scale. Specified values for output z: 
zero_pointscaler   r   T)rl   r   infor"   r   r$   r%   r3   rj   rY   ru   rv   r   rN   r   r   r   r   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZr   )r(   
param_nameZ	use_scaleZuse_zeropointparamsZzero_point_valuesZscale_valuesZzero_point_shapeZzero_point_nameZzero_point_typescale_shape
scale_nameZinit_zpZ
scale_typeZ
init_scaler+   r+   r,   _get_quantization_paramsF  sB    



z&ONNXQuantizer._get_quantization_paramsc                 C   s  |j | }|t }|d }|dk	r<|dk	r<d||  }	}
}n| |\}	}
}}}g }|	rttjd||
|g|g|}nx| jr~dS | jr|tj	j
kr|d }
|d }tjd|g||
|g|}n0| |||\}
}}}tjd||
|g|g|}t|||
||| j|< ||fS )a  
        Given an input for a node (which is not a initializer), this function

        - add nodes to compute zero point and scale for this input if they don't exist.
        - add new QuantizeLinear node to quantize the input.

        :param node: node being quantized in NodeProto format.
        :param input_index: index of input in node.input.
        :param qType: type to quantize to.
        :param given_scale_name: if those inputs need to be quanitzed using this scale tensor.
        :param given_zp_name: if those inputs to be quantized using this zeropoint tensor.
        :return: List of newly created nodes in NodeProto format.
        _QuantizeLinearNTr   r   r   ZDynamicQuantizeLinear)rM   r	   r   ru   rv   r   rR   rS   r   r   r   r   r   rq   )r(   rg   input_indexr   Zgiven_scale_nameZgiven_zp_namer   rH   Zql_node_nameZ
data_foundr   zp_namer   r   qlinear_noder   Zzp_shaper+   r+   r,   _get_quantize_input_nodesy  sN    
z'ONNXQuantizer._get_quantize_input_nodesc                 C   sD   t |trt|dkstd|| jks6t| d|| j|< d S )Nr   z(value must be scale(float) and zeropointz has been setted before)r"   tupler3   AssertionErrorrs   )r(   r   r   r+   r+   r,   set_quant_scale_zp  s    z ONNXQuantizer.set_quant_scale_zpc                 C   s.   || j kr| j | S | jd k	r*| j|S dS )N)NN)rs   rd   find_quantized_valuer(   r   r+   r+   r,   find_quant_scale_zp  s
    


z!ONNXQuantizer.find_quant_scale_zpc                 C   s.   || j kr| j | S | jd k	r*| j|S d S r.   )rq   rd   r   r   r+   r+   r,   r     s
    


z"ONNXQuantizer.find_quantized_value      ?c              
   C   s  || j kr| j | jS | j | j}t|| j }t|}t|| j }t|}	|t }
|| j krr| j | j}n0|| jkr| 	|\}}}}}nt
d| dt|| j }t|}| jtjjkr&t|	}|tj}tjdg|jd}|d}tj||
}| j|g d}tjj}nz|| | }t|	|  tj}tj|tjd|j}tj||
}| j|g tj|tjdd}d}| j}|
d }|  rtj||}ntj |tjjg |}| j|g | jtjjkr| j}ntjj!}|
d	 }| jtjjkr8tj || jdgd
g}nF|  rjtj"|j#tjdd}tj||}ntj ||g dg}| j|g || j kst$t%||
||t&j'|j(dkrdnd||d}|| j |< |
S )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        z	Expected z5 to be in quantized value map for static quantizationr   dtyper   r   r   r   r   r   N)	node_type
node_qtype))rq   q_namer   r   rN   r   r   r	   rl   r   rj   rZ   r   r   r   npasarrayZastypeZfloat32arrayr   reshaperu   numpy_helper
from_arrayinitializer_extendr   roundZint32dimsr   rv   r   ZINT32zerosshaper   r   r   Initializersize)r(   Z	bias_namer   r   betaZweight_scale_nameZweight_initializerZweight_scaleZbias_initializerZ	bias_dataZquantized_bias_namer   r   Zinputscale_initializerZinput_scaler   Zquantized_dataZ
bias_scaleZbias_scale_dataZpacked_bias_initializerr   r   Zbias_np_dataZquantized_bias_scale_nameZpacked_bias_scale_initializerrA   Zquantized_bias_zp_nameZpacked_bias_zp_initializerZbias_zp_dataquantized_valuer+   r+   r,   quantize_bias_static  s    






   


z"ONNXQuantizer.quantize_bias_staticc                 C   s   || j kp|| jkp|| jkS )zq
        only check for value info and newly generated tensor names, initializers are checked separately
        )rJ   rf   rr   )r(   r   r+   r+   r,   contains_tensor.  s
    
zONNXQuantizer.contains_tensorFc              	   C   s   | j ||dddd|dS )NFr   rg   indicesinitializer_use_weight_qTyperP   op_level_per_channelaxisfrom_subgraph_ONNXQuantizer__quantize_inputs)r(   rg   r  r  r+   r+   r,   quantize_activation8  s    z!ONNXQuantizer.quantize_activationr   c              	   C   s   | j ||d||||dS )NTr  r	  )r(   rg   r  rP   r  r  r  r+   r+   r,   quantize_weightE  s    	zONNXQuantizer.quantize_weightTc              
   C   sn  g }g }	g }
g }|D ]J}|j | }|| jkr^| j| }||j |	|j |
|j q|s|
d |d |	d qt|| j }|dk	r| j	r|r| 
|j|r| jn| j||\}}}n | ||r| jn| j|\}}}|
| |	| || q| |r| j|d | j| j }|dkr| ||| j}|dkr^ dS |rp| | n
|| |d }|jdkr|
|j ||j d  |	|j d  n0|
|jd	  ||jd  |	|jd  q| jdk	rJ| jj||g||||d
d\}}}}|
|d	  ||d	  |	|d	  qtd| d| j q|
|	||fS )a  
        Given a node, this function quantizes the inputs as follows:
            - If input is an initializer, quantize the initializer data, replace old initializer
              with new initializer
            - Else, add QuantizeLinear nodes to perform quantization
            parameter node: node being quantized in NodeProto format.
            parameter indices: input indices to quantize.
            return: (List of quantized input names,
                     List of zero point names used for input quantization,
                     List of scale names used for input quantization,
                     List of new QuantizeLinear nodes created)
        r   Nr   )NNNNr   r   r   r   r   T)r  rP   r  r  r  z!Invalid tensor name to quantize: z @graph scope)rM   rq   r   r   r   r   r   rN   r   rO   quantize_weight_per_channelr:   rZ   rY   quantize_initializerr  find_node_by_namerc   rI   r   r   r   r   rL   rd   r
  rj   re   )r(   rg   r  r  rP   r  r  r  Zscale_namesZzero_point_namesZquantized_input_namesr   r   Z
node_inputr   r   q_weight_namer   r   r   Zquantize_input_nodesZparent_quantized_input_namesZparent_zero_point_namesZparent_scale_namesr   r+   r+   r,   Z__quantize_inputsX  s    











  



zONNXQuantizer.__quantize_inputsc                 C   s  |j | jkr(| j|j  }|j|j|jfS |j t }|j d }|j d }t|}	|	  }
t	|
|| j
| jol|\}}}}}|tjjtjjtjjtjjhkrtjj}ntjj}tj||g |g}tj||g |g}| j||g |s| jtjjkrt }| j|_|j|j ||_ |   |_tdk	rt|}|j |	j ksb| | krt!d|	j  d| dd  d| dd  d|j  d	t"|dd
  dn,t#j$|tj%j&| d'|j}tj()||}| j|g t*|j |||t+j,d}|| j|j < |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r   r   NzThe initializer of shape z! could not be created, expecting r   z, got z and shape=z
raw=   r    r   )-r:   rq   r   r   r   r	   r   flattentolistr   rV   rP   ru   r   r   r   r   r   r   r   rv   r   rN   r   rZ   r   r   r   copytobytesraw_datar   r   r   r#   r   r   mappingTENSOR_TYPE_TO_NP_TYPEr   r   r   r   r   r   )r(   r   r   rP   keep_float_weightr   r  r   r   Zweight_dataZw_datar   r   r   Zq_weight_dataZscale_dtypescale_initializerzero_initializerq_weight_initializercheckr+   r+   r,   r    sp    	




 Jz"ONNXQuantizer.quantize_initializerc                  C   s  || j kr$| j | }|j|j|jfS t|| j }|d krFtd|t|}|j	| }	g }
g }g }g }g }t
|	D ]~}|||}t|  || jp|tjjtjjfk| jo|\}}}}}|
| || || || || qtt|j	}d||< t|d |}t
dt|D ]*}t|| |}t||f|}q(|t }|d }|d }t||||tjd }|| j |< |j | g}t!j"#|tjj$||}t!j"#||||}| j%||g |stj|t!j&j'| d|j }t!j()||}| j%|g |||fS )Nz{} is not an initializerr   r   r   r   r   )*rq   r   r   r   r   rN   r   rj   r   r   r   Ztaker   r  r  rV   r   r   r   r   rP   r   listr   r   r   r3   Zconcatenater	   r   r   r   r   ru   rv   r   r   r   r  r  r   r   ) r(   r   rZ   Zchannel_axisrP   r  r   r   weightsZchannel_countZ	rmin_listZ	rmax_listZzero_point_listZ
scale_listZquantized_per_channel_data_listr   Zper_channel_datarminrmaxr   r   Zquantized_per_channel_dataZreshape_dimsZquantized_weightsZchannel_weightsr  r   r   Zzero_scale_shaper  r  r  r+   r+   r,   r    s    	










   
z)ONNXQuantizer.quantize_weight_per_channelc                 C   s   || j kr|| jkr| j | }|d }| j|| j| j }|dkrn|j|j|jg}t	j
d||g|}|S ||jd kstdS )a  
        Given a value (input/output) which is quantized, add a DequantizeLinear node to dequantize
        it back to float32
            parameter value_name: value to dequantize
            parameter new_nodes_list: List of new nodes created before processing current node
            return: None if there is already a DequantizeLinear node that dequantizes it
                    A DequantizeLinear node otherwise
        Z_DequantizeLinearNr   r   )rq   rr   rN   r  rc   rI   r   r   r   ru   rv   r   rL   r   )r(   Z
value_namer   Zdqlinear_nameZdqlinear_nodeZdqlinear_inputsdequantize_noder+   r+   r,   _dequantize_valueq  s$    	
   zONNXQuantizer._dequantize_valuec                 C   s6   | j  jD ]$}| |j}|dk	r| j| qdS )z
        Dequantize output if it is quantized
            parameter new_nodes_list: List of new nodes created before processing current node
            return: List of new nodes created
        N)rN   rI   rL   r#  r:   rc   r   )r(   rL   r"  r+   r+   r,   r     s    z!ONNXQuantizer._dequantize_outputsc                 C   sx  | j d krd S | j D ]}|jdkr(q| jr0q| |s<qt| j |jd  dkrZq|jd | j ks|j	d | j kr|q| j |j	d  }t
|tstdt| d|j	d d|| j |jd < qi }| j D ]}| j | }t
|tstdt| d|d| jtjjkr.t| j|jd \}}n4|j\}}t| j| jd\}	}
t|||	|
| j\}}t||d||< q|S )	N)ZClipZRelur   r   r   r   r    )Z	symmetric)r   r   )r_   rN   r   r   rW   r   r3   Zinput_name_to_nodesrM   rL   r"   r   r$   r%   rY   ru   r   r   r   Zavg_stdZrange_valuer   r   r   )r(   rg   tdrl   r   Zzeror   r   r!  ZqminZqmaxr+   r+   r,   rk     s:    


 
 


z+ONNXQuantizer.calculate_quantization_params)N)NN)NN)r   )F)FFr   F)TFFr   F)FF)TF)"r5   r6   r7   r-   rz   r   rh   r   r   r   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r
  r  r  r#  r   rk   r+   r+   r+   r,   r8   B   sV    
g"(-S]
3
>
d

    
     
s
W  
Tr8   ))r   typingr   r   Znumpyr   ru   Zonnx.numpy_helperr   r   Zonnx.reference.op_runr   ImportErrorZ	calibrater   Z
onnx_modelr   Zquant_utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   registryr   r   r8   r+   r+   r+   r,   <module>   s   
T