U
    hN                     @   s   d dl Z d dlmZ d dlZd dlZd dlmZ d dlmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ G d	d
 d
eZG dd dZG dd de	ZdS )    N)Enum)TensorProto)onnx_pb   )ONNXQuantizer)DEQUANT_OP_NAMEQUANT_OP_NAMEQuantizedValueQuantizedValueType__producer____version__add_dequant_output_suffixadd_dequant_suffixadd_quant_input_suffixadd_quant_output_suffixadd_quant_suffixfind_by_name)CreateQDQQuantizerc                   @   s   e Zd ZdZdZdZdS )QDQQuantTensorTyper   r      N)__name__
__module____qualname__
ACTIVATIONWEIGHTZBIAS r   r   J/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/quantization/qdq_quantizer.pyr       s   r   c                   @   s   e Zd ZejddfddZdS )QDQTensorQuantInfoNc                 C   s    || _ || _|| _|d k	| _d S N)tensor_typequant_para_provideraxis	is_shared)selfr   r    r!   r   r   r   __init__'   s    zQDQTensorQuantInfo.__init__)r   r   r   r   r   r$   r   r   r   r   r   &   s   r   c                   @   s   e Zd Zd'ddZdd ZdejfddZd(dd	Zd)d
dZ	dd Z
d*ddZdd Zdd Zdd Zdd Zd+ddZd,ddZdd Zdd  Zd!d" Zd#d$ Zd%d& ZdS )-QDQQuantizerNc                 C   s   t | |||||||||	|
|| i | _g | _g | _d|kr@g n|d | _d|krVdn|d | _d|krldn|d | _d|krdn|d | _| jri | _	d|kri n|d | _
d S )NZ"OpTypesToExcludeOutputQuantizationZAddQDQPairToWeightFZQuantizeBiasTZDedicatedQDQPairZ QDQOpTypePerChannelSupportToAxis)r   r$   tensors_to_quantizebias_to_quantizenodes_to_removeZ'op_types_to_exclude_output_quantizationadd_qdq_pair_to_weightquantize_biasdedicated_qdq_pairtensor_to_its_receiving_nodesZ'qdq_op_type_per_channel_support_to_axis)r#   modelZper_channelZreduce_rangemodeZstaticweight_qTypeactivation_qTypeZtensors_rangeZnodes_to_quantizeZnodes_to_excludeZop_types_to_quantizeZextra_optionsr   r   r   r$   /   sB    	
	zQDQQuantizer.__init__c                 C   sv   t || j }|dk	r,|jtjjkrrdS nF|| jkrb| j| }|j	drr|jj
jtjkrrdS ntd| dS )z2
        Check if tensor can be quantized
        NTr   z\failed to infer the type of tensor: {}. Skip to quantize it. Please check if it is expected.F)r   r-   initializer	data_type
onnx_protor   FLOATZvalue_infostypeZHasFieldr   Z	elem_typeloggingwarningformat)r#   tensor_nameweightvir   r   r   _is_tensor_quantizablez   s    

z#QDQQuantizer._is_tensor_quantizablec                 C   s@   |  |r<|r"t||d| j|< n|| jkr<t|d| j|< dS )a  
        Quantize tensors. If quant_param_tensor is not None, tensor with name tensor_name will be quantized with same
        quantization parameters as tensor quant_param_tensor

        Args:
            tensor_name: name of the tensor to quantize
            quant_sharing_param: name of the tensor that provides quantization parameter
            tensor_type: QDQQuantTensorType default ACTIVATION
        )r   r    )r   N)r<   r   r&   )r#   r9   quant_sharing_paramr   r   r   r   Z__quantize_tensor   s    

 
zQDQQuantizer.__quantize_tensorc                 C   s   |  ||tjS )z
        Quantize Activation Tensor
        Args:
            tensor_name: name of the tensor to quantize
            quant_sharing_param: name of the tensor that provides quantization parameter

        )_QDQQuantizer__quantize_tensorr   r   r#   r9   r=   r   r   r   quantize_activation_tensor   s    z'QDQQuantizer.quantize_activation_tensorc                 C   s   |  ||tjS )z
        Quantize Weight Tensor
        Args:
            tensor_name: name of the tensor to quantize
            quant_sharing_param: name of the tensor that provides quantization parameter

        )r>   r   r   r?   r   r   r   quantize_weight_tensor   s    z#QDQQuantizer.quantize_weight_tensorc                 C   sN   t || j }|r8|jtjjkrJttj	|d| j
|< ntd| d d S )N)r   r!   z9only support per-channel quantization on weight. Tensor: z is not quantized.)r   r-   r1   r2   r3   r   r4   r   r   r   r&   r6   r7   )r#   r9   r!   r:   r   r   r   "quantize_weight_tensor_per_channel   s     z/QDQQuantizer.quantize_weight_tensor_per_channel      ?c                 C   sR   t || j }|d k	r<|jtjjkrN| j||||f nt	
d| d d S )Nz	Expected z to be a weight)r   r-   r1   r2   r3   r   r4   r'   appendr6   r7   )r#   	bias_name
input_nameweight_namebetar:   r   r   r   quantize_bias_tensor   s
    z!QDQQuantizer.quantize_bias_tensorc                 C   s   | j | d S r   )r(   rD   )r#   noder   r   r   remove_node   s    zQDQQuantizer.remove_nodec                 C   s   | j | j d S r   )r-   remove_nodesr(   )r#   r   r   r   rL      s    zQDQQuantizer.remove_nodesc                 C   s   | j  D ]V}| |r
t| |}|  | jr
|jD ](}|| jkrNg | j|< | j| | q6q
| 	  | 
  | jr|   |   | js| j   t| j j _t| j j _| j j S r   )r-   ZnodesZshould_quantize_noder   quantizer+   inputr,   rD   _quantize_normal_tensors_quantize_sharing_param_tensorsr*   _quantize_bias_tensorsrL   r)   Zclean_initializersr   Zproducer_namer   Zproducer_version)r#   rJ   Zop_quantizerr9   r   r   r   quantize_model   s&    







zQDQQuantizer.quantize_modelc                 C   s`   || j kr\t| j | dkr\| j|s\| j|s\| j|| || jkrX| j|= dS dS )Nr   TF)Zquantization_paramslenr-   Zinput_name_to_nodesis_graph_outputZis_graph_inputreplace_output_of_all_nodesr&   )r#   Zupstream_output_nameZoutput_namer   r   r   try_replacing_upstream_output   s    


z*QDQQuantizer.try_replacing_upstream_outputc
                 C   sP   t jjt|||g|g||	d}
t jjt|||g|g||	d}| j|
|g d S )Nr!   )onnxhelper	make_noder   r   r-   Z	add_nodes)r#   q_inputZq_outputZquant_node_nameZdq_input	dq_outputZdequant_node_name
scale_namezp_namer!   Zqlinear_nodedequant_noder   r   r   _create_qdq_nodes   s    zQDQQuantizer._create_qdq_nodesc                 C   s   |j }|d k	r@| jdk r td| j|tjj|| jd\}}}n*| j||t	j
krV| jn| j| jd\}}}t|}| j|| | jrt|}	| ||	t||	|t||||	 n.tjjt|||g|gt||d}
| j|
 d S )N   zLPer-Channel support with QDQ format requires onnx opset version 13 or above.)Zkeep_float_weightrW   )nameZopset_version
ValueErrorZquantize_weight_per_channelr3   r   ZINT8r)   Zquantize_initializerr   r   r/   r0   r   r-   replace_input_of_all_nodesr   r`   r   r   rX   rY   rZ   r   add_node)r#   Zweight_protor   r!   rG   Zq_weight_namer^   r]   Zweight_dequant_outputZweight_quant_outputr_   r   r   r   _add_qdq_pair_for_initializer  sL    
z*QDQQuantizer._add_qdq_pair_for_initializerc                 C   sd  | j r|| jkrt| j| dkrt| j| }t|D ]}d|d  }t|| }t|| }t|| }	t|| }
| |||	|||
|| | j| | }| j	
||| |dkr8t||||tj}|| j|< q8n|}t|}| j	|rt|}|}| j	|| n| j	|| | |t|t|t||t||| t||||tj}|| j|< d S )Nr   _r   )r+   r,   rS   ranger   r   r   r   r`   r-   Zreplace_node_inputr	   r
   ZInputquantized_value_maprT   r   rU   rd   )r#   r9   r]   r^   Znum_dedicated_qdq_pairiZpostfixZ tensor_name_quant_output_postfixZ"tensor_name_dequant_output_postfixZquant_node_name_postfixZdequant_node_name_postfixrJ   quantized_valuer[   r\   r   r   r   _add_qdq_pair_for_activation8  sv    z)QDQQuantizer._add_qdq_pair_for_activationc           
      C   s   | j   D ]\}}|| jkr"q|jst|| j }|rP| ||j	|j
 nH| |\}}| |||\}}}}	}	|std| d| ||| | j |= qd S )Nz4Quantization parameters are not specified for param zb. In static mode quantization params for inputs and outputs of nodes to be quantized are required.)r&   copyitemsri   r"   r   r-   r1   rf   r   r!   Zfind_quant_scale_zpZ_get_quantization_paramsrc   rl   )
r#   r9   tensor_infor1   Z
used_scaleZused_zpZ
data_foundr]   r^   rg   r   r   r   rO   y  s&    
  
z%QDQQuantizer._quantize_normal_tensorsc                 C   sx   | j rt| j   D ]\\}}|j}|| jkr| j |= | j| }t|| j }|d k	r^td| 	||j
|j qq d S )NzBQuantization parameter shared mode is not supported for weight yet)r&   rm   rn   r    ri   r   r-   r1   rc   rl   r]   r^   )r#   r9   ro   Ztensor_provider_namerk   r1   r   r   r   rP     s    

z,QDQQuantizer._quantize_sharing_param_tensorsc           	      C   s8  | j D ]*\}}}}|| jkr q| |||| | jt|| j  | j| }|jdkrt|}t	j
jd|jg|g|t	jjd}n|jdkr|jt	jjt	jjt	jjhkrtd|j d|j|j|jg}t|}|jd k	rt	j
jd||g||jd}nt	j
d||g|}ntd|jd	| j| qd S )
NZCast)rb   to)NDequantizeLinearzUnexpected quantize type z for DequantizeLinear.rq   rW   zUnexpected operator type .)r'   ri   Zquantize_bias_staticr-   Zremove_initializerr   r1   Z	node_typer   rX   rY   rZ   Zq_namer   r4   Z
node_qtypeZFLOAT16ZBFLOAT16RuntimeErrorr]   r^   r!   re   )	r#   rE   rF   rG   rH   Zquant_valueZ	node_namer_   inputsr   r   r   rQ     sP    



z#QDQQuantizer._quantize_bias_tensorsc                 C   s   || j kp|| jkS r   )r&   r'   )r#   r9   r   r   r   is_tensor_quantized  s    z QDQQuantizer.is_tensor_quantized)N)N)N)rC   )N)N)r   r   r   r$   r<   r   r   r>   r@   rA   rB   rI   rK   rL   rR   rV   r`   rf   rl   rO   rP   rQ   ru   r   r   r   r   r%   .   s(    
K





 

2A/r%   )r6   enumr   rX   Zonnx.numpy_helperr   r   r3   Zonnx_quantizerr   Zquant_utilsr   r   r	   r
   r   r   r   r   r   r   r   r   registryr   r   r   r%   r   r   r   r   <module>   s   8