U
    h                     @   s   d dl Z d dlZd dlmZ ddlmZmZmZmZm	Z	 ddl
mZ ddlmZ G dd	 d	eZG d
d deZG dd deZG dd deZdS )    N)onnx_pb   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypefind_by_nameget_mul_node   )QuantOperatorBase)QDQOperatorBasec                       s$   e Zd Z fddZdd Z  ZS )	QOpMatMulc                    s   t  || d S Nsuper__init__selfZonnx_quantizerZ	onnx_node	__class__ M/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/quantization/operators/matmul.pyr      s    zQOpMatMul.__init__c                 C   s   | j | jsdS | j | jjd s>| j | jjd s>dS | j jr|| j | jjd s|td| j j d| jj	 d dS dS )NFr	   r   z%Ignore MatMul due to non constant B: []T)
	quantizerZshould_quantize_nodenodeZis_float_tensorinputZq_matmul_const_b_onlyZfind_initializer_in_pathprintZgraph_scopename)r   r   r   r   should_quantize   s    zQOpMatMul.should_quantize)__name__
__module____qualname__r   r   __classcell__r   r   r   r   r      s   r   c                       s$   e Zd Z fddZdd Z  ZS )MatMulIntegerc                    s   t  || d S r   r   r   r   r   r   r   &   s    zMatMulInteger.__init__c                 C   s  | j }|jdkst| j|dg\}}}}| jj|dgddd\}}}}	|| || || ||	 |jd d }
|jr|jd nd}t	j
d	|| |
g|}|| |
d
 }t	j
jd|
g|g|
d tjjd}|| t|dkst|r|d n|d d |d  d }t|| jj}|d krRt||d |}|| |jd }d}|rn|d }|t||g|jd | | j j|7  _d S )NMatMulr   r	   TZreduce_rangeZop_level_per_channelZ_output_quantized_quant r#   Z_cast_outputZCast_cast)tor   Z_scales_mul_Z_mulz:0Z_output_scale_mul)r   op_typeAssertionErrorr   quantize_activationquantize_weightextendoutputr   onnxhelper	make_nodeappend
onnx_protoZTensorProtoFLOATlenr   	new_nodesr   )r   r   quantized_input_nameszero_point_namesscale_namesnodesquantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightZmatmul_integer_outputZmatmul_integer_nameZmatmul_integer_nodeZcast_op_outputZ	cast_nodeZscales_mul_opZscales_mul_nodeZscales_mul_op_outputZoutput_scale_mul_opr   r   r   quantize)   sr    








zMatMulInteger.quantizer   r    r!   r   rA   r"   r   r   r   r   r#   %   s   r#   c                       s(   e Zd Z fddZ fddZ  ZS )QLinearMatMulc                    s   t  || d S r   r   r   r   r   r   r   x   s    zQLinearMatMul.__init__c                    s  | j }|jdkst| j|dg\}}}}| jj|dgddd\}}}}	|| || || ||	 | j|jd \}
}}}}|
r|d krt	 
 S |jd t }|jr|jd nd}g }||d  ||d  ||d  ||d  ||d  ||d  || || tjd||g|}|| t|jd |||tj}|| jj|jd < | j j|7  _d S )	Nr$   r   r	   Tr%   r&   r'   rC   )r   r+   r,   r   r-   r.   r/   Z_get_quantization_paramsr0   r   rA   r   r   r4   r1   r2   r3   r   r   ZInputZquantized_value_mapr8   )r   r   r9   r:   r;   r<   r=   r>   r?   r@   Z
data_foundZoutput_scale_nameZoutput_zp_namer*   Zqlinear_matmul_outputZqlinear_matmul_nameZqlinear_matmul_inputsZqlinear_matmul_nodeZq_outputr   r   r   rA   {   sl    







zQLinearMatMul.quantizerB   r   r   r   r   rC   w   s   rC   c                       s$   e Zd Z fddZdd Z  ZS )	QDQMatMulc                    s   t  || d S r   r   r   r   r   r   r      s    zQDQMatMul.__init__c                 C   s   | j }|jdkst| jr"|j}nt|j|j}|D ]N}| j	 rxt
|| jj rx| jj|jd}| j|| q6| j| q6d S )Nr$   r	   )r   r+   r,   Zdisable_qdq_for_node_outputr   	itertoolschainr0   r   Zis_per_channelr   modelZinitializerZ'qdq_op_type_per_channel_support_to_axisgetZ"quantize_weight_tensor_per_channelZquantize_activation_tensor)r   r   Znodes_to_iterateZtensor_nameZchannel_axisr   r   r   rA      s    zQDQMatMul.quantizerB   r   r   r   r   rD      s   rD   )rE   r1   r   r5   Zquant_utilsr   r   r   r   r   Zbase_operatorr
   Zqdq_base_operatorr   r   r#   rC   rD   r   r   r   r   <module>   s   RI