U
    h
                     @   sD   d dl Z d dl mZ ddlmZmZ ddlmZ G dd deZdS )	    N)onnx_pb   )attribute_to_kwarg	ms_domain   )QuantOperatorBasec                       s0   e Zd Z fddZdd Z fddZ  ZS )AttentionQuantc                    s   t  || d S N)super__init__)selfZonnx_quantizerZ	onnx_node	__class__ P/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/quantization/operators/attention.pyr      s    zAttentionQuant.__init__c                 C   s   | j | jS r	   )	quantizerZshould_quantize_nodenode)r   r   r   r   should_quantize   s    zAttentionQuant.should_quantizec                    s  | j }|jdkst|jD ]}|jdkrt    S q| j|dg\}}}}| jj	|dgddd\}}}	}
|
| |
| |
|	 |
|
 |dkrt  S |jsdn|jd	 }g }|
| |
|jd
 g |
| |
t|jdkr|jd ndg |
| |
t|jdkr4|jd ndg i }|jD ]}|t| qFt|d< tjjd||j|f|}|| | j j|7  _dS )z
        parameter node: Attention node.
        parameter new_nodes_list: List of new nodes created before processing this node.
        return: a list of nodes in topological order that represents quantized Attention node.
        Z	AttentionZqkv_hidden_sizesr   r   T)Zreduce_rangeZop_level_per_channelN Z_quantr         domainZ
QAttention)r   Zop_typeAssertionError	attributenamer
   quantizer   Zquantize_activationZquantize_weightextendinputlenupdater   r   onnxhelperZ	make_nodeoutputappendZ	new_nodes)r   r   attrZquantized_input_namesZzero_point_namesZscale_namesZnodesZquantized_input_names_weightZzero_point_names_weightZscale_names_weightZnodes_weightZqattention_nameinputskwargsr   Zqattention_noder   r   r   r      sL    








&
&

zAttentionQuant.quantize)__name__
__module____qualname__r   r   r   __classcell__r   r   r   r   r      s   r   )	r    r   Z
onnx_protoZquant_utilsr   r   Zbase_operatorr   r   r   r   r   r   <module>   s   