import itertools
import logging
import os
import tempfile
from typing import Dict

import numpy as np
import onnx
from onnx import AttributeProto, GraphProto, ModelProto, NodeProto, TensorProto, helper, numpy_helper
from onnx.shape_inference import infer_shapes, infer_shapes_path
from packaging import version

logger = logging.getLogger(__name__)


def _npfloat16_to_int(np_list):
    """
    Convert numpy float16 to python int.

    :param np_list: numpy float16 list
    :return int_list: python int list
    """
    return [int(bin(_.view("H"))[2:].zfill(16), 2) for _ in np_list]
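
# Illustrative note (not part of the original module): the helper above returns the raw
# IEEE 754 half-precision bit pattern of each element as a Python int. For example,
# np.float16(1.0) is stored as 0x3C00, so _npfloat16_to_int(np.float16([1.0])) == [15360].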


def convert_np_to_float16(np_array, min_positive_val=5.96e-08, max_finite_val=65504.0):
    """
    Convert float32 numpy array to float16 without changing sign or finiteness.
    Positive values less than min_positive_val are mapped to min_positive_val.
    Positive finite values greater than max_finite_val are mapped to max_finite_val.
    Similar for negative values. NaN, 0, inf, and -inf are unchanged.
    """

    def between(a, b, c):
        return np.logical_and(a < b, b < c)

    if np_array[np.where(np_array > 0)].shape[0] > 0:
        positive_max = np_array[np.where(np_array > 0)].max()
        positive_min = np_array[np.where(np_array > 0)].min()
        if positive_max >= max_finite_val:
            logger.debug(f"the float32 number {positive_max} will be truncated to {max_finite_val}")
        if positive_min <= min_positive_val:
            logger.debug(f"the float32 number {positive_min} will be truncated to {min_positive_val}")

    if np_array[np.where(np_array < 0)].shape[0] > 0:
        negative_max = np_array[np.where(np_array < 0)].max()
        negative_min = np_array[np.where(np_array < 0)].min()
        if negative_min <= -max_finite_val:
            logger.debug(f"the float32 number {negative_min} will be truncated to {-max_finite_val}")
        if negative_max >= -min_positive_val:
            logger.debug(f"the float32 number {negative_max} will be truncated to {-min_positive_val}")

    np_array = np.where(between(0, np_array, min_positive_val), min_positive_val, np_array)
    np_array = np.where(between(-min_positive_val, np_array, 0), -min_positive_val, np_array)
    np_array = np.where(between(max_finite_val, np_array, float("inf")), max_finite_val, np_array)
    np_array = np.where(between(float("-inf"), np_array, -max_finite_val), -max_finite_val, np_array)
    return np.float16(np_array)
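
# Illustrative example (not part of the original module): out-of-range float32 values are
# clamped rather than overflowing to inf or underflowing to zero. For instance,
# convert_np_to_float16(np.array([1e-10, 1.0, 1e10], dtype=np.float32)) returns roughly
# [5.96e-08, 1.0, 65504.0] in float16, while NaN, 0.0, inf and -inf pass through unchanged.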


def convert_tensor_float_to_float16(tensor, min_positive_val=5.96e-08, max_finite_val=65504.0):
    """Convert tensor float to float16.

    Args:
        tensor (TensorProto): the tensor to convert.
        min_positive_val (float, optional): minimal positive value. Defaults to 5.96e-08.
        max_finite_val (float, optional): maximal finite value. Defaults to 65504.0.

    Raises:
        ValueError: input type is not TensorProto.

    Returns:
        TensorProto: the converted tensor.
    """
    if not isinstance(tensor, TensorProto):
        raise ValueError(f"Expected input type is an ONNX TensorProto but got {type(tensor)}")

    if tensor.data_type == TensorProto.FLOAT:
        tensor.data_type = TensorProto.FLOAT16
        # convert float_data (float type) to float16 and write to int32_data
        if tensor.float_data:
            float16_data = convert_np_to_float16(np.array(tensor.float_data), min_positive_val, max_finite_val)
            int_list = _npfloat16_to_int(float16_data)
            tensor.int32_data[:] = int_list
            tensor.float_data[:] = []
        # convert raw_data (bytes type)
        if tensor.raw_data:
            # convert the raw bytes to float32, then to float16, then write back as bytes
            float32_list = np.frombuffer(tensor.raw_data, dtype="float32")
            float16_list = convert_np_to_float16(float32_list, min_positive_val, max_finite_val)
            tensor.raw_data = float16_list.tobytes()
    return tensor


def make_value_info_from_tensor(tensor):
    shape = numpy_helper.to_array(tensor).shape
    return helper.make_tensor_value_info(tensor.name, tensor.data_type, shape)


DEFAULT_OP_BLOCK_LIST = [
    "ArrayFeatureExtractor",
    "Binarizer",
    "CastMap",
    "CategoryMapper",
    "DictVectorizer",
    "FeatureVectorizer",
    "Imputer",
    "LabelEncoder",
    "LinearClassifier",
    "LinearRegressor",
    "Normalizer",
    "OneHotEncoder",
    "RandomUniformLike",
    "SVMClassifier",
    "SVMRegressor",
    "Scaler",
    "TreeEnsembleClassifier",
    "TreeEnsembleRegressor",
    "ZipMap",
    "NonMaxSuppression",
    "TopK",
    "RoiAlign",
    "Range",
    "CumSum",
    "Min",
    "Max",
    "Upsample",
]


# Some operators have inputs whose data type must stay float. Key is op_type, value is a list of input indices.
ALWAYS_FLOAT_INPUTS = {"Resize": [2], "GroupNorm": [1, 2], "SkipGroupNorm": [1, 2]}


class InitializerTracker:
    """Class for keeping track of initializer."""

    def __init__(self, initializer: TensorProto):
        self.initializer = initializer
        self.fp32_nodes = []
        self.fp16_nodes = []

    def add_node(self, node: NodeProto, is_node_blocked):
        if is_node_blocked:
            self.fp32_nodes.append(node)
        else:
            self.fp16_nodes.append(node)


def convert_float_to_float16(
    model,
    min_positive_val=5.96e-08,
    max_finite_val=65504.0,
    keep_io_types=False,
    disable_shape_infer=False,
    op_block_list=None,
    node_block_list=None,
    force_fp16_initializers=False,
    force_fp16_inputs=None,
):
    """Convert tensor float type in the input ONNX model to tensor float16.

    Args:
        model (ModelProto or str): The ONNX model or path of the model to convert.
        min_positive_val (float, optional): minimal positive value. Defaults to 5.96e-08.
        max_finite_val (float, optional): maximal finite value of float16. Defaults to 65504.
        keep_io_types (Union[bool, List[str]], optional): It could be boolean or a list of float32 input/output names.
                                                          If True, model inputs/outputs should be left as float32.
                                                          Defaults to False.
        disable_shape_infer (bool, optional): Skips running onnx shape/type inference.
                                              Useful if shape inference has been done. Defaults to False.
        op_block_list (List[str], optional): List of op types to leave as float32.
                                             Defaults to None, which will use `float16.DEFAULT_OP_BLOCK_LIST`.
        node_block_list (List[str], optional): List of node names to leave as float32. Defaults to None.
        force_fp16_initializers(bool): force converting all float initializers to float16.
                                       Defaults to False, which converts only the ones needed to avoid precision loss.
        force_fp16_inputs(Dict[str, List[int]]): Force the conversion of the inputs of some operators to float16, even if
                                                 this script's preference is to keep them in float32.
    Raises:
        ValueError: input type is not ModelProto.

    Returns:
        ModelProto: converted model.
    """
    assert (
        min_positive_val >= 5.96e-08
    ), "invalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05"
    assert max_finite_val <= float(np.finfo(np.float16).max), "invalid max_finite_val. largest float16 value: 65504"

    force_fp16_inputs_dict = {} if force_fp16_inputs is None else force_fp16_inputs

    if isinstance(model, str):
        model_path = model
        if version.parse(onnx.__version__) >= version.parse("1.8.0") and not disable_shape_infer:
            # shape inference through a file path also handles models larger than 2GB
            with tempfile.NamedTemporaryFile(dir=os.path.dirname(model_path)) as tmpfile:
                shape_infer_model_path = tmpfile.name
                infer_shapes_path(model_path, shape_infer_model_path)
                model = onnx.load(shape_infer_model_path)
                disable_shape_infer = True
        else:
            model = onnx.load(model_path)

    if not isinstance(model, ModelProto):
        raise ValueError(f"Expected an ONNX ModelProto but got {type(model)}")

    func_infer_shape = None
    if not disable_shape_infer and version.parse(onnx.__version__) >= version.parse("1.2.0"):
        try:
            func_infer_shape = infer_shapes
        finally:
            pass

    if op_block_list is None:
        op_block_list = DEFAULT_OP_BLOCK_LIST

    if node_block_list is None:
        node_block_list = []

    op_block_list = set(op_block_list)
    node_block_list = set(node_block_list)

    logger.debug(
        f"fp16 parameters: min_positive_val={min_positive_val} max_finite_val={max_finite_val} "
        f"keep_io_types={keep_io_types} disable_shape_infer={disable_shape_infer} "
        f"op_block_list={op_block_list} node_block_list={node_block_list} "
        f"force_fp16_initializers={force_fp16_initializers}"
    )

    # queue for breadth-first traversal of the model, its graphs and node attributes
    queue = []
    value_info_list = []
    node_list = []

    # Some operators (e.g. Resize or GroupNorm) must keep certain inputs in float even after conversion.
    # This list tracks such nodes that are not in the block list.
    mixed_float_type_node_list = []

    if func_infer_shape is not None:
        model = func_infer_shape(model)

    queue.append(model)

    name_mapping = {}
    graph_io_to_skip = set()
    io_casts = set()

    fp32_inputs = [n.name for n in model.graph.input if n.type.tensor_type.elem_type == TensorProto.FLOAT]
    fp32_outputs = [n.name for n in model.graph.output if n.type.tensor_type.elem_type == TensorProto.FLOAT]
    if isinstance(keep_io_types, list):
        fp32_inputs = [n for n in fp32_inputs if n in keep_io_types]
        fp32_outputs = [n for n in fp32_outputs if n in keep_io_types]
    elif not keep_io_types:
        fp32_inputs = []
        fp32_outputs = []

    for i, n in enumerate(model.graph.input):
        if n.name in fp32_inputs:
            output_name = "graph_input_cast_" + str(i)
            name_mapping[n.name] = output_name
            graph_io_to_skip.add(n.name)

            node_name = "graph_input_cast" + str(i)
            new_value_info = model.graph.value_info.add()
            new_value_info.CopyFrom(n)
            new_value_info.name = output_name
            new_value_info.type.tensor_type.elem_type = TensorProto.FLOAT16
            # add Cast node (from tensor(float) to tensor(float16)) after the graph input
            new_node = [helper.make_node("Cast", [n.name], [output_name], to=TensorProto.FLOAT16, name=node_name)]
            model.graph.node.extend(new_node)
            value_info_list.append(new_value_info)
            io_casts.add(node_name)

    for i, n in enumerate(model.graph.output):
        if n.name in fp32_outputs:
            input_name = "graph_output_cast_" + str(i)
            name_mapping[n.name] = input_name
            graph_io_to_skip.add(n.name)

            node_name = "graph_output_cast" + str(i)
            new_value_info = model.graph.value_info.add()
            new_value_info.CopyFrom(n)
            new_value_info.name = input_name
            new_value_info.type.tensor_type.elem_type = TensorProto.FLOAT16
            # add Cast node (from tensor(float16) to tensor(float)) before the graph output
            new_node = [helper.make_node("Cast", [input_name], [n.name], to=TensorProto.FLOAT, name=node_name)]
            model.graph.node.extend(new_node)
            value_info_list.append(new_value_info)
            io_casts.add(node_name)

    fp32_initializers: Dict[str, InitializerTracker] = {}
    while queue:
        next_level = []
        for q in queue:
            # if q is the model, push q.graph (GraphProto)
            if isinstance(q, ModelProto):
                next_level.append(q.graph)
            # if q is a graph, track float initializers and process its nodes
            if isinstance(q, GraphProto):
                for n in q.initializer:  # TensorProto type
                    if n.data_type == TensorProto.FLOAT:
                        assert n.name not in fp32_initializers
                        fp32_initializers[n.name] = InitializerTracker(n)

                for n in q.node:
                    # the Cast nodes added above for graph inputs/outputs need no further processing
                    if n.name in io_casts:
                        continue
                    for i in range(len(n.input)):
                        if n.input[i] in name_mapping:
                            n.input[i] = name_mapping[n.input[i]]
                    for i in range(len(n.output)):
                        if n.output[i] in name_mapping:
                            n.output[i] = name_mapping[n.output[i]]

                    is_node_blocked = n.op_type in op_block_list or n.name in node_block_list
                    for i, input_name in enumerate(n.input):
                        if input_name in fp32_initializers:
                            # inputs listed in ALWAYS_FLOAT_INPUTS stay in float32 unless forced by force_fp16_inputs
                            use_fp32_weight = is_node_blocked or (
                                i in ALWAYS_FLOAT_INPUTS.get(n.op_type, [])
                                and i not in force_fp16_inputs_dict.get(n.op_type, [])
                            )
                            fp32_initializers[input_name].add_node(n, use_fp32_weight)

                    if is_node_blocked:
                        node_list.append(n)
                    else:
                        if n.op_type == "Cast":
                            for attr in n.attribute:
                                if attr.name == "to" and attr.i == TensorProto.FLOAT:
                                    attr.i = TensorProto.FLOAT16
                                    break

                        if n.op_type in [
                            "EyeLike",
                            "Multinomial",
                            "RandomNormal",
                            "RandomNormalLike",
                            "RandomUniform",
                            "RandomUniformLike",
                            "SequenceEmpty",
                            "Bernoulli",
                        ]:
                            has_dtype = False
                            for attr in n.attribute:
                                if attr.name == "dtype":
                                    has_dtype = True
                                    if attr.i == TensorProto.FLOAT:
                                        attr.i = TensorProto.FLOAT16

                            # dtype is optional and defaults to FLOAT for these operators,
                            # so add it explicitly to get float16 output
                            if n.op_type in ["RandomNormal", "RandomUniform", "SequenceEmpty"] and not has_dtype:
                                n.attribute.extend([helper.make_attribute("dtype", TensorProto.FLOAT16)])

                        if n.op_type not in ALWAYS_FLOAT_INPUTS or n.op_type in force_fp16_inputs_dict:
                            for attr in n.attribute:
                                next_level.append(attr)
                        else:
                            mixed_float_type_node_list.append(n)

            # if q is a node attribute, push its graph(s) and convert its tensor(s)
            if isinstance(q, AttributeProto):
                next_level.append(q.g)
                for n in q.graphs:
                    next_level.append(n)
                q.t.CopyFrom(convert_tensor_float_to_float16(q.t, min_positive_val, max_finite_val))
                for n in q.tensors:
                    n = convert_tensor_float_to_float16(n, min_positive_val, max_finite_val)

            # if q is a graph, convert input, output and value_info from tensor(float) to tensor(float16)
            if isinstance(q, GraphProto):
                for n in itertools.chain(q.input, q.output, q.value_info):
                    if n.type.tensor_type.elem_type == TensorProto.FLOAT and n.name not in graph_io_to_skip:
                        n.type.tensor_type.elem_type = TensorProto.FLOAT16
                        value_info_list.append(n)
                    if n.type.HasField("sequence_type"):
                        if (
                            n.type.sequence_type.elem_type.tensor_type.elem_type == TensorProto.FLOAT
                            and n.name not in graph_io_to_skip
                        ):
                            n.type.sequence_type.elem_type.tensor_type.elem_type = TensorProto.FLOAT16
                            value_info_list.append(n)

        queue = next_level

    for value in fp32_initializers.values():
        # By default, to avoid precision loss, only convert an initializer when it is used by fp16 nodes.
        if force_fp16_initializers or value.fp16_nodes:
            value.initializer = convert_tensor_float_to_float16(value.initializer, min_positive_val, max_finite_val)
            value_info_list.append(make_value_info_from_tensor(value.initializer))
            if value.fp32_nodes and not force_fp16_initializers:
                logger.info(
                    "initializer is used by both fp32 and fp16 nodes. Consider add these nodes to block list:{}".format(
                        value.fp16_nodes
                    )
                )

    # For operators in ALWAYS_FLOAT_INPUTS, insert a float16-to-float Cast before the inputs that must stay float.
    for node in mixed_float_type_node_list:
        for i, input_name in enumerate(node.input):
            if i not in ALWAYS_FLOAT_INPUTS[node.op_type] or i in force_fp16_inputs_dict.get(node.op_type, []):
                continue
            for value_info in value_info_list:
                if input_name == value_info.name:
                    # create new value_info for the node's new input name
                    new_value_info = model.graph.value_info.add()
                    new_value_info.CopyFrom(value_info)
                    output_name = node.name + "_input_cast_" + str(i)
                    new_value_info.name = output_name
                    new_value_info.type.tensor_type.elem_type = TensorProto.FLOAT
                    # add Cast node (from tensor(float16) to tensor(float)) before the node
                    node_name = node.name + "_input_cast" + str(i)
                    new_node = [
                        helper.make_node("Cast", [input_name], [output_name], to=TensorProto.FLOAT, name=node_name)
                    ]
                    model.graph.node.extend(new_node)
                    node.input[i] = output_name
                    break

    # process the nodes in the block list that do not support tensor(float16)
    for node in node_list:
        # if an input is tensor(float16), insert a float16-to-float Cast before the node
        for i in range(len(node.input)):
            input_name = node.input[i]
            for value_info in value_info_list:
                if input_name == value_info.name:
                    new_value_info = model.graph.value_info.add()
                    new_value_info.CopyFrom(value_info)
                    output_name = node.name + "_input_cast_" + str(i)
                    new_value_info.name = output_name
                    new_value_info.type.tensor_type.elem_type = TensorProto.FLOAT
                    node_name = node.name + "_input_cast" + str(i)
                    new_node = [
                        helper.make_node("Cast", [input_name], [output_name], to=TensorProto.FLOAT, name=node_name)
                    ]
                    model.graph.node.extend(new_node)
                    node.input[i] = output_name
                    break
        # if an output is tensor(float16), insert a float-to-float16 Cast after the node
        for i in range(len(node.output)):
            output = node.output[i]
            for value_info in value_info_list:
                if output == value_info.name:
                    new_value_info = model.graph.value_info.add()
                    new_value_info.CopyFrom(value_info)
                    input_name = node.name + "_output_cast_" + str(i)
                    new_value_info.name = input_name
                    new_value_info.type.tensor_type.elem_type = TensorProto.FLOAT
                    node_name = node.name + "_output_cast" + str(i)
                    new_node = [
                        helper.make_node("Cast", [input_name], [output], to=TensorProto.FLOAT16, name=node_name)
                    ]
                    model.graph.node.extend(new_node)
                    node.output[i] = input_name
                    break

    return model


def float_to_float16_max_diff(tensor, min_positive_val=5.96e-08, max_finite_val=65504.0):
    """Measure the maximum absolute difference after converting a float tensor to float16."""
    if not isinstance(tensor, TensorProto):
        raise ValueError(f"Expected input type is an ONNX TensorProto but got {type(tensor)}")

    if tensor.data_type != TensorProto.FLOAT:
        raise ValueError("Expected tensor data type is float.")

    float32_data = None
    if tensor.float_data:
        float32_data = np.array(tensor.float_data)

    if tensor.raw_data:
        float32_data = np.frombuffer(tensor.raw_data, dtype="float32")

    if float32_data is None:
        raise RuntimeError("external data not loaded!")

    float16_data = convert_np_to_float16(float32_data, min_positive_val, max_finite_val)
    return np.amax(np.abs(float32_data - np.float32(float16_data)))