U
    hyL                     @   s~   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZ e eZG dd dZG dd dZdS )    N)	Precision)
Gpt2Helper
Gpt2Inputsc                   @   s^   e Zd ZdddZdd Zeddd	Zed
ddZdddZ	dddZ
dd Zdd ZdS )
Gpt2MetricTorch   c                 C   sj   |dkr|dkst || _|| _| d| | _|| _d| _d| _d| _d| _d| _	d | _
d | _i | _d S )N   d   z vs r   )AssertionErrorbaseline	treatmentnametop_ktop_1_errortop_k_errortotal_samplesmax_logits_diffmax_logits_diff_no_pastbatch_top1_errorbatch_topk_errorseq_len_latency)selftreatment_namebaseline_namer    r   T/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/models/gpt2/gpt2_tester.py__init__   s    zGpt2Metric.__init__c                 C   s  | j | jkrtd td| j d| j  d | jdkrd| j | j }d| j | j }td| j d| j d	|d
d| j d| j d	|d
d td td| jd td| jd ntd| j d | j	rtd d}d}t
| j	 D ]}t| j	| d }|dkr4td| d|d
d n.tdd|  dd|d  d  d|d
d ||t| j	|  7 }|t| j	| 7 }qtd|| d
d d S )Nz---zMetrics for z (baseline=z):r   g      Y@zTotal=z Top1Error=z (z.2fz%) TopzError=z%)zMax logits diffs:z	with past  = z.6fz	empty past = z (baseline):z/Past sequence length range and average latency:     @@	z:         	z msz	[   z, r   z]:	zAverage Latency: )r   r   printr   r   r   r   r   r   r   sortedkeys
statisticsZmeanlen)r   Ztop_1_error_rateZtop_k_error_ratetotalcountkeyZaverager   r   r   r    %   s2    
4
.zGpt2Metric.print)is_empty_pastc                 C   s6   ||    }|r$t| j|| _nt| j|| _|S N)absmaxr   r   )r   Zbaseline_logitsZtreatment_logitsr(   diffr   r   r   diff_logitsC   s
    zGpt2Metric.diff_logits)
batch_sizec                 C   s>   |  j |7  _ tj|dftjd| _tj|dftjd| _d S )Nr   Zdtype)r   torchzerosboolr   r   )r   r.   r   r   r   start_batchL   s    zGpt2Metric.start_batchTc                 C   s^   |  |j|jd| |  |j|j| j| | |j|j|dk}|rZtd| j d|  d S )Nr   r   zMax logits diffs of z: )
_eval_topktop_1_tokenstop_k_tokensr   r-   logitsr    r   )r   r   r   past_seq_lenverbosemax_diffr   r   r   
eval_batchQ   s
    zGpt2Metric.eval_batchc                 C   s   t t ||s|dkrJ|r.td| j  |  jt || O  _nH|rftd| d| j d |  jt || dj	dddkO  _d S )Nr   z!Generated tokens not matched for zTop z tokens not matched for z-. This will lead to wrong beam search results)Zdimr   )
r0   alleqr    r   r   Zlogical_notr   sum	unsqueeze)r   Zbaseline_topkZtreatment_topkr   r9   r   r   r   r4   Y   s     zGpt2Metric._eval_topkc                 C   s,   |  j | j 7  _ |  j| j 7  _d S r)   )r   r   r>   r   r   r   r   r   r   	end_batchh   s    zGpt2Metric.end_batchc                 C   sF   |dkrt t|d nd}|| jkr2g | j|< | j| | d S )Nr   r   )intmathlog2r   append)r   r8   Zlatencyr'   r   r   r   add_latencyl   s    

zGpt2Metric.add_latencyN)r   r   )T)T)__name__
__module____qualname__r   r    r2   r-   rB   r3   r;   r4   rA   rF   r   r   r   r   r      s   
	

r   c                
   @   s   e Zd ZdddZedddZdd	 Zd
d Zdd Ze	dddZ
e	dd Ze	dd Ze	ejddddddddf	ddZdS )
Gpt2TesterFr   c                 C   s   |j d | _|j d | _|| _|| _|| _|| _|d k	| _|d k	| _g | _	d| j|d|| g}t
|D ]2}t||r~tjntj}| j	|| qfd | _d | _d | _|	| _|
| _d S )Nr   r   r   )shaper.   input_lengthn_layer	input_idsposition_idsattention_maskhas_position_idshas_attention_maskpastranger0   emptytypefloat16Zfloat32rE   tor7   r5   r6   r   top_k_required_order)r   rN   rO   rP   Znum_attention_headsZhidden_sizeZ	num_layerdeviceZis_fp16r   rY   Z
past_shapeZ_iZ
empty_pastr   r   r   r   t   s.    

zGpt2Tester.__init__)returnc                 C   s   t | j| j| j| jS r)   )r   rN   rO   rP   rS   r@   r   r   r   
get_inputs   s    zGpt2Tester.get_inputsc              
      s  ddl m  tj|dt| }tj|rBtd| d d S tj|dd  fdd	}g }||| j	d
 | j
r||| jd | jr||| jd t| jD ]}||| j| dt|  qt|D ]>\}}	ttj|d| dd}
|
|	  W 5 Q R X qdd | D }t|D ]t\}} t|| tjrH|| n||    }	ttj|d| dd}
|
|	  W 5 Q R X q"td|  d S )Nr   numpy_helperZtest_data_set_z
Directory z existed. Skip saving test dataT)exist_okc                    s"   |   |   | d S r)   )rE   
from_arrayclonecpunumpy)input_tensorsZtorch_tensorr   r]   r   r   
add_tensor   s    z-Gpt2Tester.save_test_data.<locals>.add_tensorrN   rO   rP   Zpast_Zinput_z.pbwbc                 S   s   g | ]
}|j qS r   )r   ).0outputr   r   r   
<listcomp>   s     z-Gpt2Tester.save_test_data.<locals>.<listcomp>Zoutput_zTest data saved to directory )onnxr^   ospathjoinstrexistsr    makedirsrN   rQ   rO   rR   rP   rT   rM   rS   	enumerateopenwriteZSerializeToStringget_outputsr`   
isinstancerc   ndarrayra   rb   )r   sessionrh   save_test_data_dirZtest_case_idrl   re   rd   itensorfZoutput_names_namer   r]   r   save_test_data   s4    ,zGpt2Tester.save_test_datac                 C   sl  t |d tjrt|d n|d    | _t	
| j| _t	
| j| j| j| _| j  | jdg|| _| jrt| j| d gd| jd|| _| jrt| jt| jdg| jgd|| _g | _t |d trt |d | _n`t!| j"D ]T}t ||d  tjr>t||d  n||d    }| j#|| qdS )z7
        Update the inputs for next inference.
        r   r   N)$ru   rc   rv   r0   
from_numpyra   detachrb   r7   rJ   predict_next_tokenr5   r   rY   r6   Zreshaper.   rX   rN   rQ   rz   rL   r?   repeatrO   rR   catrP   ZonesZtype_asrS   tuplelistrT   rM   rE   )r   rh   steprZ   ry   past_ir   r   r   update   s4    0"*zGpt2Tester.updatec                 C   s2  t d | jdk	r<| j|j   }|dkr<t d|  t| j|jks^t d| j|j | jrt| j|jkst d| j|j | j	rt| j
|j
kst d| j
|j
 t| jt|jkstt| jD ]\\}}|j|j| jkst| dkr||j|    }|dkrt d	| d
|  qdS )z3
        Compare inputs and logits output.
        zstart diff...Ng-C6?z$Max logits difference is too large: zInput_ids is differentzposition_ids is differentzattention_mask is differentr   zmax_past_diff[z]=)r    r7   r*   r+   r0   r<   rN   rQ   rO   rR   rP   r$   rS   r
   rq   rK   Znelement)r   r   Zmax_io_diffry   r   Zmax_past_diffr   r   r   r,      s8    
zGpt2Tester.diffr   c                 C   sf   | dddf }|dkr*t |dd}|S t j|dddddd|f }|s^| \}}|S |S dS )z4
        Get top k topkens based on logits.
        Nr   T)Z
descending)r0   ZargmaxZargsortsort)r7   r   Zrequired_orderZlastTokenLogitsZgeneratedTokensZtopkZsorted_topk_r   r   r   r     s     zGpt2Tester.predict_next_tokenc                 C   s   g }t |D ]}t| |d  tjr6t| |d  n
| |d  }t||d  tjrht||d  n
||d  }||   }|| qt	d|  dS )zO
        Compare the present outputs of two outputs from ONNX Runtime.
        r   zpresent_diff_max=N)
rT   ru   rc   rv   r0   r~   r*   r+   rE   r    )onnx_outputonnx_io_outputrM   Zpresent_diff_maxry   Zonnx_present_iZonnx_io_present_ir:   r   r   r   diff_present+  s    

zGpt2Tester.diff_presentc                 C   s*   ddl m} || }ddlm} |j|kS )z>
        Returns True if the ONNX model is quantized.
        r   )load)__producer__)rj   r   Z!onnxruntime.quantization.quantizer   Zproducer_name)Zonnx_model_pathr   modelZquantize_producerr   r   r   is_quantized_onnx_model@  s    z"Gpt2Tester.is_quantized_onnx_modelZGpt2LMHeadModelT   r   .c           -      C   s  t d| d| d| dt| d|	 d |jj}|jj}|jj}|jj}d}|tjk}|rtd| 	 d j
kstt| | tjd	d
d|j|d}tj|||d}d}|tjkrdnd}t|||}t|||}t|d ||}t|D ]\}}|	dkr||	kr q|d dkr*t |  |d }d|krD|d nd}d|krZ|d nd}t|||||||||| 
}t|||||||||| 
} t|||||||d|| 
}!|!j}"||" ||" t  tj|"tjd}#t|D ]}$t|j d }%t|jd  d }&t ! }'t"||!# }(|$|&t ! |'  |!%|(|$| tj&| |# dd\})}*|$|&|*d  |%|)|$| tj|"|&|%|j|d}+t'||+ tj(| | # ||+dddd\},}*|$|&|*d  ||k r| )| |,|| |d7 }| %|,|$| |
rZ|*|  t+|)|,| t d  t d!|!j, t d"|j, t d#| j, |j-|!||&|
d$ |j-|!| |&|
d$ |#|!j,|k. B }#t/|#r qqW 5 Q R X |0  |0  q|   |   |   dS )%z
        Test Generation using greedy beam search (without sampling) to compare PyTorch and ONNX model.
        It will print top 1 and top k errors on the given test inputs.
        zstart test generation: (top_k=z top_k_no_order=z max_steps=z test_inputs=z max_inputs=)r   rW             )r.   Zpast_sequence_lengthZsequence_lengthconfigmodel_class)
is_float16r   zQuantized OnnxZOnnxz with IO Binding
   rN   rO   NrP   Fr/   r      )
total_runsr   )r   T)r   Zreturn_numpyZinclude_copy_output_latencyzTop 1 tokens:z	Torchz	ONNXz	ONNX with IO binding)r9   )1r    r$   r   rM   n_headn_embdeos_token_idr   ZFLOAT16rt   rV   r
   evalrX   r   Zget_output_shapesZget_output_buffersZINT8r   rq   rJ   r.   r3   r0   Zno_gradr1   r2   rT   r   rN   sizerS   timeitZdefault_timerZpytorch_inferencer\   rF   r   Zonnxruntime_inferenceZauto_increase_buffer_sizeZ$onnxruntime_inference_with_binded_ior}   r,   r   r5   r;   anyr<   rA   )-rw   r   rZ   Ztest_inputsZ	precisionr   r   Ztop_k_no_orderZ	max_stepsZ
max_inputsr9   r}   rx   rM   r   r   r   Ztest_data_savedr   Zinit_output_shapesZoutput_buffersr   r   Ztorch_metricZonnx_metricZonnx_io_metricry   inputsrN   rO   rP   Zonnx_runnerZonnx_io_runnerZtorch_runnerr.   doner   Zseq_lenr8   
start_timeZpytorch_outputr   Zavg_latency_msZoutput_shapesr   r   r   r   test_generationL  s   &



  




zGpt2Tester.test_generationN)Fr   F)r   F)rG   rH   rI   r   r   r\   r}   r   r,   staticmethodr   r   r   r   ZFLOAT32r   r   r   r   r   rJ   s   s2   
   
+'('

rJ   )loggingrC   rk   r#   r   rc   r0   Zbenchmark_helperr   Zgpt2_helperr   r   	getLoggerrG   loggerr   rJ   r   r   r   r   <module>   s   
^