U
    h7                  
   @   s  d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dl	Z
d dlZd dlmZmZmZ d dlmZmZmZ d dlmZmZ d dlmZmZ d dlmZ ejejeje d	d	 d d
l!m"Z" d dl#m$Z$ d dl%m&Z& e 'e(Z)ddddddddddg
Z*G dd dZ+dS )    N)Path)DictTupleUnion)WhisperConfigWhisperForConditionalGenerationWhisperProcessor)WhisperDecoderWhisperDecoderHelperWhisperDecoderInit)WhisperEncoderWhisperEncoderHelper)WhisperEncoderDecoderInitWhisperEncoderDecoderInitHelper)InferenceSessionz..)float_to_float16_max_diff)	OnnxModel)optimize_modelzwhisper-tinyzwhisper-tiny.enzwhisper-smallzwhisper-small.enzwhisper-mediumzwhisper-medium.enzwhisper-basezwhisper-base.enzwhisper-largezwhisper-large-v2c                   @   s   e Zd ZedeeeeedddZedeeejeee	eej
jf ddd	Zedeeeeef ejeeeeed
ddZedeee dddZedeeeeeeeeed	ddZeeeejdddZdS )WhisperHelper F)
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 C   s^   |}t j|r t|jd }n|dd }||7 }|rHt j| |n| }t j||d S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   Z
model_name	directory r$   Z/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/models/whisper/whisper_helper.pyget_onnx_path,   s    zWhisperHelper.get_onnx_pathT)r   	cache_dirdevicemerge_encoder_and_decoder_initstate_dict_pathr   c           
      C   s   t j| |d}|r&|jt|dd t||j}| | |r`t	|||jdd}||dS t
|jj|j}| | t|j|j}	|	 | |||	dS dS )a  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            merge_encoder_and_decoder_init (bool, optional): Whether merge encoder and decoder initialization into one ONNX model. Defaults to True.
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        )r'   F)strictN)decoder_start_token_id)encoder_decoder_initdecoder)encoderr.   decoder_init)r   from_pretrainedZload_state_dicttorchloadr	   configevaltor   r   modelr/   r   r.   )
r   r'   r(   r)   r*   r7   r.   r-   r/   r0   r$   r$   r%   
load_modelI   s*    
zWhisperHelper.load_modelr7   r(   onnx_model_pathverboseuse_external_data_formatZuse_decoder_input_idsZuse_int32_inputsc              	   C   sX   t | trt| |||| n6t | tr@t| |||||| nt| ||||| d S N)
isinstancer   r   export_onnxr   r   r
   r9   r$   r$   r%   r?   u   s4    



zWhisperHelper.export_onnxZSimplifiedLayerNormalizationZ SkipSimplifiedLayerNormalizationZReluZAdd)
onnx_modelop_block_listc                 C   s\  t dd |  D }t |}||}td| d|  |  jd j}d}|  }||ksft	|| }d}	|j
dkr|}	td	|j  d}
|jD ]}| |}
|
dk	r qqt|
}td
|j d|  |dk }ntd|j
 d|j  g }g }|s"|	dk	r"|g}|	jg}|t|||d}td|  | jf ddi| |S )a  Convert model to mixed precision.
           It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): . Defaults to ["SimplifiedLayerNormalization", "SkipSimplifiedLayerNormalization", "Relu", "Add"]
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        c                 S   s   g | ]
}|j qS r$   )op_type).0noder$   r$   r%   
<listcomp>   s     z6WhisperHelper.auto_mixed_precision.<locals>.<listcomp>z	fp32 op: z
 fp16 op: r   FNZMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node )keep_io_typesrB   node_block_listZforce_fp16_initializersz!auto_mixed_precision parameters: Zuse_symbolic_shape_inferT)setZnodes
differenceloggerinfographoutputnameoutput_name_to_nodeAssertionErrorrC   inputZget_initializerr   debugwarninglistZconvert_float_to_float16)rA   rB   Zop_full_setZfp32_op_setZfp16_op_setZlogits_output_nameZis_weight_fp16_precisionrP   rE   Zlast_matmul_nodeZinitializerrR   max_diffrG   rH   
parametersr$   r$   r%   auto_mixed_precision   sF    




z"WhisperHelper.auto_mixed_precisioncpu)	r:   optimized_model_path
is_float16num_attention_headshidden_sizer<   rX   use_gpuproviderc	              
   C   sx   ddl m}	 |	d}
d|
_|dk|
_t| d|||s6dnd|
|dd	}|rd|rXt| n|jdd
 |j||dd dS )zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsZbartTZrocm   NF)Z
model_typeZ	num_headsr]   	opt_leveloptimization_optionsr^   Zonly_onnxruntime)Zcast_input_output)Zall_tensors_to_one_file)	Zfusion_optionsr`   Zuse_multi_head_attentionZ!disable_multi_head_attention_biasr   r   rX   Z convert_model_float32_to_float16Zsave_model_to_file)r:   rZ   r[   r\   r]   r<   rX   r^   r_   r`   rc   mr$   r$   r%   optimize_onnx   s&    

zWhisperHelper.optimize_onnx)r   ort_sessionr(   c           !   
   C   s  t | |}t| }t| }zddlm} W nV tk
r } z8tj	d| dd d}t
d| d t| W 5 d	}~X Y nX ddlm} |d
ddd}	||	d d d gddj}
d\}}}}}d\}}|
|||||||ddd	}|jf |   }|d= |d= ttdd | }ttdd | }tjtjtjtjtjtjd}t||D ]\}}|dkr||    ||< n|dkrtj|j|| d||< n|dkrtj||jf|| d||< nn|dkrtj|j d d!d"gg|| d||< n@|d#kr8tjd$g|| d||< ntj|| g|| d||< qn|!d	|d d }|j"|j"krt
d% || }t#|$ |# t%d&}|dkrd'}|j&|dd(}d)}|j&|dd(}||d ko||d k} | rd}|S )*zRCompare the result from PyTorch and ONNX Runtime to verify the ONNX model is good.r   )load_datasetz.An error occurred while importing `datasets`: T)exc_infozpip install datasetszCCould not import `datasets`. Attempting to install `datasets` via `z`.Nz)hf-internal-testing/librispeech_asr_dummycleanZ
validation)r!   audioarraypt)Zreturn_tensors)      r      rm   )      ?rp   )	input_features
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyearly_stopping	use_cacherx   ry   c                 S   s   | j S r=   )rO   entryr$   r$   r%   <lambda>0      z+WhisperHelper.verify_onnx.<locals>.<lambda>c                 S   s   | j S r=   )typerz   r$   r$   r%   r|   1  r}   )ztensor(float)ztensor(float16)ztensor(int64)ztensor(int32)ztensor(int8)ztensor(uint8)rq   Z
vocab_mask)dtypeZprefix_vocab_maskZdecoder_input_idsiS  i  i  Zlogits_processorrm   z;PyTorch and ONNX Runtime outputs do not have the same shape)keyzX Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.)Zskip_special_tokenszY Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.)'r   r1   r6   r   r   Zdatasetsrg   	ExceptionrK   errorrT   r   systemrq   generatedetachrY   numpyrU   map
get_inputsnpZfloat32float16Zint64Zint32Zint8Zuint8zipZonesZ
vocab_sizerk   r,   runshapemaxminabsZbatch_decode)!r   rf   r(   Zpt_model	processorr4   rg   eZinstall_cmdZdsrq   Z
batch_sizerr   rs   rt   ru   rv   rw   inputsZ
pt_outputsZ	ort_namesZ
ort_dtypesZ	ort_to_nprO   r   Zort_outputsZdiffrV   Zpt_expected_transcriptionZpt_transcriptionZort_expected_transcriptionZort_transcriptionZparityr$   r$   r%   verify_onnx  s    

	



$
 

zWhisperHelper.verify_onnxN)r   F)Tr   )TFTF)r@   )FTFrY   )__name__
__module____qualname__staticmethodstrboolr&   r2   r(   r   nnModuler8   r   r   r	   r   r   r?   r   r   rX   intre   r   r   r$   r$   r$   r%   r   +   s|       +    % C    &r   ),loggingr   syspathlibr   typingr   r   r   r   r   r2   Ztransformersr   r   r   Zwhisper_decoderr	   r
   r   Zwhisper_encoderr   r   Zwhisper_encoder_decoder_initr   r   Zonnxruntimer   r   appendr"   dirname__file__r   r   rA   r   Z	optimizerr   	getLoggerr   rK   ZPRETRAINED_WHISPER_MODELSr   r$   r$   r$   r%   <module>   s8    
