U
    hX:                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ e eZG d
d dejjZG dd dejjZG dd dZ G dd dZ!dS )    N)Path)ListOptionalUnion)
TypeHelper)PastKeyValuesHelper)	OnnxModel)torch_onnx_export)WhisperConfig
file_utils)InferenceSessionc                       sH   e Zd ZdZd	ejjeee	 d fddZ
ejejdddZ  ZS )
WhisperDecoderInitzvA Whisper decoder to create initial past key values.
    This model is only called once during starting decoding.
    N)decoderconfigdecoder_start_token_idc                    s0   t    || _|| _|d k	r"|n| jj| _d S N)super__init__r   r   r   )selfr   r   r   	__class__ [/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/models/whisper/whisper_decoder.pyr       s
    
zWhisperDecoderInit.__init__)decoder_input_idsencoder_hidden_statesc                 C   sV   t  }||d< d |d< d |d< | jjd ||d ddd}| j|d }||j|jfS )Nlast_hidden_statehidden_states
attentionsTencoder_outputsr   past_key_valuesZ	use_cacheZreturn_dictr   )r   ModelOutputr   modelZproj_outr    Zencoder_last_hidden_state)r   r   r   r   outlogitsr   r   r   forward-   s    zWhisperDecoderInit.forward)N)__name__
__module____qualname____doc__torchnnModuler
   r   intr   ZTensorZFloatTensorr%   __classcell__r   r   r   r   r      s    r   c                       s(   e Zd ZdZ fddZdd Z  ZS )WhisperDecoderz&A Whisper decoder with past key valuesc                    s   t    || _|| _d S r   )r   r   r   r   )r   r   r   r   r   r   r   F   s    
zWhisperDecoder.__init__c           
      G   s   t  }t|jd dt| jjf}||d< ||d< d |d< t|dkrPd }n
t	
|}| jd |||ddd}|d }t	|j\}}	||fS )Nr     r   r   r   Tr   )r   r!   r*   Zrandnshaper-   r   d_modellenr   Zback_group_by_layerr   Zgroup_by_self_and_crossr    )
r   r   pastr   Zdummy_encoder_hidden_statesr    Zdecoder_outr$   Zpresent_self_r   r   r   r%   K   s&    
zWhisperDecoder.forward)r&   r'   r(   r)   r   r%   r.   r   r   r   r   r/   C   s   r/   c                
   @   sN   e Zd ZdddZedeeeeeje	e	dddZ
edd	d
Zdd ZdS )WhisperDecoderInputsNc                 C   s   || _ || _d S r   )r   r    )r   r   r    r   r   r   r   d   s    zWhisperDecoderInputs.__init__F)r   
batch_sizeencode_sequence_lengthpast_decode_sequence_lengthdevicefloat16use_int32_inputsc                 C   s   | j }| j}| j}	| j| j  }
d}tjd|	d ||f|r>tjntj|d}|rTtjntj	}|dkr||||
g}||||
g}g }t
d| D ]}|tj|||d qt
d| D ]}|tj|||d qnd}t||S )ad  Create dummy inputs for WhisperDecoder.

        Args:
            decoder: decoder
            batch_size (int): batch size
            encode_sequence_length (int): sequence length of input_ids for encoder
            past_decode_sequence_length (int): past sequence length of input_ids for decoder
            device (torch.device): device of output tensors
            float16 (bool): whether the model uses float32 or float16 in input
            use_int32_inputs(bool): whether use int32 instead of int64 for some inputs

        Returns:
            WhisperDecoderInputs: dummy inputs for decoder
           r   )lowhighsizedtyper:      )rA   r:   N)Zencoder_attention_headsdecoder_layers
vocab_sizer2   r*   randintZint32Zint64r;   float32rangeappendZrandr6   )r   r7   r8   r9   r:   r;   r<   Znum_attention_heads
num_layersrD   Z	head_sizesequence_lengthr   Z
float_typeZself_attention_past_shapeZcross_attention_past_shaper4   r5   r   r   r   create_dummyl   s>    z!WhisperDecoderInputs.create_dummy)returnc                 C   s   | j g}| jr|| j |S r   )r   r    extend)r   
input_listr   r   r   to_list   s    zWhisperDecoderInputs.to_listc                 C   s*   | j rdd | j D nd }t| j |S )Nc                 S   s   g | ]}|j tjd qS ))rA   )tor*   rF   ).0pr   r   r   
<listcomp>   s     z0WhisperDecoderInputs.to_fp32.<locals>.<listcomp>)r    r6   r   clone)r   r4   r   r   r   to_fp32   s
    zWhisperDecoderInputs.to_fp32)N)FF)r&   r'   r(   r   staticmethodr
   r-   r*   r:   boolrK   r   rO   rU   r   r   r   r   r6   c   s     
  Cr6   c                	   @   sd   e Zd ZedeejeeeedddZ	ee
dddZedeeef eejeed
ddZdS )WhisperDecoderHelperTF)r   r:   onnx_model_pathverboseuse_external_data_formatr<   c                 C   s  t | ttfsttj| jddt | tr,dnd||d}| }tj	| jj
dd}tj	| jj
dd}	|	d	d| jj
  }
t | tr|ng }t | tr|
n|	}d|}dg}|| ddiddddddd}|D ]}dd|krdndd||< q|D ]F}d|krddd||< qt | tr2ddd||< qddi||< qt|jjddd t z}tj|d}t|jjddd t| t||r|n|d|||dd||d |rtj|dd}tj||ddd W 5 Q R X d	S )a  Export decoder to ONNX

        Args:
            decoder (Union[WhisperDecoder, WhisperDecoderNoPastState]): decoder object
            device (torch.device): device of decoder object
            onnx_model_path (str): onnx path
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs
        rB   r0      r   )r7   r8   r9   r:   r<   F)ZpresentTNr$   	input_idsr7   zencode_sequence_length / 2)r   r=   rJ   )r]   r   r$   r   r9   r8   )r   rB   Zcrosszpast_decode_sequence_length + 1)parentsexist_okzdecoder.onnx   )
argsfZexport_paramsinput_namesoutput_namesdynamic_axesZopset_versionZdo_constant_foldingr[   rZ   )Zload_external_data)Zsave_as_external_dataZall_tensors_to_one_file)r$   )
isinstancer/   r   AssertionErrorr6   rK   r   rO   r   get_past_namesrC   rM   r   parentmkdirtempfileTemporaryDirectoryospathjoinr	   tupleonnxZ
load_modelr   save)r   r:   rY   rZ   r[   r<   inputsrN   
past_namesZpresent_namesZpresent_self_namesZinput_past_namesZoutput_present_namesrd   rc   re   nameZtmp_dir_nameZtemp_onnx_model_pathr"   r   r   r   export_onnx   sz    

 

z WhisperDecoderHelper.export_onnx)rs   c                 C   s   t d dt|j  i}|jrt|jd dks>tt	t|jd }t
|}t|jD ]"\}}t|  ||| < qd| d|}|S )zRun inference of ONNX model.zstart onnxruntime_inferencer]      r   N)loggerdebugnumpyZascontiguousarrayr   cpur    r3   rg   r-   r   rh   	enumeraterun)ort_sessionrs   Z
ort_inputsrI   rt   iZpast_tensorort_outputsr   r   r   onnxruntime_inference-  s    
 
z*WhisperDecoderHelper.onnxruntime_inferencerw   )r"   r~   r:   r<   	max_casesc                 C   s  t |ddk}ddddg}g }|d| D ]\}}	}
t| trHd}n|
}tj| j||	||||d	}|  }t	
  | | }W 5 Q R X t||}tt|d   |d  }|}td
|  td| jj D ]P}tt|d |   |d|   }td| d|  t||}qt| trtd| jj D ]^}tt|d |   |dd| jj  |   }td| d|  t||}qP|| td||	|
| q,|S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.Zpast_key_self_0ztensor(float16))rw         )r=   rB   r\   )r   r=   r=   )   r\   rB   Nr   )r:   r;   r<   zlogits max_diff=rB   r=   zself attention past state z
 max_diff=zcross attention past state zUbatch_size=%s, encode_sequence_length=%s, past_decode_sequence_length=%s, max_diff=%s)r   Zget_input_typerf   r   r6   rK   r   rU   rO   r*   Zno_gradrX   r   rz   Zamaxabsr{   rx   ry   rG   rI   maxrH   info)r"   r~   r:   r<   r   r;   Z
test_casesZtest_cases_max_diffr7   r8   r9   Zdec_seq_lenrs   rN   Ztorch_outputsr   Zmax_diffZmax_diff_allr   r   r   r   verify_onnx@  s^    	


$,0
z WhisperDecoderHelper.verify_onnxN)TFF)rw   )r&   r'   r(   rV   r/   r*   r:   strrW   rv   r6   r   r   r   r   r-   r   r   r   r   r   rX      s.      m 
rX   )"loggingrm   rk   pathlibr   typingr   r   r   rz   rq   r*   Zio_binding_helperr   Zmodels.t5.past_helperr   Z
onnx_modelr   Ztorch_onnx_export_helperr	   Ztransformersr
   r   Zonnxruntimer   	getLoggerr&   rx   r+   r,   r   r/   r6   rX   r   r   r   r   <module>   s$   
( [