U
    h/                     @   sJ  d dl mZmZ d dlZd dlZd dlmZ d dlm	Z	m
Z
 ejedddZd)eejeeeed
ddZd*eejeeeeeedddZd+eejeeeeeeeeedddZeeeeeeeedddZd,eeeeedddZeeejejf  dddZd-eeeeeed d!d"Zeeed#d$d%Ze	eeeeed&d'd(ZdS ).    )ListTupleN)
AutoConfig)InferenceSessionOrtValue)attention_maskuse_past_kvc                 C   s@   |   dd }|| dkd |r<|d d df d}|S )N   r   )longZcumsumZmasked_fill_Z	unsqueeze)r   r   position_ids r   V/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/models/llama/llama_inputs.pyget_position_ids   s
    r   ptF)configdevice
batch_sizeseq_lenenginereturn_dictc           
      C   s   t jd| j||ft jd}t j||t jd}t|dd}|dkrH| n||}|dkrb| n||}|dkr|| n||}|s|||fS |||d}	|	S )Nr   lowhighsizedtyper   Fr   ort	input_idsr   r   )torchrandint
vocab_sizeint64onesr   numpyto)
r   r   r   r   r   r   r    r   r   inputsr   r   r   get_sample_inputs   s    
r)   r
   )r   r   r   past_seq_lenuse_fp16r   r   
world_sizec                    s.  t jd| j|dft jd}t j||d t jd}	t|	dd}
t| ||||d}|dkr^| n| }|dkrx|	 n|	 }	|dkr|
 n|
 }
|dkrt	|nt
t fd	d
|}|st|t
st||	|
|fS ||	|
d}|dkrt|tst|| nt|t
s"t||d< |S )Nr   r
   r   r   Tr   r,   r   c                    s   | d   | d   fS Nr   r
   r'   kvr   r   r   <lambda>T       z0get_sample_with_past_kv_inputs.<locals>.<lambda>r   past_key_values)r!   r"   r#   r$   r%   r   get_past_kv_inputsr&   r'   flatten_past_kv_inputslistmap
isinstanceAssertionErrordictupdate)r   r   r   r*   r+   r   r   r,   r    r   r   past_kvr(   r   r2   r   get_sample_with_past_kv_inputs=   s0    


r?   )r   r   r   r   r*   max_seq_lenr+   use_gqar   r   r,   c                    sD  t jd| j||ft jd}t j||| t jd}t||dkd}t| ||||
d}|dkrb| n| }|dkr|| n| }|dkr| n| }|dkrt	|nt
t fdd|}|	st|t
st||||fS |||d	}|dkr(t|ts
t|| |r@t|||}nt|t
s8t||d
< |S )Nr   r   r   r   r-   r   c                    s   | d   | d   fS r.   r/   r0   r2   r   r   r3      r4   z7get_merged_sample_with_past_kv_inputs.<locals>.<lambda>r   r5   )r!   r"   r#   r$   r%   r   r6   r&   r'   r7   r8   r9   r:   r;   r<   r=    enable_past_present_share_buffer)r   r   r   r   r*   r@   r+   rA   r   r   r,   r    r   r   r>   r(   r   r2   r   %get_merged_sample_with_past_kv_inputsu   s4    


rC   )r   r   r*   r   r@   r+   rA   split_kvc                 C   sp  |r
t jnt j}| j| j }	|st j||| j|dt jt 	|||fdd |t j|| j
|| j|	|t j|| j
|| j|	|t j|t jdd}
nt j||| j|t jt j	|||ft jdddd t jt j|t jdd}
t| j
D ]T}|
d| dt j|| j||	|d	| dt j|| j||	|i q|rlt|
||}
|
S )
Ng     r
   )kr   )x	attn_maskZk_cacheZv_cachepos)rF   rG   rH   Zk__cacheZv_)npfloat16float32hidden_sizenum_attention_headsrandomrandZastypeZtriur%   num_hidden_layersarrayr$   Zint32ranger=   rB   )r   r   r*   r   r@   r+   rA   rD   Znp_dtype	head_size
ort_inputsir   r   r   get_msft_sample_inputs   sp    
"        $
    
    rW   )r   r   r*   r+   r,   c                    sL   | j | | j| j  |r"tjntj fddt| jD }|S )Nc              
      s4   g | ],}t j d t j d fqS )r   )r!   rP   ).0_r   rT   	num_headsr*   Ztorch_dtyper   r   
<listcomp>   s   z&get_past_kv_inputs.<locals>.<listcomp>)Znum_key_value_headsrM   rN   r!   rK   rL   rS   rQ   )r   r   r*   r+   r,   r>   r   rZ   r   r6      s    r6   )r5   c                 C   sV   i }t | D ]D\}\}}|   |d| d< |   |d| d< q|S )Nzpast_key_values.z.keyz.value)	enumeratedetachcpur&   )r5   r>   rV   Zpast_kZpast_vr   r   r   r7      s
    r7       r	   )	pt_inputsrA   r*   r@   r   	device_idc           	      C   s   i }|   D ]J\}}t|tjr*|||< q|dkrB|t| q|   ||< q|r|dkr|dkr|dkrt	|||}|S )Nr5   ra   r_   r	   )
itemsr:   rJ   Zndarrayr=   r7   r^   r_   r&   rB   )	rb   rA   r*   r@   r   rc   rU   rE   vr   r   r   convert_inputs_for_ort   s    
rf   )rU   r*   r@   c           
      C   st   |   D ]f\}}d|ks d|kr|j\}}}}tj||||f|jd}	||	d |d |d |d |f< |	| |< q| S )Ncacher5   r   )rd   shaperJ   zerosr   )
rU   r*   r@   rE   re   r   r[   rY   rT   Znew_vr   r   r   rB     s     
rB   )modelrU   r   rc   rA   kv_cache_ortvaluesc                 C   s   |   }| D ]\}}|rzd|ks,d|krz||krZtj|||d}	|||	 |	||< q|| | ||||  qtj|||d}	|||	 q|  D ]T}
|
j}|rd|ksd|kr|dddd}|	|||  q|j
|||d q||fS )Nrg   r5   )Zdevice_typerc   outZpresent)
io_bindingrd   r   Zortvalue_from_numpyZbind_ortvalue_inputZupdate_inplaceget_outputsnamereplaceZbind_ortvalue_outputZbind_output)rj   rU   r   rc   rA   rk   rm   rE   re   Zv_deviceoutputro   Z
input_namer   r   r   add_io_bindings  s$    
rr   )r   F)Fr   Fr
   )FFr   Fr
   )r
   )Fr   r`   ra   r	   )typingr   r   r&   rJ   r!   Ztransformersr   Zonnxruntimer   r   ZTensorboolr   r   intstrr)   r?   rC   rW   r6   r7   r<   rf   rB   rr   r   r   r   r   <module>   s     (    ?     75          