U
    h0                     @   s   d dl Z d dlmZ d dlmZmZmZmZmZ d dl	Z	d dl
Z
d dlmZ e eZG dd dZG dd dZG d	d
 d
ZdS )    N)OrderedDict)AnyDictListTupleUnion)InferenceSessionc                   @   s   e Zd ZeeeedddZeeedddZeeddd	Zeedd
dZ	ee
jdddZeejdddZeeeee
jf dddZdS )
TypeHelper)ort_sessionnamereturnc                 C   s>   t |  D ]\}}|j|kr|j  S qtd| dd S )Nzinput name 
 not found)	enumerate
get_inputsr   type
ValueError)r
   r   _iinput r   N/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/io_binding_helper.pyget_input_type   s    
zTypeHelper.get_input_type)r   r   c                 C   s>   t |  D ]\}}|j|kr|j  S qtd| dd S )Nzoutput name r   )r   get_outputsr   r   r   )r
   r   r   outputr   r   r   get_output_type   s    
zTypeHelper.get_output_type)ort_typec                 C   s6   t jt jt jt jtd}| |kr.t|  d||  S N)ztensor(int64)ztensor(int32)ztensor(float)ztensor(float16)ztensor(bool) not found in map)numpylonglongintcfloat32float16boolr   )r   Zort_type_to_numpy_type_mapr   r   r   ort_type_to_numpy_type   s    z!TypeHelper.ort_type_to_numpy_typec                 C   s8   t jt jt jt jt jd}| |kr0t|  d||  S r   )torchint64int32r    r!   r"   r   )r   Zort_type_to_torch_type_mapr   r   r   ort_type_to_torch_type+   s    z!TypeHelper.ort_type_to_torch_type)
numpy_typec                 C   sP   t jtjt jtjt jtjt jtjt jtjttji}| |krHt	|  d||  S Nr   )
r   r   r$   r%   r   r&   r    r!   r"   r   )r(   Znumpy_type_to_torch_type_mapr   r   r   numpy_type_to_torch_type9   s           z#TypeHelper.numpy_type_to_torch_type)
torch_typec              
   C   sH   t jtjt jtjt jtjt jtjt jti}| |kr@t	|  d||  S r)   )
r$   r%   r   r   r&   r   r    r!   r"   r   )r+   Ztorch_type_to_numpy_type_mapr   r   r   torch_type_to_numpy_typeH   s         z#TypeHelper.torch_type_to_numpy_type)r
   r   c                 C   sH   i }|   D ]}t|j||j< q|  D ]}t|j||j< q,|S )z:Create a mapping from input/output name to numpy data type)r   r	   r#   r   r   r   )r
   Zname_to_numpy_typer   r   r   r   r   get_io_numpy_type_mapV   s    z TypeHelper.get_io_numpy_type_mapN)__name__
__module____qualname__staticmethodr   strr   r   r#   r'   r   dtyper*   r$   r,   r   r-   r   r   r   r   r	      s   r	   c                   @   sR   e Zd ZeedddZedejejejeej dddZ	edd	d
Z
dS )IOBindingHelper)r
   c                 C   sJ   i }|  D ]8\}}t| |}t|}tjt|||d||< q|S )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.)r3   device)itemsr	   r   r'   r$   emptyr   prod)r
   output_shapesr5   output_buffersr   shaper   r+   r   r   r   get_output_buffersc   s    
z"IOBindingHelper.get_output_buffersN)	input_idsposition_idsattention_maskpastc              
   C   s  |dkrt | }|  }| s&t|d|jjd|d t|	 |
  |dk	rt|D ]\\}	}
|
 srt|

 }|dkr|
 }|d|	 |
jjd|d|	  t|
	 | q^|dk	r| st|d|jjd|d t|	 |
  |dk	r8| st|d|jjd|d t|	 |
  |  D ]^}|j}|| }t| d|jj dt|	   |||jjd|| || |
  q@|S )	z)Returnas IO binding object for a session.Nr=   r   Zpast_r?   r>   z device type=z shape=)r	   r-   
io_bindingis_contiguousAssertionError
bind_inputr5   r   listsizedata_ptrr   r   r   loggerdebugbind_output)r
   r=   r>   r?   r@   r:   r9   Zname_to_np_typerA   iZpast_irG   r   output_nameZoutput_bufferr   r   r   prepare_io_bindingm   sv    

	
	
	


&	z"IOBindingHelper.prepare_io_bindingTc           
      C   sn   g }|   D ]\}|j}|| }|| }|dt| |  }	|r^||	   q||	 q|S )z3Copy results to cpu. Returns a list of numpy array.r   )	r   r   r   r8   Zreshapeclonedetachappendcpu)
r
   r:   r9   Zreturn_numpyZort_outputsr   rL   bufferr;   Zcopy_tensorr   r   r   "get_outputs_from_io_binding_buffer   s     z2IOBindingHelper.get_outputs_from_io_binding_buffer)N)T)r.   r/   r0   r1   r   r<   r$   Tensorr   rM   rS   r   r   r   r   r4   b   s   		 Ur4   c                   @   s   e Zd ZdZdeejdddZdd Ze	e
eee ee f f dd	d
Ze	e
ejf dddZeeee	e
ef dddZdS )CudaSessionzLInference Session with IO Binding for ONNX Runtime CUDA or TensorRT providerF)r
   r5   c                 C   sl   || _ dd | j  D | _dd | j  D | _t| j | _| j  | _|| _	t
 | _t
 | _|| _d S )Nc                 S   s   g | ]
}|j qS r   r   ).0r   r   r   r   
<listcomp>   s     z(CudaSession.__init__.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   rV   )rW   r   r   r   r   rX      s     )r
   r   input_namesr   output_namesr	   r-   io_name_to_numpy_typerA   enable_cuda_graphr   input_tensorsoutput_tensorsr5   )selfr
   r5   r\   r   r   r   __init__   s    zCudaSession.__init__c                 C   s   | ` | `| `| `d S )N)r]   r^   rA   r
   )r_   r   r   r   __del__   s    zCudaSession.__del__)
shape_dictc              
   C   sR  | j r| D ]\}}|| jkr|| jkrLt| j| jt|krDqtd| j| }tj	t|t
|dj| jd}|| j|< | j||jj|jj|t| |  q| D ]\}}|| jkr|| jkrt| j| jt|krq| j| }tj	t|t
|dj| jd}|| j|< | j||jj|jj|t| |  qdS )z Allocate tensors for I/O Bindingz(Expect static input shape for cuda graph)r3   )r5   N)r\   r6   rY   r]   tupler;   RuntimeErrorr[   r$   r7   r	   r*   tor5   rA   rD   r   indexrE   rF   rG   rZ   r^   rJ   )r_   rb   r   r;   Znumpy_dtypetensorr   r   r   allocate_buffers   sJ    




	
"


zCudaSession.allocate_buffers)	feed_dictc              
   C   s  |  D ]\}}t|tjr$| s(t|| jkr| jr| j| 	 |	 ksRt| j| j
|j
ksht|jjdksxtddlm} || j|  | | |	  |jj q| j||jj|jjt|j
t|jdkrdgnt|j|  q| j| j | jS )z$Bind input tensors and run inferencecudar   )cudart   )r6   
isinstancer$   rT   rB   rC   rY   r\   r]   Znelementr3   r5   r   rj   rk   Z
cudaMemcpyrG   Zelement_sizeZcudaMemcpyKindZcudaMemcpyDeviceToDevicerA   rD   rf   r	   r,   lenr;   rE   r
   Zrun_with_iobindingr^   )r_   ri   r   rg   rk   r   r   r   infer  s0    

	zCudaSession.infer)	device_idr\   r   c                 C   s   | d|dS )NZkSameAsRequested)rp   Zarena_extend_strategyr\   r   )rp   r\   r   r   r   get_cuda_provider_options8  s    z%CudaSession.get_cuda_provider_optionsN)F)r.   r/   r0   __doc__r   r$   r5   r`   ra   r   r2   r   r   intr   rh   rT   ro   r1   r"   r   rq   r   r   r   r   rU      s   &/!rU   )loggingcollectionsr   typingr   r   r   r   r   r   r$   Zonnxruntimer   	getLoggerr.   rH   r	   r4   rU   r   r   r   r   <module>   s   
Uq