U
    “±Ëh¾
  ã                   @   s`   d dl Z d dlZd dlZd dlZd dlmZ e  e¡Zdd„ Z	dd„ Z
dd„ ZG d	d
„ d
ƒZdS )é    N)ÚConv1Dc                 C   s<   | j j\}}tj ||¡}| j jj ¡ |j _| jj|j_|S )N)	ZweightÚshapeÚtorchÚnnÚLinearÚdataÚTÚ
contiguousZbias)ÚmoduleZin_sizeZout_sizeÚlinear© r   úL/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/transformers/quantize_helper.pyÚ_conv1d_to_linear   s
    r   c                 C   sN   t  d¡ t| jƒD ]4}| j| }t|tƒr@t|ƒ}|| j|< qt|ƒ qdS )zsin-place
    This is for Dynamic Quantization, as Conv1D is not recognized by PyTorch, convert it to nn.Linear
    zreplace Conv1D with LinearN)ÚloggerÚdebugÚlistZ_modulesÚ
isinstancer   r   Úconv1d_to_linear)ÚmodelÚnamer
   r   r   r   r   r      s    


r   c                 C   s.   t  |  ¡ d¡ tj d¡d }t d¡ |S )Nztemp.pé   )r   ÚsaveZ
state_dictÚosÚpathÚgetsizeÚremove)r   Úsizer   r   r   Ú_get_size_of_pytorch_model'   s    
r   c                   @   s,   e Zd Zeejfdd„ƒZeddd„ƒZdS )ÚQuantizeHelperc                 C   sL   t | ƒ tjj| tjjh|d}t dt| ƒ› ¡ t dt|ƒ› ¡ |S )z{
        Usage: model = quantize_model(model)

        TODO: mix of in-place and return, but results are different
        )Údtypez'Size of full precision Torch model(MB):z"Size of quantized Torch model(MB):)	r   r   ZquantizationÚquantize_dynamicr   r   r   Úinfor   )r   r   Zquantized_modelr   r   r   Úquantize_torch_model/   s
    z#QuantizeHelper.quantize_torch_modelFc                 C   s†   ddl m} ddlm} ||ƒjjddd t dtj	 
| ¡d › ¡ || ||d t d	|› ¡ t d
tj	 
|¡d › ¡ d S )Nr   )ÚPath)r    T)ÚparentsÚexist_okz&Size of full precision ONNX model(MB):r   )Úuse_external_data_formatzquantized model saved to:z!Size of quantized ONNX model(MB):)Úpathlibr#   Zonnxruntime.quantizationr    ÚparentÚmkdirr   r!   r   r   r   )Zonnx_model_pathZquantized_model_pathr&   r#   r    r   r   r   Úquantize_onnx_model<   s    ýz"QuantizeHelper.quantize_onnx_modelN)F)Ú__name__Ú
__module__Ú__qualname__Ústaticmethodr   Zqint8r"   r*   r   r   r   r   r   .   s   r   )Úloggingr   Zonnxr   Ztransformers.modeling_utilsr   Ú	getLoggerr+   r   r   r   r   r   r   r   r   r   Ú<module>   s   
