
    h#                     z    S SK r S SKrS SKrS SKrS SKJr  \ R                  " \5      rS r	S r
S r " S S5      rg)    N)Conv1Dc                 8   U R                   R                  u  p[        R                  R	                  X5      nU R                   R
                  R                  R                  5       UR                   l        U R                  R
                  UR                  l        U$ )N)	weightshapetorchnnLineardataT
contiguousbias)modulein_sizeout_sizelinears       b/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/quantize_helper.py_conv1d_to_linearr      sf    ++GXX__W/F++--88:FMM{{''FKKM    c                     [         R                  S5        [        U R                  5       HM  nU R                  U   n[	        U[
        5      (       a  [        U5      nX0R                  U'   MB  [        U5        MO     g)zkin-place
This is for Dynamic Quantization, as Conv1D is not recognized by PyTorch, convert it to nn.Linear
zreplace Conv1D with LinearN)loggerdebuglist_modules
isinstancer   r   conv1d_to_linear)modelnamer   r   s       r   r   r      s]     LL-.U^^$%ff%%&v.F#)NN4 V$ %r   c                     [         R                  " U R                  5       S5        [        R                  R                  S5      S-  n[        R                  " S5        U$ )Nztemp.p   )r   save
state_dictospathgetsizeremove)r   sizes     r   _get_size_of_pytorch_modelr'   '   sA    	JJu!8,77??8$4DIIhKr   c                   R    \ rS rSr\\R                  4S j5       r\SS j5       rSr	g)QuantizeHelper.   c                    [        U 5        [        R                  R                  U [        R                  R
                  1US9n[        R                  S[        U 5       35        [        R                  S[        U5       35        U$ )zc
Usage: model = quantize_model(model)

TODO: mix of in-place and return, but results are different
)dtypez'Size of full precision Torch model(MB):z"Size of quantized Torch model(MB):)	r   r   quantizationquantize_dynamicr   r	   r   infor'   )r   r,   quantized_models      r   quantize_torch_model#QuantizeHelper.quantize_torch_model/   ss     	,,==eehhooEV^c=d=>XY^>_=`ab89STc9d8efgr   c                    SSK Jn  SSKJn  U" U5      R                  R                  SSS9  [        R                  S[        R                  R                  U 5      S-   35        U" U UUS[        R                  R                  0S	9  [        R                  S
U 35        [        R                  S[        R                  R                  U5      S-   35        g )Nr   )Path)r.   T)parentsexist_okz&Size of full precision ONNX model(MB):r   DefaultTensorType)use_external_data_formatextra_optionszquantized model saved to:z!Size of quantized ONNX model(MB):)pathlibr4   onnxruntime.quantizationr.   parentmkdirr   r/   r"   r#   r$   onnxTensorProtoFLOAT)onnx_model_pathquantized_model_pathr8   r4   r.   s        r   quantize_onnx_model"QuantizeHelper.quantize_onnx_model<   s     =!"))//t/L<RWW___=]al=m<nop %=.0@0@0F0FG		
 	/0D/EFG7H\8]al8m7nopr    N)F)
__name__
__module____qualname____firstlineno__staticmethodr   qint8r1   rC   __static_attributes__rE   r   r   r)   r)   .   s1    */++ 
 
 q qr   r)   )loggingr"   r>   r   transformers.modeling_utilsr   	getLoggerrF   r   r   r   r'   r)   rE   r   r   <module>rP      sA     	   .			8	$%q qr   