
    h                       S SK Jr  S SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
r
S SKrS SKJrJrJr  S SKJr  S SKJrJrJrJr  S SKJr  S S	KJrJrJr   S S
KJr   S SKJrJ r    S SK!J"r"  Sr#Sr$Sr%Sr&Sr'Sr(Sr)Sr*Sr+Sr,0 r-\." \5       V s0 s H$  n \/" \0" \U 5      \15      (       d  M  \0" \U 5      U _M&     sn r2 " S S\5      r3 " S S\5      r4 " S S\5      r5 " S S\5      r6\R                  Rn                  \
Rp                  " S5      \R                  Rr                  \
Rp                  " S 5      \R                  Rt                  \
Rp                  " S!5      \R                  Rv                  \
Rp                  " S"5      \R                  Rx                  \\R                  Rz                  \\R                  R|                  \ 0r?\R                  Rr                  \
R                  " S \
R                  S#9\
R                  " S$\
R                  S#94\R                  Rn                  \
R                  " S%\
R                  S#9\
R                  " S&\
R                  S#94\R                  Rv                  \
R                  " S \
R                  S#9\
R                  " S'\
R                  S#94\R                  Rt                  \
R                  " S(\
R                  S#9\
R                  " S)\
R                  S#94\R                  R|                  \
R                  " S \ S#9\
R                  " S*\ S#94\R                  Rz                  \
R                  " S+\S#9\
R                  " S,\S#940rE\R                  Rr                  \
R                  " S \
R                  S#9\
R                  " S-\
R                  S#94\R                  Rn                  \
R                  " S.\
R                  S#9\
R                  " S&\
R                  S#94\R                  Rv                  \
R                  " S \
R                  S#9\
R                  " S/\
R                  S#94\R                  Rt                  \
R                  " S0\
R                  S#9\
R                  " S)\
R                  S#940rF\R                  Rr                  \
R                  " S \
R                  S#9\
R                  " S&\
R                  S#94\R                  Rn                  \
R                  " S1\
R                  S#9\
R                  " S2\
R                  S#94\R                  Rv                  \
R                  " S \
R                  S#9\
R                  " S)\
R                  S#94\R                  Rt                  \
R                  " S3\
R                  S#9\
R                  " S4\
R                  S#94\R                  R|                  \
R                  " S \S#9\
R                  " S,\S#94\R                  Rz                  \
R                  " S5\S#9\
R                  " S6\S#940rGS7S8.S9 jrHScS: jrISdS; jrJS< rK    Se               SfS= jjrL Se SgS> jjrM  Sc             ShS? jjrNSiS@ jrOSiSA jrPSjSB jrQSkSC jrR " SD SE5      rS " SF SG5      rT " SH SI5      rUSJ rVSK rWSL rXSM rYSlSN jrZSO r[SmSP jr\SnSQ jr]SoSR jr^SpSS jr_SqST jr`SrSU jraSqSV jrbSrSW jrcSsSX jrdStSY jreSuSZ jrfSvS[ jrgSwS\ jrhSxS] jriSxS^ jrjSyS_ jrkSyS` jrlSySa jrmSySb jrng! \ a    Sr GNf = f! \ a    SrSr  GNf = f! \ a    Sr" GNf = fs  sn f )z    )annotationsN)Enum)Path)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptionsfloat8e4m3fn)int4uint4)to_array_extendedzonnx.quantizez0.1.0ai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedl        c                  2    \ rS rSrSrSrS r\S 5       rSr	g)QuantizationMode@   r      c                    U R                   $ Nnameselfs    ^/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/quantization/quant_utils.py__str__QuantizationMode.__str__D       yy    c                F     [         U    $ ! [         a    [        5       ef = fr"   )r   KeyError
ValueError)modes    r'   from_stringQuantizationMode.from_stringG   s)    	#D)) 	,	      N)
__name__
__module____qualname____firstlineno__
IntegerOps
QLinearOpsr(   staticmethodr0   __static_attributes__r3   r+   r'   r   r   @   s%    JJ  r+   r   c                  2    \ rS rSrSrSrS r\S 5       rSr	g)QuantizedValueTypeO   r   r    c                    U R                   $ r"   r#   r%   s    r'   r(   QuantizedValueType.__str__S   r*   r+   c                F     [         U    $ ! [         a    [        5       ef = fr"   )r=   r-   r.   )vs    r'   r0   QuantizedValueType.from_stringV   s)    	%a(( 	,	r2   r3   N)
r4   r5   r6   r7   InputInitializerr(   r:   r0   r;   r3   r+   r'   r=   r=   O   s%    EK  r+   r=   c                  V    \ rS rSrSrSrSrSrSrSr	Sr
S	 r\S
 5       r\S 5       rSrg)	QuantType^   r   r                   c                    U R                   $ r"   r#   r%   s    r'   r(   QuantType.__str__g   r*   r+   c                F     [         U    $ ! [         a    [        5       ef = fr"   )rG   r-   r.   )ts    r'   r0   QuantType.from_stringj   s(    	Q< 	,	r2   c                   U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R
                  $ U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R                  $ [!        SU < S35      e)NzUnexpected value qtype=.)rG   QInt8r   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r.   r%   s    r'   tensor_typeQuantType.tensor_typeq   s    9??"###9###$$$9$$$%%%9###$$$9***+++9###$$$9??"###24(!<==r+   r3   N)r4   r5   r6   r7   rU   rW   r]   r[   rY   ra   r_   r(   r:   r0   propertyrc   r;   r3   r+   r'   rG   rG   ^   sR    EFMFGEF   > >r+   rG   c                  2    \ rS rSrSrSrS r\S 5       rSr	g)QuantFormat   r   r    c                    U R                   $ r"   r#   r%   s    r'   r(   QuantFormat.__str__   r*   r+   c                F     [         U    $ ! [         a    [        5       ef = fr"   )rg   r-   r.   )formats    r'   r0   QuantFormat.from_string   s)    	v&& 	,	r2   r3   N)
r4   r5   r6   r7   	QOperatorQDQr(   r:   r0   r;   r3   r+   r'   rg   rg      s%    I
C  r+   rg   int8uint8int16uint16dtype   i   i  i i     i      ii  iir   i i @  rJ   zero_point_indexc                d   / n[        U5       GH   u  p4[        R                  " [        U5      [        R                  5      (       a&  UR                  [        R                  " U5      5        OB[        U[        R                  5      (       a  UR                  U5        O[        SU SU 35      eX0:X  d  M  US   nUR                  [        R                  :X  d   UR                  [        R                  :X  d  M  [        SUR                   35      e   [        U5      S:  a  [        U5      $ US   $ )Nzarg z is not an array: r|   zzero_point cannot be r    r   )	enumeratenumpy
issubdtypetypenumberappendarray
isinstancendarray	TypeErrorru   float32float16lentuple)r~   argsnew_argsiarB   s         r'   _check_typer      s    H$DGU\\22OOEKKN+5==))OOAd1#%7s;<< Aww%--'177emm+C"7y ABB   "(ma/5?@Xa[@r+   c                   U [         ;   d   SU  S35       eU [        R                  R                  [        R                  R                  [        R                  R
                  [        R                  R                  4;   Ga/  US:w  a  [        SU< S35      eUR                  [        R                  :X  a  [        R                  nOHUR                  [        R                  :X  a  [        R                  nO[        SUR                   S35      e[        [!        [#        S/ S/[$        R&                  R)                  SU / S/5      S	9[#        S
/ SQS/5      /S[+        SUS 5      [+        SUS 5      /[+        SU S 5      /5      5      n[-        U5      n[/        UR1                  S XS.5      S   5      $ [         U    n	[3        U SSS9u  pUb  [5        X5      OU
nUb  [7        X5      OUn[        R8                  " UR;                  [        R                  5      U-  R=                  5       U-   5      n[        R>                  " XXS9  [/        UR;                  U	5      5      $ )NUnexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.r   z2zero_point is expected to be null for float 8 not rT   zUnexpected dtype Constant
zero_point)valuer   )Xscaler   Yqur   r   )r   r   F)reduce_range	symmetric)out) ONNX_TYPE_TO_NP_TYPE
onnx_protor   r^   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorru   r   r   FLOATr   FLOAT16r.   r   r
   r   onnxhelpermake_tensorr   r   r   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefru   qminqmaxcliplowcliphigharr_fp32s                  r'   quantize_nparrayr      s!   (( 
w&de( ++--))--	  ?%(Z[eZhhi&jkk99%#))IYY%--'#++I01=>>"Bdkk>U>UVbdikmpqor>s .0LseT	 *3	4@*7ItD (UD9:

  !,3774s)CDQGHH %U+,URWX
$'O#d.&*&63t?D==#**U]]";e"C!J!J!Lz!YZ

8h=8??5122r+   c           
        US:  d  US:  a  [        SU SU 35      e[        R                  " U [        R                  " SU R                  S95      n [        R
                  " U[        R                  " SUR                  S95      nUb*  [        X[        R                  " XPR                  S9-   5      nU(       aE  [        R
                  " [        R                  " U 5      [        R                  " U5      5      nU* n U7nX#::  d   SU  SU 35       e[        R                  " X-
  [        R                  S9n[        R                  " U[        R                  S9[        R                  " U[        R                  S9-
  n[        R                  " Xx-  5      n	U	S:  d   S5       eU	[        R                  " UR                  5      R                  :  aA  [        R                  " SUR                  S9n	[        R                  " SUR                  S9n
X/$ U(       aZ  [        R                  " [        R                  " X#-   [        R                  " S	[        R                  S9-  5      UR                  S9n
O8[        R                  " [        R                  " X U	-  -
  5      UR                  S9n
U	R                  UR                  5      n	X/$ )
a  Calculate the scale s and zero point z for the quantization relation
r = s(q-z), where r are the original values and q are the corresponding
quantized values.

r and z are calculated such that every value within [rmin,rmax] has an
approximate representation within [qmin,qmax]. In addition, qmin <= z <=
qmax is enforced. If the symmetric flag is set to True, the interval
[rmin,rmax] is symmetrized to [-absmax, +absmax], where
absmax = max(abs(rmin), abs(rmax)).

:parameter rmin: minimum value of r
:parameter rmax: maximum value of r
:parameter qmin: minimum value representable by the target quantization data type
:parameter qmax: maximum value representable by the target quantization data type
:parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
:parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
:return: zero and scale [z, s]

r   Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:rt   zqmin=z > qmax=zscale issue      ?g       @)r.   r   minimumr   ru   maximumr   r   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   s              r'   compute_scale_zpr      s   ( ax4!8]^b]ccklpkqrss
 ==u{{1DJJ?@D==u{{1DJJ?@D !4nJJ OOPuyy		$@ww<55htf55<	T[	6B	T	/%++d%--2X	XBKK EA:$}$:u{{4::&+++Ctzz2[[$**5
   T[EKK5==,QQRZ^ZdZdJ U[[u1D%ETZZXJTZZ(r+   c                   SnU [         ;  a  U [        R                  :X  a  SSKJn  SSKJn  Un[        S5       Vs/ s H
  oS" U5      PM     nn[        R                  " U Vs/ s H?  n[        R                  " U5      (       a  M   [        R                  " U5      (       a  M=  UPMA     sn[        R                  S9nO[        SU  S35      eU[         U '   OU [        R                  :X  a  SSKJn  UnUc  [        S	U  S
35      e[        R                  " [         U    5      n	[        R                  " SUS9n
[        R                  " X-  UR                   S9nX/$ s  snf s  snf )aZ  Calculate the scale s for a float8 type (E4M3FN).
The function assumes the coefficient distribution and the float 8
distribution are similar to two gaussian laws.

:return: zero and scale [z, s]

More details in notebook `quantization_fp8.ipynb
<https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
Nr   )float8e4m3_to_float32r      rt   zQuantization to element_type=z not implemented.zUnexpected element_type rT   )FLOAT8_DISTRIBUTIONSr   r^   onnx.numpy_helperr   #onnx.reference.custom_element_typesr   ranger   r   isnanisinfr   r.   r   stdru   )element_typer   zp_dtyper   r   r   
all_valuesfvaluesstd_f8zeror   s               r'   compute_scale_zp_float8r   :  s(    H//;333?H#H<A#JGJq/2JJG[[&TJqekk!nU[[QR^JT\a\i\iF <\NJ[\]]-3\*	11	1D2<.BCCYY+L9:F;;q)DKKCII6E=# HTs   E#E E #E c                   [        U [        R                  5      (       d  [        S[	        U 5       S35      eUb  UnO"[        U 5      (       a  U R                  5       OSnUb  UnO"[        U 5      (       a  U R                  5       OSn[        R                  " XpR                  S9n[        R                  " XR                  S9n[        R                  " SU R                  S9n	U[        R                  :X  a?  U(       a  [        S5      e[        R                  " U 5      n
[        X5      u  p[        XSS9$ U[        R                   [        R"                  [        R$                  [        R&                  [        R(                  [        R*                  4;   aU  [-        XUS	9u  p[        U 5      (       a  [/        XxXX$5      u  pO[        R                  " SUR                  S9n[        XSS9$ [1        S
U S35      e)a  
Returns the zero_point and scale for the given data.

:param data: The data for which to compute quantization parameters.
:param quant_type: The quantization data type.
:param symmetric: whether symmetric quantization is used or not.
:parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
:parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
:parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
:parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
:return: zero point and scale
z%Weight must be given as an array not rT   g        rt   r   z1Unsupported option reduce_range=True for float 8.r   r}   r   z Unexpected value for quant_type=)r   r   r   r   r   r   r   r   r   ru   r   r^   RuntimeErrorr   r   r   rV   rX   r\   rZ   rb   r`   r   r   r.   )data
quant_typer   r   r   rmin_overridermax_overrider   r   r   r   r   r   r   s                 r'   compute_data_quant_paramsr   _  s   * dEMM**?T
|1MNN  YYtxxzC  YYtxxzC;;t::.D;;t::.DKK4::.E[---RSSiio3JD
:qAA  -ZQZ[
t99 0T cJQdjj9J:qAA
7
|1E
FFr+   c                   [        U UUUUUU5      u  pxU[        R                  :X  a  [        XX5      n	[	        U	R                  [        R                  5      R                  5       S-  S:H  5      (       af  [        R                  " U 5      n
[        SU
R                  5        SU
R                  5        SU	R                  5        SU	R                  5        S3	5      eXxU	4$ U[        R                  [        R                  [        R                  [        R                   [        R"                  [        R$                  4;   a  [        XX5      n	XxU	4$ ['        SU S35      e)a   
:param data: data to quantize
:param qType: data type to quantize to.
:param symmetric: whether symmetric quantization is used or not.
:parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
:parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
:parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
:parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
:return: minimum, maximum, zero point, scale, and quantized weights

To pack weights, we compute a linear transformation

- when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
- when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
    `m = max(abs(rmin), abs(rmax))`

and add necessary intermediate nodes to transform quantized weight to full weight using the equation

:math:`r = S(q-z)`, where

- *r*: real original value
- *q*: quantized value
- *S*: scale
- *z*: zero point
rw   z+One of the quantized value is NaN data in [z, z], quantized_data in [z].zUnexpected value for qType=rT   )r   r   r^   r   anyr   r   rq   ravelr   r   r   r   rV   rX   r\   rZ   rb   r`   r.   )r   r   r   r   r   r   r   r   r   quantized_datanp_datas              r'   quantize_datar     sO   8 2J ((()%uI%%ekk288:S@SHIImmD)G=gkkm_Bw{{}o ^&&4&8&8&:%;2n>P>P>R=SSUW  .00  *%uI.00
25';
<<r+   c                   [        U 5      nSnUc  [        XR                  5       X25      nOUR                  U   n[	        UR                  5      n	SX'   / n
[        U5       Hj  nUR                  X5      nX;   nX+   n[        XR                  5       X5      nU
R                  [        R                  " U5      R                  U	5      5        Ml     [        R                  " X5      nU(       a  UOU R                   [         3nU[        R                  R                   :X  Ga&  [        R                  " 5       nUUl        UR$                  R'                  U R$                  5        UUl        UR)                  5       R+                  5       R-                  5       Ul        [0        b  [1        U5      nUR                  UR                  :w  d"  UR-                  5       UR-                  5       :w  a]  [3        SUR                   SUR-                  5       SS  SUR-                  5       SS  SU R                   S[5        U5      SS	  S
35      eU$ U[        R                  R6                  [        R                  R8                  4;   a  UR:                  [        R<                  [        R>                  4;  a  [3        SU S35      e[A        [C        UR-                  5       5      5      n[        RD                  RG                  UXR$                  USS9nU$ [        RD                  RI                  U5      n[        R                  " UUS9R                  U R$                  5      n[        RJ                  RM                  UU5      nU$ )a  
Returns a quantized version of the given ONNX initializer.

:param weight: The ONNX initializer to quantize.
:param quant_type: The final quantized data type.
:param zero_point: The zero-point value to use for quantization.
:param scale: The scale value to use for quantization.
:param axis: The quantization axis if quantizing per-channel. Defaults to None.
:param quant_weight_name: The name of the quantized initializer.
                          If not specified, the quantized name is generated.
:return: The quantized ONNX initializer.
Nr    zThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   rT   zQuantized weights for z. must be 8-bit before packing as 4-bit values.T)rawrt   )'tensor_proto_to_arrayr   r   shapelistr   taker   r   r   reshapeconcatenater$   TENSOR_NAME_QUANT_SUFFIXr   r   r^   	data_typedimsextendflattencopytobytesraw_datar   r   strrb   r`   ru   rp   rq   bytespack_bytes_to_4bitr   r   tensor_dtype_to_np_dtypenumpy_helper
from_array)weightr   r   r   axisquant_weight_nameweight_dataq_weight_datachannel_countchannel_dimsquantized_channel_data_listr   channel_datachannel_scalechannel_zero_pointquantized_channel_dataq_weight_nameq_weight_initializercheckpacked_dataquant_np_dtypes                        r'   quantize_onnx_initializerr    s   ( (/K*.M|(5F5F5H%\#))$/K--.&(#}%A&++A4L!HM!+%5..0-&" (..u}}=S/T/\/\]i/jk & ))*EL):%6;;-PhOi@jMT%%222#//1)3&!!((5$1!(5(=(=(?(D(D(F(N(N(P%( &&:;E{{k///5==?mF[F[F]3]"/0A0A/BBc$,,.s34F5==?3B;O:PP[\b\h\h[iS!56t<=Q@ (   
((--t/?/?/E/EF	Fuzz5;;&??!7Ftuvv .}/D/D/FGH  ${{66}jR]R]_jpt6u  	 ==jIm>JRRSYS^S^_#00;;M=Yr+   c                   U [         R                  R                  :X  a  [        S5      eSnU(       a  [        R                  U 5      nO0U(       a  U [        ;   a
  [        U    nO[        R                  U 5      nU(       d  [        SU  S35      eUu  pEUS:  d  US:  a'  [        SU SU SUR                   S	U S
U SU  35      eU$ )z
Return qmin and qmax, the minimum and maximum value representable by the given qType
:parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
:return: qmin, qmax
z;This function is not implemented for float 8 as not needed.Nr   r   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r   r^   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr.   ru   )r   r   r   qranger   r   s         r'   r   r   )  s     
&&333!"_``F,007	u ==.u5$((/07uvwwJDax4!86$x

|?<. Y"8E74
 	
 Mr+   c                "    [        XUS9u  p4XC-
  $ )z
Helper function to get the quantization range for a type.
    parameter qType: quantization type.
    return: quantization range.
r   )r   )r   r   r   r   r   s        r'   get_qrange_for_qTyper"  I  s     )	RJD;r+   c                @    U S:  a  X-   OU nUS:  =(       a    X!:  nX24$ )z
Helper function that tries to return a normalized axis in the range [0, rank - 1].
:parameter axis: The axis to normalize.
:parameter rank: The tensor rank (number of dimensions).
:return (is_valid, axis_norm)
r   r3   )r
  rank	axis_normis_valids       r'   normalize_axisr'  S  s-      $axTIA~2)"2Hr+   c                    [        U 5      nUS:X  a
  [        5       $ US-   S-  n[        U5      nSnSnXAS-
  :  a+  XS-      S-  S-  X   S-  -  X5'   US-  nUS-  nXAS-
  :  a  M+  XA:  a	  X   S-  X5'   U$ )a.  
Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
Assumes that the source values are already in the appropriate int4 range.
:parameter src_8bit: The 8-bit element values to pack.
:return A bytearray with every two 8-bit src elements packed into a single byte.
r   r    rI   rx   rK   )r   	bytearray)src_8bit	num_elemsdst_sizedstsrc_idst_is         r'   r  r  _  s     HIA~{A!#H
H
CEE a-
	*S0Q68?S;PQ


 a-

 _s*
Jr+   c                  (    \ rS rSrSr/ / S4S jrSrg)QuantizedInitializeri}  zB
Represents a linearly quantized weight input from ONNX operators
Nc
                p    Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        g r"   )	r$   initializerrminsrmaxszero_pointsscalesr   r   r
  )
r&   r$   r3  r4  r5  r6  r7  r   r   r
  s
             r'   __init__QuantizedInitializer.__init__  s4     	&

&	,	r+   )	r
  r   r3  r$   r   r5  r4  r7  r6  r4   r5   r6   r7   __doc__r8  r;   r3   r+   r'   r1  r1  }  s     r+   r1  c                  *    \ rS rSrSr    SS jrSrg)QuantizedValuei  zA
Represents a linearly quantized value (input\output\intializer)
Nc
                p    Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        g r"   )	original_nameq_name
scale_namezp_name
value_typer
  	node_type
node_qtype
scale_type)
r&   r$   new_quantized_namerA  zero_point_namequantized_value_typer
  rD  rE  rF  s
             r'   r8  QuantizedValue.__init__  s2     "($&.	"$$r+   )	r
  rE  rD  r?  r@  rA  rF  rC  rB  )NNNNr:  r3   r+   r'   r=  r=    s     %r+   r=  c                      \ rS rSrSrS rSrg)BiasToQuantizei  z#
Represents a bias to be quantized
c                (    Xl         X l        X0l        g r"   	bias_name
input_nameweight_name)r&   rO  rP  rQ  s       r'   r8  BiasToQuantize.__init__  s    "$&r+   rN  Nr:  r3   r+   r'   rL  rL    s    'r+   rL  c                   U R                   S:X  a  [        SU R                   S35      eU R                   S:X  a  U R                  nGO,U R                   S:X  a  U R                  nGOU R                   S:X  a  U R
                  nOU R                   S:X  a  U R                  nOU R                   S:X  a  U R                  nOU R                   S	:X  a  U R                  nOU R                   S
:X  a  U R                  nO}U R                   S:X  a  U R                  nO`U R                   S:X  a  U R                  nOCU R                   S:X  a  U R                  nO&[        SU R                   SU R                    S35      eU R                  U0$ )z
Convert attribute to kwarg format for use with onnx.helper.make_node.
    :parameter attribute: attribute in AttributeProto format.
    :return: attribute in {key: value} format.
r   z
attribute z does not have type specified.r    rI   rJ   rK   rL   rM   ry      	   r   z has unsupported type rT   )r   r.   r$   r   r   srQ   gfloatsintsstringstensorsgraphs)	attributer   s     r'   attribute_to_kwargr^    s;    ~~:inn%55STUU ~~	1		1		1		1		1	  	1		1	!!	1	!!	2	  :inn%55KINNK[[\]^^NNE""r+   c                ~    U Vs/ s H  o"R                   U :X  d  M  UPM     nn[        U5      S:  a  US   $ S$ s  snf )z
Helper function to find item by name in a list.
    parameter item_name: name of the item.
    parameter item_list: list of items.
    return: item if found. None otherwise.
r   N)r$   r   )	item_name	item_listitemitemss       r'   find_by_namerd    s@     (Bid99	+ATiEB5zA~58/4/ Cs   ::c                X    Sn[        [        U5      5       H  nX   U :X  d  M  UnM     U$ )z;
Helper function to return index of an item in a node list
r|   )r   r   )	elem_name	elem_listelem_idxr   s       r'   get_elem_indexri    s2     H3y>"<9$H # Or+   c                F    [         R                  R                  SX/U5      $ )z
Helper function to create a Mul node.
    parameter inputs: list of input names.
    parameter output: output name.
    parameter name: name of the node.
    return: Mul node in NodeProto format.
Mul)r   r   r   )inputsoutputr$   s      r'   get_mul_nodern    s     ;;  $??r+   c                l    U R                   R                  U R                  U-   U R                  -   5      $ )zh
Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
)parentjoinpathstemsuffix)filename
identifiers     r'   generate_identified_filenamerv  	  s+     ??##HMMJ$>$PQQr+   c                R   SS K nSS KJn  SS KnUR                  " UR
                  S9  [        S5        [        U 5        [        S5        [        U5        UR                  XSS9  UR                  S5        UR                  S5        UR                  S	5        UR                  5         g )
Nr   )	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotr   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesr{  pltr   s        r'   
apply_plotr    s{    #	S[[1	,	$K	
	*JJtdJ+JJ~JJxII()HHJr+   c           	     n	  ^^^^^ SSK mSSKnSSKmSSKJs  Js  Jn  SSKJs  Js  Jn  SSK	J
mJmJm  [        R                  " SU  35         " UUUUU4S jSTR                  5      nTR!                  XS9n[#        [$        R&                  R)                  US5      S	5       nUR+                  U5        SSS5        TR-                  S5      nUR/                  S
5      n	/ n
[1        U R3                  5       5       H  nX   nUR5                  5       n[7        UR9                  SU5      R;                  5       5      [7        UR9                  SU5      R;                  5       5      /n[=        [?        U5      5      nU	RA                  U5      nU	RA                  U5      nURC                  U	5        URE                  U	U5        URG                  U	U5        URI                  U	5      nU
RK                  U5        M     URM                  U	[O        U
5      5        U
 H  nU	RQ                  U5        M     U	RS                  5       nURU                  U	5        URW                  U	U5        URY                  U	5      nU	R[                  U5        U	R]                  5       n[#        [$        R&                  R)                  US5      S5       nUR+                  U5        SSS5        [$        R^                  R9                  SS5      S;   a  UR                  Ra                  US5      nURc                  5       n[e        U5       H\  nURg                  U5      n[        R                  " URi                  5       5        [        R                  " URk                  5       5        M^     [#        [$        R&                  R)                  US5      S	5       n[1        U R3                  5       5       H  nX   nUR5                  5       n[7        UR9                  SU5      R;                  5       5      [7        UR9                  SU5      R;                  5       5      /nUS-   [=        [?        U5      5      -   nUR+                  U5        UR+                  S5        M     SSS5        g! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       g= f)z6
Helper function to write calibration table to files.
r   N)CalibrationMethod
TensorDataTensorsDatazcalibration cache: c                  *   > \ rS rSrU UUUU4S jrSrg)*write_calibration_table.<locals>.MyEncoderi2  c                j  > [        UTT45      (       a  UR                  5       $ [        UTR                  5      (       a'  UR                  5       [	        UR
                  5      SS.$ [        UT5      (       a"  UR                  R                  [	        U5      S.$ TR                  R                  X5      $ )Nznumpy.array)r   ru   CLS)r  r   )
r   to_dictr   tolistr  ru   	__class__r4   JSONEncoderdefault)r&   objr  r  r  jsonnps     r'   r  2write_calibration_table.<locals>.MyEncoder.default3  s    #
K899{{}$#rzz** #

s399~m\\#011"}}55CII##++D66r+   r3   N)r4   r5   r6   r7   r  r;   )r  r  r  r  r  s   r'   	MyEncoderr  2  s    	7 	7r+   r  )clszcalibration.jsonwi   highestlowestzcalibration.flatbufferswbQUANTIZATION_DEBUG0)r    1zcalibration.cache 
)6r  flatbuffersr   5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTable"onnxruntime.quantization.calibrater  r  r  logginginfor  dumpsopenospathjoinwriter   Buildersortedkeysr  floatr  rb  r  r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndr   TrtTableStartDictVectorr   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironGetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirr  r  r  r  	json_datafiler   builderkey_value_listkeyr   d_valuesrX  r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr   r  r  r  r  r  s                           @@@@@r'   write_calibration_tabler  "  s   
 LLLL]]LL&'8&9:;7 7D$$ 7 

,
<I	bggll3 23S	9T

9 
: 88A;D!!$'GN',,./"'>>#(,,y$/4467(,,x.3356
 CK '',))%0
w'2!!':6((1	i(# 0& $$Wc..AB#	''	2 $!!#I7#Wi0$$W-INN9
..
C	bggll3 9:D	AT

3 
B 
zz~~*C0H<%%77Q?	'')xA!q)ILL)LL*+ ! 
bggll3 34c	:d+0023C&+F~~'Hhll9d388:;hll8T2779:F #ICK 00EJJuJJt 4 
;	:g 
:	9L 
B	A 
;	:s%   "R7R6CR&
R
R#&
R4c                   U S:H  R                  [        R                  5      nU S:g  R                  [        R                  5      nUR                  5       nU R                  U-
  nU(       d  gU[        U5      -  [        U5      -  nUS:  d   SU SU SU 35       eU R                  [        R                  5      nXqU-  U* U-  -   -  nUS:*  R                  5       S:X  d   eU$ )aj  Given a discrete distribution (may have not been normalized to 1),
smooth it by replacing zeros with eps multiplied by a scaling factor
and taking the corresponding amount off the non-zero values.
Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
     https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
r   Nr   zn_zeros=z, n_nonzeros=z, eps1=)r   r   r   sumsizer  )pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1r  s           r'   smooth_distributionr  ~  s     Qu}}-H6//%--0KllnG'!Jw%
"33D#:Q'-
|74&QQ:88EMM"D(Nte{222DAI??!!!Kr+   c                    [         R                  " U R                  5       SS9n[        S UR                  R
                   5       5      $ )NF)load_external_datac              3  N   #    U  H  n[         R                  " U5      v   M     g 7fr"   )r   uses_external_data).0
intializers     r'   	<genexpr>*model_has_external_data.<locals>.<genexpr>  s!     mUlz#66zBBUls   #%)r   loadas_posixr   graphr3  )
model_pathmodels     r'   model_has_external_datar    s9    IIj))+FEmUZU`U`UlUlmmmr+   c                    [        5       nUR                  5       Ul        [        R                  Ul        0 nS/US'   [        U R                  5       U4SS/0UD6ng)z
    Generate model that applies graph optimization (constant folding, etc.)
    parameter model_path: path to the original onnx model
    parameter opt_model_path: path to the optimized onnx model
:return: optimized onnx model
ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  opt_model_pathsess_optionkwargs_s        r'   optimize_modelr    sb     !"K+9+B+B+DK(+A+R+RK(F%6$7F !,,.jH^G_jcijAr+   c                    SS0nU R                   (       a:  U R                    H*  nUR                  UR                  UR                  05        M,     [        R
                  R                  X5        g)z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater  r   r   r   set_model_props)r  r  props      r'   add_pre_process_metadatar    sS    .0CDN((D!!488TZZ"89 )KK6r+   c                    U R                   (       a7  U R                    H'  nUR                  S:X  d  M  UR                  S:X  d  M'    g   g)zCCheck the model whether it went through quantization pre-processingr  r  TFr  r  r   )r  r
  s     r'   model_has_pre_process_metadatar    s<    ((Dxx33

FY8Y ) r+   c                    SS0nU R                   (       a:  U R                    H*  nUR                  UR                  UR                  05        M,     [        R
                  R                  X5        g )N
onnx.inferr  r  )r  r  r  s      r'   add_infer_metadatar    sS    "$78N%%A!!155!''"23 &KK6r+   c                    U R                   (       a7  U R                    H'  nUR                  S:X  d  M  UR                  S:X  d  M'    g   g)Nr  r  TFr  )r  r  s     r'   model_has_infer_metadatar    s;    %%Auu$4G)G & r+   c                    U R                    Vs/ s H'  oR                  (       a  UR                  S:X  d  M%  UPM)     nn[        U5      S:w  a  [        S5      eUS   R                  nU$ s  snf )Nr   r    z$Failed to find proper ai.onnx domainr   )opset_importdomainr   r.   version)r  opsetai_onnx_domainopset_versions       r'   get_opset_versionr    sf    ).););m);<<SXS_S_clSle);Nm
>a?@@"1%--M ns
   $A*A*c                   [        U 5      nUn[        USU5      nUS:  a;  U[        R                  R                  :X  a  [
        R                  " SU S35        SnOCUS:X  a  [
        R                  " SU S35        O"US:  a  [
        R                  " SU S35        SnX2:w  a*  [        R                  R                  X5      n [        U 5      n U $ )	Nrc      z$The original model opset version is z, which does not support quantization to float 8. Please update the model to opset >= 19. Automatically update the model to opset 19. Please verify the quantized model.r   ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Automatically update the model to opset 11. Please verify the quantized model.   )
r  getattrr   r   r^   r  warningversion_converterconvert_version&save_and_reload_model_with_shape_infer)r  weight_typer  target_opset_versionweight_quant_types        r'   update_opset_versionr'    s    %e,M(]KHr/43C3C3P3PP2=/ B1 1	

  "	"	2=/ BM M	

 
	2=/ B1 1	

  ",&&66uS 7u=Lr+   c                    [        U S5      n[        R                  R                  [	        U 5      [	        U5      5        [        R
                  " UR                  5       5      n[        U5        UR                  5         U$ )Nz	-inferred)	rv  r   shape_inferenceinfer_shapes_pathr  r  r  r  unlink)r  inferred_model_pathr  s      r'   load_model_with_shape_inferr-    s`    6z;O**3z?C@S<TUII)2245Eu Lr+   c                   [         R                  " SS9 n[        R                  " U 5      n[	        U5      R                  S5      n[        R                  " X#R                  5       SS9  [        U5      sS S S 5        $ ! , (       d  f       g = f)Nz
ort.quant.)prefixz
model.onnxT)save_as_external_data)
tempfileTemporaryDirectoryr   deepcopyr   rq  r   
save_modelr  r-  )r  quant_tmp_dir
model_copyr  s       r'   r#  r#    sa    		$	$L	9]]]5)
-(11,?

$7$7$9QUV*:6	 
:	9	9s   AA==
Bc                   U R                   [        R                  R                  [        R                  R                  4;   a  [
        R                  R                  U 5      $ [        SU R                   S[        U R                       35      e)Nz&Only float type is supported. Weights z is )r   r   r   r   r   r   r  to_arrayr.   r$   type_to_name)r3  s    r'   r   r     su    !7!7!=!=z?U?U?]?] ^^  ))+66

01A1A0B$|T_TiTiGjFkl r+   c                    U S-   $ )N_QuantizeLinearr3   tensor_names    r'   add_quant_suffixr>    s    ***r+   c                    U [         -   $ r"   )QUANT_INPUT_SUFFIXr<  s    r'   add_quant_input_suffixrA    s    +++r+   c                    U S-   $ )N_QuantizeLinear_Outputr3   r<  s    r'   add_quant_output_suffixrD    s    111r+   c                    U S-   $ )N_DequantizeLinearr3   r<  s    r'   add_dequant_suffixrG  !  s    ,,,r+   c                    U S-   $ )N_DequantizeLinear_Inputr3   r<  s    r'   add_dequant_input_suffixrJ  %  s    222r+   c                    U [         -   $ r"   )DEQUANT_OUTPUT_SUFFIXr<  s    r'   add_dequant_output_suffixrM  )  s    ...r+   )NN)FN)FNNN)r   numpy.ndarrayr   onnx.TensorProto.DataTyper   boolr   rP  r   float | Noner   rQ  r   rQ  returnz#tuple[numpy.ndarray, numpy.ndarray])rR  z2tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray])r	  onnx.TensorProtor   rO  r   rN  r   rN  r
  z
int | Noner  z
str | NonerR  rS  )FF)r
  intr$  rT  rR  ztuple[bool, int])r*  r  rR  r)  )rt  r   ru  r  rR  r   )rT   )g-C6?)r  r   )r  r   r  r   )r  r   )r  r   rR  rP  )r  r   rR  rT  )r  r   r$  rG   rR  r   )r  r   rR  r   )r  r   rR  r   )r3  r   rR  rN  )r=  r  rR  r  )rR  r  )o
__future__r   r   r  r  r1  enumr   pathlibr   r   r   r   r   r   r	   r   onnx.helperr
   r   r   r   onnx.referencer   onnxruntimer   r   r   r   r   ImportErrorr   r   onnx.reference.op_runr   __producer____version__onnx_domain	ms_domainQUANT_OP_NAMEr@  DEQUANT_OP_NAMErL  r   MODEL_SIZE_THRESHOLDr   r  r   r  rT  r9  r   r=   rG   rg   rV   ru   rX   r\   rZ   r^   rb   r`   r   r   rq   rp   rs   rr   r  r  r  r   r   r   r   r   r   r  r   r"  r'  r  r1  r=  rL  r^  rd  ri  rn  rv  r  r  r  r  r  r  r  r  r  r  r'  r-  r#  r   r>  rA  rD  rG  rJ  rM  )ks   0r'   <module>re     s   #   	      > > & Q Q - P P@?
7 	 , $2 ' !  474Dq4Dq
SZ[fhiSjloHp*Q'*4Dqt  #> #>L$   V!4  %++g"6  %++g"6!!5;;x#8''  %    5;;q#DekkRU]b]h]hFi"j%++d%**"Eu{{SV^c^h^hGi!j!!EKK$FTYafamamHn#o  5;;vU[[#I5;;W\didodoKp"q  5;;q#>BV[@\"]%++b"=u{{1TX?Y!Z    5;;q#DekkRU]b]h]hFi"j%++d%**"Eu{{SV^c^h^hGi!j!!EKK$FTYafamamHn#o  5;;vU[[#I5;;W\didodoKp"q	!    5;;q#DekkRU]b]h]hFi"j%++c"DekkRT\a\f\fFg!h!!EKK$FTYafamamHn#o  5;;vU[[#I5;;W\didodoKp"q  5;;q#=u{{1TX?Y"Z%++b"=u{{1TX?Y!Z  )+ A 13h<~"R #'"&"&;G
;G);G ;G 	;G
 !;G  ;G  ;G );G~ hl:=7:=D $(L L )L  L  	L 
 L  "L  L ^@	< >% %8' '"#J0@R$Yx2n
k 77!H7+,2-3/_   L  DE  $ rsB   \ \ (\- \<+\<\\
\*)\*-\98\9