
    Th1E                     f   S r SSKrSSKrSSKJr  SSKJr  SSKJrJ	r	J
r
Jr  \R                  R                  R                  r\R!                  S5        S\;   a"  \" \	\\
\/5      (       a  \R!                  S5         S(S jrSS\R&                  4S	 jrS
 rSS\R&                  4S jrS\R.                  4S jrS rS r\S 5       r\S 5       rS rS rS rS r S r!S r"S r#S r$S r%S\&S\&4S jr'Su  r(r)\'" \(S -
  5      r*S!\S"\&S#\&S\4S$ jr+S!\S"\&S#\&S\4S% jr,S& r-S\R                  4S' jr.g))z`Importing this file includes common utility methods for checking quantized
tensors and modules.
    N)Tensor)contextmanager)TEST_WITH_TSANIS_PPCIS_MACOS
IS_WINDOWSnoneqnnpackc                 p    [         R                  " U SU-  -   U-
  US-
  US-
  -  -
  U-  5      SU-  -   S-   $ )z7Computes the output shape given convolution parameters.      )npfloor)
input_sizekernel_sizepaddingstridedilationoutput_paddings         b/var/www/fran/franai/venv/lib/python3.13/site-packages/torch/testing/_internal/common_quantized.py_conv_output_shaper      s\     88Z!g+-;{Q 1?& &)/0 134~3EFHIJ J    c                 R   Uc   [         R                  " U5      R                  nUc   [         R                  " U5      R                  n[         R                  " X-  U-   5      R                  [         R                  5      n[         R                  " XcU5      nUR                  U5      nU$ )zQuantizes a numpy array.)r   iinfominmaxroundastypeint64clip)xscale
zero_pointqminqmaxdtypeqxs          r   	_quantizer(      sy    |xx""|xx""	!)j(	)	0	0	:B	4	 B	5	BIr   c                 <    U R                  [        5      U-
  U-  nU$ )zDequantizes a numpy array.)r   float)r'   r"   r#   r!   s       r   _dequantizer+   &   s    	5	J	&%/AHr      c                 |    X-  R                  5       U-   n[        R                  " XcU5      R                  U5      nU$ )zdRequantizes a numpy array, i.e., intermediate int32 or int16 values are
converted back to given type)r   r   r    r   )r!   
multiplierr#   r$   r%   qtyper'   s          r   _requantizer0   ,   s9     .			!J	.B	4	 	'	'	.BIr   Fc                    U[         R                  [         R                  4;   d   eU[         R                  :X  a  U[         R                  :X  d   e[	        U [         R
                  5      (       a  U R                  5       n U[         R                  :X  a  U(       a  Su  pEOSu  pEOU(       a  Su  pEOSu  pEU R                  5       nU R                  5       nU[         R                  :H  nXg:X  a  Sn	Sn
OU(       aS  [        Xv* 5      nU* nXv-
  XT-
  -  n	[        U	[        R                  " [        R                  5      R                  5      n	Sn
O[        US5      n[        US5      nXv-
  XT-
  -  n	[        U	[        R                  " [        R                  5      R                  5      n	U[        Xi-  5      -
  n
[        XJ5      n
[        XZ5      n
[        U	5      [        U
5      /$ )tCalculate the dynamic quantization parameters (scale, zero_point)
according to the min and max element of the tensor)i?   )i   )r   r4   )r   r,         ?r           )torchper_tensor_affineper_tensor_symmetricqint8
isinstancer   numpyr   r   r   finfofloat32epsr   r*   int)Xr&   reduce_rangeqschemer$   r%   min_valmax_valis_symmetricr"   r#   s              r   _calculate_dynamic_qparamsrG   3   s    u..0J0JKKKK%,,,###!U\\""GGI JD$"JD$JD$JDeegGeegGu999L
'8,GhG&4;7Erxx

3778EJ'3'G'3'G&4;7Erxx

3778Ego 66JT.JT.J%L#j/**r   c                 l   [        U [        R                  5      (       a  U R                  5       n [        R                  " U5      R
                  [        R                  " U5      R                  p2X2-
  n[        R                  " U R                  S   [        R                  S9n[        R                  " U R                  S   [        R                  S9n[        UR                  S   5       H  nU R                  5       nU R                  5       n	X:X  a
  SXW'   SXg'   M2  [        U	S5      n	[        US5      nX-
  U-  XW'   [        XW   [        R                  " [        R                  5      R                  5      XW'   U[!        XU   -  5      -
  Xg'   [        X&U   5      Xg'   [        X6U   5      Xg'   M     XV4$ )r2   r   r&   r5   r6   )r;   r7   r   r<   r   r   r   r   zerosshapefloat64r   ranger=   r>   r?   r   )
rA   r&   r$   r%   n_levelsr"   r#   irD   rE   s
             r   &_calculate_dynamic_per_channel_qparamsrP   \   sU    !U\\""GGIU#''U);)?)?${HHHQWWQZrzz2E!''!*BHH5J:##A&'%%'%%'EHJM'3'G'3'G)X5EH58RXXbjj%9%=%=>EH 58);#<<JMm4JMm4JM ( r   c                    [        U [        [        45      (       aN  [        U 5      [        U5      :X  d   e[	        [        U 5      5       Vs/ s H  n[        X   X   5      PM     nnU$ UR                  (       a  UR                  5       nU R                  (       a  U R                  5       n X-
  R                  5       nUS:X  a  S[        S5      [        S5      4$ U R                  5       nXT-  nSUR                  5       -  nXTU4$ s  snf )a  Calculates the signal to noise ratio and returns the signal and noise
power, as well as the SNR in dB.
If the input is a list/tuple this function is called recursively on each
element. The result will have the same nested structure as the inputs.

Args:
    x, x_hat: Either a tensor or a nested list/tuple of tensors.
Returns:
    signal, noise, SNR(in dB): Either floats or a nested list of floats
r   r6   inf   )r;   listtuplelenrM   _snris_quantized
dequantizenormr*   log10)r!   x_hatidxresnoisesignalsnrsnr_dbs           r   rW   rW   v   s     !dE]##1vU###38Q=A=CtAFEJ'=A
  "~~LLNYEzE%L%,..VVXF
.C#))+F&   Bs   Dc              #   "  #    [         R                  R                  R                  nU [         R                  R                  l         S v   U[         R                  R                  l        g ! U[         R                  R                  l        f = f7fNr7   backends	quantizedengine)qenginepreviouss     r   override_quantized_enginerk      sX     ~~''..H&-ENN#3*2  '(  's   ABA+  B+!BBc              #     #     U (       a  [         R                  R                  5         S v   U (       a  [         R                  R                  5         g g ! U (       a  [         R                  R                  5         f f = f7frd   )r7   _C!_set_default_mobile_cpu_allocator#_unset_default_mobile_cpu_allocator)qengine_is_qnnpacks    r   "override_cpu_allocator_for_qnnpackrq      sQ     ;HH668HH88: HH88: s   A?)A 'A?(A<<A?c                    ^  U 4S jnU$ )Nc                  |   > [          H  n[        U5         T" U 0 UD6  S S S 5        M!     g ! , (       d  f       M3  = frd   )supported_qenginesrk   )argskwargsri   	qfunctions      r   test_fn"override_qengines.<locals>.test_fn   s1    )G*734*6* 43 *33s   	,
;	 )rw   rx   s   ` r   override_qenginesr{      s    +
 Nr   c                  P    [         R                  R                  R                  S:H  $ )Nfbgemmre   rz   r   r   qengine_is_fbgemmr~          >>##**h66r   c                  P    [         R                  R                  R                  S:H  $ )Nr
   re   rz   r   r   rp   rp      s    >>##**i77r   c                  P    [         R                  R                  R                  S:H  $ )Nonednnre   rz   r   r   qengine_is_onednnr      r   r   c                  P    [         R                  R                  R                  S:H  $ )Nx86re   rz   r   r   qengine_is_x86r      s    >>##**e33r   c                     [        [        U R                  5       5      5      nSX!'   XS'   U R                  [	        U5      5      nX24$ )Nr   )rT   rM   dimpermuterU   )rA   axisnew_axis_listys       r   _permute_to_axis_zeror      sB    quuw(MM!			%&'Ar   c           	         U R                   n[        U R                  [        R                  5      U5      u  p[        R
                  " U 5      n[        U R                  5       S   5       HI  n	[        R                  " [        R                  " X	   SX   -  -  X)   -   5      XE5      X)   -
  X   -  X'   MK     UR                  [        U5      5      n
U
R                  U5      $ Nr   r5   )r&   r   tor7   r>   
zeros_likerM   sizeclampr   r   rU   )rA   per_channel_scaleper_channel_zero_pointr   	quant_min	quant_maxr&   permute_axis_listr^   rO   outs              r   +_fake_quantize_per_channel_affine_referencer      s    GGE0emm1DdKA


1
C1668A;++ekk!$#8I8L2L*M(++, -.7DF\F_`ctcwx   ++e-.
/C66%=r   c                    UR                   n[        UR                  [        R                  5      U5      u  p[        R
                  " U5      n	[        UR                  5       S   5       H*  n
[        R                  " X   SX*   -  -  X:   -   5      X'   M,     U	R                  [        U5      5      n	X:  X:*  -  n[        R
                  " U 5      nX   X'   UR                  U5      $ r   )r&   r   r   r7   r>   r   rM   r   r   r   rU   )dYrA   r   r   r   r   r   r&   r   XqrO   maskr^   s                r   0_fake_quantize_per_channel_affine_grad_referencer      s    GGE0emm1DdKA			!	B1668A;ADC*;*>$>?BXB[[\  	E+,	-BO0D


2
CCI66%=r   c                    [        U [        R                  5      (       d  [        R                  " U 5      n OU R	                  5       R                  5       n U R                  [        R                  " U5      [        R                  S9$ )Ndevicer&   )	r;   r7   r   tensordetachcloner   r   r>   )rA   r   s     r   	to_tensorr      sT    a&&LLOHHJ44u||F+5==4AAr   nreturnc                     SU -  S-
  $ Nr   rz   )r   s    r   _n_onesr      s    Fa<r   )      r   r!   ebitsmbitsc                    U R                   [        R                  :X  d   eSU-   U-   S::  d   e[        US-
  5      n[        X-   5      nSX-   -  n[        [        U-
  S-
  5      nS[        U5      U-
  -  [        US-   5      SU-  -  -  nSSU-
  -  n[
        U-
  [        U-
  -   S-   n	U	[        -  n
[        R                  " U
[        R                  S9R                  [        R                  5      nU R                  [        R                  5      n U S-  nX-  n U R                  [        R                  5      n X:  n[        R                  " [        R                  " U5      X:  5      n[        R                  " [        R                  " X5      5      nX-   nUR                  [        R                  5      nUU
-  nUR                  [        R                  5      nU R                  [        R                  5      nU[        U-
  -	  S-  nU[
        -
  [        -  U-   nUU-  nUU-  nU[        U-
  -	  nUR                  [        R                  5      n[        R                  " X[        R                  S9n [        R                   " UUU 5      n [        R                   " UUU 5      n U[        ["        -   U-
  U-
  -	  nUR                  [        R                  5      nUU-  nU U-  n U R                  [        R                  5      $ )a  Convert FP32 numbers to sub-byte floating point numbers with the given
number of exponent and mantissa bits.

Input: torch.Tensor of dtype torch.float
Output: torch.Tensor of dtype torch.uint8, where the bit encoding is stored
in the least significant bits. e.g.
  fp4: bits 0-3 empty and bits 4-7 in fp4_e2m1 encoding
  fp6: bits 0-1 empty and bits 2-7 in fp6_e2m3 or fp6_e3m2 encoding

Note: there are no special values (NaN, inf) support in this code. Values
outside the representable range of Floatx after rounding are clamped to the
maximum Floatx magnitude (sign is preserved).

Code below is an adaptation of https://fburl.com/code/ciwofcg4

Background 1: last answer in https://stackoverflow.com/q/8981913
Background 2: Computer Organization and Design, RISC-V edition, Chapter 3.5
r   r   r   rI   l        )r&   r7   r*   r   	MBITS_F32F32_EXP_BIASr   int32viewr>   logical_andlogical_not
logical_orr   uint8	full_likewhere	EBITS_F32)r!   r   r   exp_biasmax_int	sign_maskmagic_adder
max_normal
min_normal
denorm_expdenorm_mask_intdenorm_mask_floatsignsaturate_maskdenormal_masknormal_mask
denormal_xnormal_xmant_odd
val_to_addsign_lps                        r   _f32_to_floatx_unpackedr      s   & 77ekk!!!u9u!!! uqy!Hem$Gem$I )e+a/0K wu~01WUQY5G1e85TUJ q8|$J 
	 u	 		  !I-O _EKKHMM 	
u{{Az>D 	
A 	
u{{A OM%%e&7&7&FWM##E$4$4]$RSK &J-J/!Ju{{+J
 vvekk"HY./14Hl*y8KGJ
HHI-.H{{5;;'H
 	%++6AM:q1AK1-A y9,u4u<=Gjj%G
 	!G	GA44r   c                    U R                   [        R                  :X  d   eSU-   U-   S::  d   eSX-   -  n[        US-
  5      n[        U5      nX-  nX-  nUS:H  n[        R                  " US:  Xr-	  S:H  5      n	Xr-	  n
X-
  [
        -   nUR                  [        R                  5      [        -  nXu-  R                  [        R                  5      nU[        U-
  -  nX-  nSX'   SU-
  [
        -   nUS:X  a  X-
  [        -  X'   Ot[        U5       HN  n[        SU-  SUS-   -  5       H2  nUU-
  nUSU-  -
  U[        -   U-
  -  nUU-
  [        -  nX-   XU:H  '   M4     MP     [        R                  " XU5      nUR                  [        R                  5      [        U-
  [        -   U-
  -  nUU-  nUR                  [        R                  5      $ )a  Convert sub-byte floating point numbers with the given number of exponent
and mantissa bits to FP32.

Input: torch.Tensor of dtype uint8, where the bit encoding is stored
in the least significant bits. e.g.
  fp4: bits 0-3 empty and bits 4-7 in fp4_e2m1 encoding
  fp6: bits 0-1 empty and bits 2-7 in fp6_e2m3 or fp6_e3m2 encoding
Output: torch.Tensor of dtype fp32 with the dequantized value
r   r   r   )r&   r7   r   r   r   r   r   r   r   rM   r   r   r   r*   )r!   r   r   r   r   mantissa_maskr   x_pos	zero_maskr   exp_biased_lpexp_biased_f32mantissa_lp_int32mantissa_f32resultdenormal_exp_biasedrO   mantissa_cmp
left_shiftsign_f32s                       r   _floatx_unpacked_to_f32r   e  s    77ekk!!!u9u!!!em$Iuqy!HENM mG KE
 
I
 %%uqyU^4IKM NM"-<N#&&u{{3y@N .225;;?$U):;L*F
 Fh,5 z!4!< J uA %a1faAEl ; #QY
 ,Q 7*U2  #6
"By!P #1 "|"CD !< " ]vF zz%++&9u+<y+H5+PQHhF;;u{{##r   c                     X-   S-
  U-  $ r   rz   )abs     r   ceil_divr     s    EAI!r   c                    U R                   u  p[        US5      n[        US5      nUS-  nUS-  nU nX4XV4:w  a5  [        R                  " XV4U R                  U R
                  S9nXSU2SU24'   UR                  USUS5      R                  SSSS5      nUR                  S	SS
S5      R                  SS5      R                  S	S
S5      n	U	R                  5       $ )a;  
Rearrange a large matrix by breaking it into blocks and applying the rearrangement pattern.

See:
    https://docs.nvidia.com/cuda/cublas/index.html#d-block-scaling-factors-layout

Args:
    input_matrix: Input tensor of shape (H, W)

Returns:
    Rearranged tensor of shape (32*ceil_div(H,128), 16*ceil_div(W,4))
      r   Nr   r   r             )rK   r   r7   rJ   r   r&   r   r   reshape	transposeflatten)
input_matrixrowscolsn_row_blocksn_col_blockspadded_rowspadded_colspaddedblocks
rearrangeds
             r   
to_blockedr     s     ##JDD#&LD!$L $K"KF|11k7@S@S[g[m[mn+uuete| [[sL!<DDQ1aPFAr1-771=EEb"bQJr   )r   )/__doc__r<   r   r7   r   
contextlibr   $torch.testing._internal.common_utilsr   r   r   r   rf   rg   supported_enginesrt   removeanyr   r   r(   r+   r0   r8   rG   rP   rW   rk   rq   r{   r~   rp   r   r   r   r   r   r   r@   r   r   r   r   r   r   r   r   rz   r   r   <module>r      s      % ] ]^^--??    & ! ""sFNHj+Y'Z'Zi( '(J *.D 	 12288  7<UE\E\ '+R4!6 3 3 ; ;7874

Bs s   	9y1}%sv sc s# s& spX$v X$c X$# X$& X$v   r   