
    h@#                        S SK r S SKrS SKrS SKrS SKJr  S SKrS SK	J
r
JrJrJr  S SKJr  SSKJr  SSKJr  \R(                  " \5      r " S S5      rS	 r\S
:X  a  \" 5       r\R4                  (       a  \R7                  \R8                  5        \R:                  r\R>                  r \RB                  RE                  \ 5      (       a!  \RG                  S\  S35        \$" S\  S35      e\RJ                  " \5      r&\" \&\RN                  \RP                  \RR                  S9r*\*RW                  5         \*RL                  RY                  \ S5        gg)    N)
GraphProto
ModelProto	NodeProtoTensorProto)quantize_matmul_bnb4   )	ONNXModel)attribute_to_kwargc                       \ rS rSrSrSrSrSS\S\S\4S	 jjr	\
S
\\   S\\\4   4S j5       rS\R"                  S\R&                  4S jrS\S\\   S\4S jrS\\   4S jrS rSrg)MatMulBnb4Quantizer   zMPerform 4b quantization of constant MatMul weights using FP4 or NF4 data typer   r   Nmodel
quant_type
block_sizec                     U=(       d    / nU[         R                  [         R                  4;   d   e[        U5      U l        X l        X0l        [        U5      U l        g N)	r   FP4NF4r	   r   r   r   setnodes_to_exclude)selfr   r   r   r   s        h/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/quantization/matmul_bnb4_quantizer.py__init__MatMulBnb4Quantizer.__init__%   sR    +1r1557J7N7NOOOOu%
$$ #$4 5    
graph_pathreturnc                     [        [        U5      S-
  SS5       H1  nX   nUR                   H  nUR                  U :X  d  M  XC4s  s  $    M3     g)Nr   )NN)rangeleninitializername)r#   r   gidgraphtensors        r   __get_initializer%MatMulBnb4Quantizer.__get_initializer-   sN    Z1,b"5COE++;;$&!=( , 6
 r   fpweightc           	         [        UR                  5      S:w  a  [        S5      eUR                  5       R	                  5       nUR                  u  p4X4-  nU R
                  nXV-   S-
  U-  nUS-   S-  n[        R                  " USS9n	[        R                  " XqR                  S9n
[        XXU R                  XC5        X4$ )z4b quantize fp32/fp16 weight   z9Current bnb4 block quantization only supports 2D tensors!r   uint8)dtype)r!   shape
ValueError	transposecopyr   npzerosr-   r   r   )r   r)   
fpweight_trowscolsnumelr   
num_blocksquantized_numelpackedabsmaxs              r   bnb4_block_quant$MatMulBnb4Quantizer.bnb4_block_quant6   s     x~~!#XYY '')..0
^^
__
(1,;
 19*/9*NN;VT__VZar   nodegraph_stackc                 X   UR                   S:w  a  U$ [        R                  SUR                   S35        UR                  U R                  ;   a%  [        R                  SUR                   S35        U$ UR
                  S   n[        R                  X25      u  pEUc  [        R                  S5        U$ [        R                  R                  U5      n[        UR                  5      S:w  a  [        R                  S	5        U$ U R                  U5      u  px[        R                  R                  U5      n	UR                  S
-   U	l        UR
                   H0  n
U
R                  U:X  d  M  UR
                  R                  U
5          O   [        R                  R                  U5      nUR                  S-   Ul        UR                   R#                  X/5        0 nUR                  u  pXS'   XS'   U R$                  US'   U R&                  US'   [        R(                  R*                  "  SUR
                  S   U	R                  UR                  /UR,                  S   /UR                  (       a  UR                  S
-   OSSS.UD6n[        R                  SUR                   S35        U$ )zdIf the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new nodeMatMulzstart to quantize z ...zexclude to quantize z$ as specified by nodes_to_exclude...r   z2MatMul doesn't have const weight. Skip to quantizer+   z)MatMul weight is not 2D. Skip to quantize_Bnb4_absmaxKNr   r   r    com.microsoft)inputsoutputsr#   domainzcomplete quantization of )
MatMulBnb4)op_typeloggerdebugr#   r   inputr   %_MatMulBnb4Quantizer__get_initializeronnxnumpy_helperto_arrayr!   r.   r<   
from_arrayremover"   extendr   r   helper	make_nodeoutput)r   r>   r?   inputBBBs_graphB_arrayr:   r;   B_quantrO   absmax_tensorkwargsr5   r6   matmul_bnb4_nodes                   r   _bnb4_matmul_node_weight,MatMulBnb4Quantizer._bnb4_matmul_node_weightL   s;    <<8#K)$))D9:99---LL/		{:^_`KA);;FP9LLMNK##,,Q/w}}"LLDEK..w7##..v6vv'^^EzzV#%%e, $
 ))44V<VVi/##W$<=]]
ss#|#|;;00
JJqM7<<1C1CD[[^$(,		W$r"
 
 	04@Ar   c                    / nUS   nUR                    GH  nUR                   Vs/ s HW  nUR                  [        R                  R
                  :X  d*  UR                  [        R                  R                  :X  d  MU  UPMY     nnU(       Ga^  0 nUR                   H  nUR                  [        R                  R
                  :X  a9  UR                  UR                  5        UR                  U R                  U5      0nOUR                  [        R                  R                  :X  aV  / n	UR                   H5  n
UR                  U
5        U	R                  U R                  U5      /5        M7     UR                  U	0nO[        U5      nUR                  U5        GM     [        R                  R                   " UR"                  UR$                  UR&                  4SUR                  0UD6nUR                  U R)                  XA5      5        GM     UR+                  S5        UR                   R                  U5        UR-                  5         U$ s  snf )Nr   r#   r>   )r>   	attributetyperQ   AttributeProtoGRAPHGRAPHSappendgr#   _process_subgraphgraphsrV   r
   updaterW   rX   rL   rO   rY   rb   
ClearFieldpop)r   r?   	new_nodesr%   r>   attrgraph_attrsr`   kvvaluesubgraphs              r   rl   %MatMulBnb4Quantizer._process_subgraph   s   	BJJD !NN*D99 3 3 9 99TYY$J]J]JdJd=d *  
  NNDyyD$7$7$=$==#**4662"ii)?)?)LMd&9&9&@&@@ "(,H'..x8!LL$*@*@*M)NO )4 #ii//5MM"% + {{,,LL$**dkk@D		MS T::4MN7 : 	 

)$?s   AI>Ic                 p   U R                   R                  5       /nU R                   R                  5       nSnU H  nUR                  S:X  d  M  SnM     U(       d0  UR	                  [
        R                  R                  SS5      /5        U R                  U5        U R                   R                  5         g )NFrG   Tr   )
r   r%   opset_importrJ   rV   rQ   rW   make_opsetidrl   clean_initializers)r   r?   ry   has_ms_domainopsets        r   processMatMulBnb4Quantizer.process   s    zz'')*zz..0!E||. $ " !9!9/1!M NO{+

%%'r   )r   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r   r   r   intr   staticmethodlistr   tupler   rP   npt	ArrayLiker2   ndarrayr<   r   rb   rl   r~   __static_attributes__ r   r   r   r      s    W C C6j 6c 6s 6 D,< {T^G^A_     2::  ,5 Y 5 T*EU 5 Zc 5 n$T*-= $L(r   r   c            	         [         R                  " SS9n U R                  SSSS9  U R                  SSSS9  U R                  S	S
S[        R                  [        R
                  /SS9  U R                  SS
SSS9  U R                  SSS
SS9  U R                  S
S9  U R                  SS[        S
/ SS9  U R                  5       $ )Na  Blockwise FP4/NF4 quantization for MatMul 2D weight matrices.

A weight matrix is partitioned into blocks, where each block is a contiguous
subset inside the flattened transposed weight matrix. Each block is quantized
into a set of 4b integers with an absolute value scaling factor.
)descriptionz--input_modelTzPath to the input model file)requiredhelpz--output_modelzPath to the output model filez--quant_typeFr   z&Quantization data type. 0: FP4, 1: NF4)r   defaultchoicesr   z--block_size@   zVBlock size for blockwise quantization. Note: bnb.nn.Linear4bit only uses block_size=64)r   r   r   z-vz	--verbose
store_true)r   action)verbosez--nodes_to_exclude+zBSpecify the nodes to be excluded from quantization with node names)nargsrf   r   r   r   )	argparseArgumentParseradd_argumentr   r   r   set_defaultsstr
parse_args)parsers    r   r   r      s    $$F $=[\
(4>]^
$((*=*A*AB5   e	   kE,O
&
Q   r   __main__zfile z already exists)r   T)-r   loggingosnumpyr2   numpy.typingtypingr   rQ   onnx.onnx_pbr   r   r   r   onnxruntime.capi._pybind_stater   
onnx_modelr	   quant_utilsr
   	getLoggerr   rM   r   r   argsr   setLevelDEBUGinput_modelinput_model_pathoutput_modeloutput_model_pathpathexistserror	Exceptionloadr   r   r   r   quantr~   save_model_to_filer   r   r   <module>r      s     	    G G ? ! +			8	$^( ^(B$N z<D||&''))	ww~~'((u./?@% 12/BCCII&'EtZ^ZoZopE	MMO	KK""#4d; r   