U
    hk#                     @   s6  d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ	 d dl
Z
d dlmZmZmZmZ d dlmZ ddlmZ ddlmZ eeZG dd	 d	Zd
d Zedkr2e Zejreej ejZ ej!Z"ej#$e"re%de" d e&de" de
'e Z(ee(ej)ej*ej+dZ,e,-  e,j(.e"d dS )    N)ListTuple)
GraphProto
ModelProto	NodeProtoTensorProto)quantize_matmul_bnb4   )	ONNXModel)attribute_to_kwargc                   @   s   e Zd ZdZdZdZdeeedddZe	e
e eeef dd	d
ZejejdddZee
e edddZe
e dddZdd ZdS )MatMulBnb4QuantizerzMPerform 4b quantization of constant MatMul weights using FP4 or NF4 data typer   r	   N)model
quant_type
block_sizec                 C   s@   |pg }|t jt jfkstt|| _|| _|| _t|| _	d S )N)
r   FP4NF4AssertionErrorr
   r   r   r   setnodes_to_exclude)selfr   r   r   r    r   R/tmp/pip-unpacked-wheel-socb9apf/onnxruntime/quantization/matmul_bnb4_quantizer.py__init__&   s    
zMatMulBnb4Quantizer.__init__)
graph_pathreturnc                 C   sL   t t|d ddD ]2}|| }|jD ]}|j| kr&||f    S q&qdS )Nr	   )NN)rangeleninitializername)r   r   gidgraphZtensorr   r   r   Z__get_initializer.   s    

z%MatMulBnb4Quantizer.__get_initializer)fpweightr   c                 C   s   t |jdkrtd|  }|j\}}|| }| j}|| d | }|d d }tj|dd}	tj||jd}
t	|	||
|| j
|| |	|
fS )z4b quantize fp32/fp16 weight   z9Current bnb4 block quantization only supports 2D tensors!r	   Zuint8)dtype)r   shape
ValueErrorZ	transposecopyr   npzerosr$   r   r   )r   r"   Z
fpweight_trowscolsZnumelr   Z
num_blocksZquantized_numelpackedabsmaxr   r   r   bnb4_block_quant7   s    
z$MatMulBnb4Quantizer.bnb4_block_quant)nodegraph_stackr   c                 C   s  |j dkr|S td|j d |j| jkrFtd|j d |S |jd }t||\}}|dkrvtd |S tj	
|}t|jd	krtd
 |S | |\}}tj	|}	|jd |	_|jD ]}
|
j|kr|j|
  qqtj	|}|jd |_|j|	|g i }|j\}}||d< ||d< | j|d< | j|d< tjjd|jd |	j|jg|jd g|jrz|jd nddd|}td|j d |S )zdIf the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new nodeZMatMulzstart to quantize z ...zexclude to quantize z$ as specified by nodes_to_exclude...r	   Nz2MatMul doesn't have const weight. Skip to quantizer#   z)MatMul weight is not 2D. Skip to quantizeZ_Bnb4Z_absmaxKNr   r   
MatMulBnb4r    com.microsoft)inputsoutputsr   domainzcomplete quantization of )r3   )op_typeloggerdebugr   r   inputr   %_MatMulBnb4Quantizer__get_initializeronnxZnumpy_helperZto_arrayr   r%   r.   Z
from_arrayremover   extendr   r   helper	make_nodeoutput)r   r/   r0   ZinputBBZBs_graphZB_arrayr,   r-   ZB_quantr<   Zabsmax_tensorkwargsr*   r+   Zmatmul_bnb4_noder   r   r   _bnb4_matmul_node_weightM   sV    








 
	z,MatMulBnb4Quantizer._bnb4_matmul_node_weight)r0   c                 C   s  g }|d }|j D ]}dd |jD }t|ri }|jD ]}|jtjjkrh||j |j	| 
|i}nN|jtjjkrg }	|jD ] }
||
 |	| 
|g q|j	|	i}nt|}|| q8tjj|j|j|jfd|j	i|}|| || q|d |j | |  |S )Nr   c                 S   s,   g | ]$}|j tjjks$|j tjjkr|qS r   )typer>   AttributeProtoGRAPHGRAPHS).0attrr   r   r   
<listcomp>   s    z9MatMulBnb4Quantizer._process_subgraph.<locals>.<listcomp>r   r/   )r/   	attributer   rG   r>   rH   rI   appendgr   _process_subgraphrJ   Zgraphsr@   r   updaterA   rB   r9   r<   rC   rF   Z
ClearFieldpop)r   r0   Z	new_nodesr!   r/   Zgraph_attrsrE   rL   kvvalueZsubgraphr   r   r   rQ      sD    



  
z%MatMulBnb4Quantizer._process_subgraphc                 C   sd   | j  g}| j  }d}|D ]}|jdkrd}q|sL|tjddg | | | j 	  d S )NFr5   Tr	   )
r   r!   opset_importr8   r@   r>   rA   Zmake_opsetidrQ   Zclean_initializers)r   r0   rV   Zhas_ms_domainZopsetr   r   r   process   s    


zMatMulBnb4Quantizer.process)N)__name__
__module____qualname____doc__r   r   r   intr   staticmethodr   r   r   r   r=   nptZ	ArrayLiker(   Zndarrayr.   r   rF   rQ   rW   r   r   r   r   r      s   7&r   c                  C   s   t jdd} | jdddd | jdddd | jd	d
dtjtjgdd | jdd
ddd | jddd
dd | jd
d | jddtd
g dd |  S )Na  Blockwise FP4/NF4 quantization for MatMul 2D weight matrices.

A weight matrix is partitioned into blocks, where each block is a contiguous
subset inside the flattened transposed weight matrix. Each block is quantized
into a set of 4b integers with an absolute value scaling factor.
)descriptionz--input_modelTzPath to the input model file)requiredhelpz--output_modelzPath to the output model filez--quant_typeFr	   z&Quantization data type. 0: FP4, 1: NF4)r`   defaultoptionsra   z--block_size@   zVBlock size for blockwise quantization. Note: bnb.nn.Linear4bit only uses block_size=64)r`   rb   r_   z-vz	--verbose
store_true)r`   action)verbosez--nodes_to_exclude+zBSpecify the nodes to be excluded from quantization with node names)nargsrG   r`   rb   ra   )	argparseArgumentParseradd_argumentr   r   r   set_defaultsstr
parse_args)parserr   r   r   ro      s:    	
	ro   __main__zfile z already exists)r   T)/rj   loggingostypingr   r   Znumpyr(   Znumpy.typingr^   r>   Zonnx.onnx_pbr   r   r   r   Zonnxruntime.capi._pybind_stater   Z
onnx_modelr
   Zquant_utilsr   	getLoggerrX   r:   r   ro   argsrg   setLevelDEBUGZinput_modelZinput_model_pathZoutput_modelZoutput_model_pathpathexistserror	Exceptionloadr   r   r   r   ZquantrW   Zsave_model_to_filer   r   r   r   <module>   s6   
 "'

