
    7h                        S SK r S SKJr  S SKrS SKJr  SSKJrJr  SSK	J
r
Jr  SSKJr  SSKJrJr  SS	KJr  SS
KJrJrJr  SSKJrJr  SSKJr  \ R6                  " \5      r\" \R<                  SSS9r\" \R@                  RB                  RD                  SS\RF                  S9r$\R@                  RB                  r!\R@                  RJ                  r%\R@                  RL                  r&SS jr'SS jr(g)    N)Any)mm_args   )configlowering)CppGemmTemplateCppWoqInt4GemmTemplate)create_epilogue_with_attr)expandregister_lowering)WeightInt4PackMatmul)autotune_select_algorithmExternKernelChoicerealize_inputs)use_aten_gemm_kernelsuse_cpp_gemm_template)Vzat::_weight_int8pack_mmF)has_out_variantz*at::native::_weight_int4pack_mm_cpu_tensor)r   kernel_creatorc                  b   [         R                  " [        R                  [        R
                  [        R                  /5        [         R                  " [        R                  5        [         R                  " [        R
                  5        [         R                  " [        R                  5        g N)r   add_needs_realized_inputs	quantized
max_pool2d
_quantized$wrapped_fbgemm_pack_gemm_matrix_fp16!wrapped_fbgemm_linear_fp16_weightmake_fallback     ]/var/www/fran/franai/venv/lib/python3.13/site-packages/torch/_inductor/quantized_lowerings.pyregister_quantized_opsr"   '   sn    &&  ;;88	
 9//0:JJK:GGHr    c                      [        [        R                  S S9S S.S[        R                  S[        R                  S[        R                  S[
        S[
        4
S jj5       n [        [        R                  S S9S S.S[        R                  S[        R                  S	[        S
[        R                  S[
        S[
        4S jj5       n[        R                  " [        R                  5        [        R                  " [        R                  5        g )N)type_promotion_kind)layoutinputweightscaler%   returnc                  ^^ [        XTSS9u      nmpVUR                  5       [        R                  [        R                  [        R
                  4;   a"  UR                  5       [        R                  :X  d   eTn[        5       (       a  [        R                  XVT4U5      /O/ nS[        R                  S[        4UU4S jjn	[        XuUSS9(       a  [        R                  " UUXVT/SU	S9  [        SXUT/U5      $ )	NT)r%   mat2_transposedbufr)   c           
      T   > [        U S[        [        TTR                  5      5      S9$ )Nmul)other)r
   r   r   size)r,   r%   r(   s    r!   _mul_epilogue?register_woq_mm_ops.<locals>.int8pack_mm.<locals>._mul_epilogueO   s'    ,U.v{{1K"L r    )r+   )trans_wepilogue_creator_weight_int8pack_mm)r   	get_dtypetorchbfloat16float16floatint8r   aten__weight_int8pack_mmbindTensorr   r   r   add_choicesr   )
r&   r'   r(   r%   _mat1mat2aten_layoutchoicesr1   s
     ``      r!   int8pack_mm(register_woq_mm_ops.<locals>.int8pack_mm5   s     '.&$'
#1a NN LL EJJ.	
/ 
 %&& &**D+>LM 		u|| 	 	 	
 !D$O''U#!. )!74,?
 	
r    
qGroupSizeqScaleAndZerosc          	      X   [        XUSSS9u      pTpgUR                  5       [        R                  [        R                  [        R
                  4;   a"  UR                  5       [        R                  :X  d   e[        R                  R                  [        R                  " U[        R                  S9S S9nUn	[        5       (       a  [        R                  XgX4U	5      /O/ n
[        R                   (       d  [        R"                  (       aR  [%        U	UUSSUS9(       a?  UR'                  5       R)                  5       (       a  [*        U   R-                  U
U	XgX/5        S[        R.                  R0                  R2                  S[        R4                  4S jnUS	 S
.n[7        SU
XgX/U	US9$ )NT)r%   use_4x2_dimr+   )dtype)name)r+   is_woq_int4q_group_sizexr)   c                     U R                  5       R                  5       (       d   eU R                  5       nU R                  5       n[        R
                  " SSU[        R                  US9$ )Nr      )rK   device)
get_layoutis_contiguousget_size
get_devicer7   randintuint8)rO   shaperR   s      r!   get_example_weightHregister_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.get_example_weight   sN    <<>//1111JJLE\\^F==Cekk&QQr    c                 X    [         R                  R                  U R                  5          $ r   )r   graph	constantsget_name)rO   s    r!   <lambda>>register_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.<lambda>   s    **1::<8r    )r      _weight_int4pack_mm_for_cpu)input_gen_fns)r   r6   r7   r8   r9   r:   rX   r   r]   add_tensor_constanttensorint64r   aten__weight_int4pack_mm_cpur=   r   max_autotunemax_autotune_gemmr   rS   rT   r	   r?   	_inductorirIRNoder>   r   )r&   r'   rG   rH   r%   r@   rA   rB   
group_sizerC   rD   rZ   rd   s                r!   int4pack_mm_cpu,register_woq_mm_ops.<locals>.int4pack_mm_cpua   s    '.&dD'
#1a NN LL EKK/	
0 WW00LL5;;7d 1 

  %&&	 -11<k  	   F$<$<% $ ' !//11":.::Z8	R%//"4"4";"; 	R 	R "8

 ))4'
 	
r    )r   atenr5   r7   r>   r   rc   intr   r   _dyn_quant_matmul_4bit_dyn_quant_pack_4bit_weight)rE   ro   s     r!   register_woq_mm_opsru   4   s    t//TJ )
||)
)
 ||)

 )
 
)
 K)
V t77TR C
||C
C
 C
 	C
 C
 
C
 SC
J 46674;;<r    )r)   N))loggingtypingr   r7    torch._inductor.kernel.mm_commonr    r   r   codegen.cpp_gemm_templater   r	   codegen.cpp_utilsr
   r   r   	mkldnn_irr   select_algorithmr   r   r   utilsr   r   virtualizedr   	getLogger__name__logr5   r<   opsr   int4mm_packed_weight_cpucreaterh   r   rq   r"   ru   r   r    r!   <module>r      s       4  N 8 / + 
 @  !-	8%   2	II000'..	   II	YY!!
yy~~
It=r    