U
    }h#                     @   s   d dl Zd dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZ dd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zejdddgdd Zdd ZdS )    N)
csr_matrix)mutual_info_classifmutual_info_regression)_compute_mi)check_random_state)assert_allcloseassert_array_equalc                  C   s   t dddddg} t dddddg}dt d dt d   }}dt d dt d  dt d  }|| | }tt| |ddd	| d S )
Nr      g333333g333333?g?gɿg?T
x_discrete
y_discrete)nparraylogr   r   )xyZH_xZH_yZH_xyZI_xy r   T/tmp/pip-unpacked-wheel-ig1s1lm8/sklearn/feature_selection/tests/test_mutual_info.pytest_compute_mi_dd   s     *r   c                 C   s   t d}d}d}d}t |d || | g|| | |d gg}t |t | dt t j|  }td}|j||ddj| dd	}|d d df |d d df  }	}
d
D ]$}t	|	|
dd|d}t
||dd qd S )N   r	   
         ?r     sizeFcopy         r   r   n_neighbors皙?Zrtol)r   zerosr   r   ZlinalgZdetr   multivariate_normalastyper   r   )global_dtypemeanZsigma_1Zsigma_2ZcorrcovI_theoryrngZr   r   r"   
I_computedr   r   r   test_compute_mi_cc   s,    
*"    r/   c           
   	   C   s   d}t d}dD ]}|j|d|k}t|| }|dk}|jddt|d||< |jddt| d|| < dd| td	d|   |td	|   td	  td }d
D ]$}t||dd|d}	t|	|dd qqd S )Nr   r   )g333333?r   gffffff?r   r	   r   g      r   r   TFr!   r#   r$   )r   uniformr   emptysumr   r   r   )
r(   	n_samplesr,   pr   r   maskr+   r"   r.   r   r   r   test_compute_mi_cd>   s,    4    r7   c                 C   s   d}t jj|ddk}t || }|dk}t jjddt |d||< t jjddt | d|| < t||dd	d
}t |df}t |df}t||dd	d
}t|| d S )Nd   r   r   r   r0   r	   r   TFr
   r   )r   randomr1   r2   r3   r   Zhstackr   )r(   r4   r   r   r6   mi_1mi_2r   r   r   test_compute_mi_cd_unique_labele   s     r<   c                 C   sz   t jdddgdddgdddgdddgdddgg| d}t dddddg}t||dd}tt | t dddg d S )Nr   r	   r   dtypeTdiscrete_features)r   r   r   r   argsort)r(   Xr   mir   r   r   !test_mutual_info_classif_discretey   s    * rD   c           	   	   C   s   t ddddgddddgddddgddddgg}||j}t d}td}|j||dd	j| d
d}|d d dd f }|d d df }t||dd}t	t 
| t dddg |jt jkstd S )Nr	   r   r   r   r#   g           r   r   Fr   random_state)r   r   dotTr%   r   r&   r'   r   r   rA   r>   float64AssertionError)	r(   rI   r*   r)   r,   r-   rB   r   rC   r   r   r   test_mutual_info_regression   s    2
rL   c                 C   s  t d}|ddj| dd}|d d df  |d d df 7  < d|d d df  |d d df  dkt}|d d df dk|d d df< t||dgddd	}tt| dddg d
D ]V}t||dg|dd	}|d |d kst|d |d kst|d |d kstqd S )Nr   r   r   Fr   r	   r   r   )r@   r"   rG   )r   r    	   )	r   Zrandr'   intr   r   r   rA   rK   )r(   r,   rB   r   rC   r"   Zmi_nnr   r   r   test_mutual_info_classif_mixed   s$    $.     rO   c              
   C   s  t jdddgdddgdddgdddgdddgg| d}t jdddddg| d}t|}ttfD ]b}tt |||dd W 5 Q R X tt |||dd W 5 Q R X tt |||dddgd W 5 Q R X tt |||ddddgd W 5 Q R X tt |||dd	gd W 5 Q R X |||d
dd}|||ddd}|||d
dd}|||ddd}|||dddgdd}	|||ddgdd}
t	|| t	|| t	|	|
 t 
||r^tq^d S )Nr   r	   r   r=   Fr?   ZmanualTrE   autor@   rG   )r   r   r   r   r   pytestZraises
ValueError
IndexErrorr   ZallcloserK   )r(   rB   r   ZX_csrZmutual_infor:   r;   Zmi_3Zmi_4Zmi_5Zmi_6r   r   r   test_mutual_info_options   s6    *  


rU   
correlatedTFc                 C   s   t j|}d}|jd|d}| r0|t j}n|jdd|d}t|dddf |dg|d}t|dddf |d	g|d}|t	
|kstdS )
zCheck that `mutual_info_classif` and `mutual_info_regression` are
    symmetric by switching the target `y` as `feature` in `X` and vice
    versa.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/23720
    r8   r   r   r   r	   NFrQ   T)r   r9   RandomStaterandintr'   rJ   normalr   r   rR   ZapproxrK   )rV   global_random_seedr,   ndcZ
mi_classifZmi_regressionr   r   r   3test_mutual_information_symmetry_classif_regression   s&    	      r^   c                 C   sb   t j| }|jddd}|jt jdd}|jddd}t||| d}t||| d}t|| dS )zqCheck that results agree when X is integer dtype and float dtype.

    Non-regression test for Issue #26696.
    r8   )r8   r   r   Tr   rF   N)r   r9   rW   rX   r'   rJ   r   r   )rZ   r,   rB   ZX_floatr   expectedresultr   r   r   'test_mutual_info_regression_X_int_dtype   s    ra   )Znumpyr   rR   Zscipy.sparser   Zsklearn.feature_selectionr   r   Z&sklearn.feature_selection._mutual_infor   Zsklearn.utilsr   Zsklearn.utils._testingr   r   r   r/   r7   r<   rD   rL   rO   rU   markZparametrizer^   ra   r   r   r   r   <module>   s"   #'!
