U
    |h*M                     @   sF  d dl Zd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZmZ d dlmZ d dlmZ d dl m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( ej)*dddgej)*dddgdd Z+ej)*dddgej)*dddgdd Z,ej)*dddgej)*dddgd;ddZ-dd Z.dd  Z/d!d" Z0ej)*dd#dgd$d% Z1ej)*ddddgej)*dddgd&d' Z2ej)*d(d)d*gd+d, Z3d-d. Z4d/d0 Z5ej)*d1d2d3gd2d3gfd2d3ged2ed3d4fd2d3gd5d6 fgd7d8 Z6d9d: Z7dS )<    N)assert_allclose)ColumnTransformer)load_diabetes	load_irismake_classificationmake_regression)DummyClassifier)RandomForestClassifierRandomForestRegressor)SimpleImputer)permutation_importance)LinearRegressionLogisticRegression)
get_scorermean_squared_errorr2_score)train_test_split)make_pipeline)KBinsDiscretizerOneHotEncoderStandardScalerscale)parallel_backend)_convert_containern_jobs      max_samples      ?      ?c           	   	   C   s   t jd}d}tdd\}}||jd|jd d dd	}t ||g}td
dd}|	|| t
|||||| |d}|jj|jd	 |fkstt |jd |jd d kstd S )N*      T)Z
return_X_yMbP?r   r   sizer   
   n_estimatorsrandom_state	n_repeatsr)   r   r   )nprandomRandomStater   normalshapereshapehstackr
   fitr   importancesAssertionErrorallimportances_mean)	r   r   rngr+   Xyy_with_little_noiseclfresult r>   X/tmp/pip-unpacked-wheel-ig1s1lm8/sklearn/inspection/tests/test_permutation_importance.py9test_permutation_importance_correlated_feature_regression   s$     
r@   c              	   C   s   t d}tjd}d}t }|j|j }}||jd|j	d d 
dd}|j||jd	}||d
< tddd}	|	|| t|	||||| |d}
|
jj	|j	d |fkstt|
jd |
jd d kstd S )Npandasr    r!   r"   r   r#   r%   r   )columnsZcorrelated_featurer&   r'   r*   )pytestimportorskipr,   r-   r.   r   datatargetr/   r0   r1   	DataFrameZfeature_namesr	   r3   r   r4   r5   r6   r7   )r   r   pdr8   r+   Zdatasetr9   r:   r;   r<   r=   r>   r>   r?   @test_permutation_importance_correlated_feature_regression_pandas>   s*    
 
rI   r    c              	      s  t j|}d}d}d}d}d}|| }	t |}
|j|
|d t  fdd|
d | D }|t j}||k sxtt j	||
||gdd}|j||	fkstt| d	|d
\}}}}td|d}||| |j}|d | }||d  }| | k stt|||||| |d}|jj|jd |fks:t|jd | }|j|d  }tt |dksnt| dk st| dkstd S )Nr!     r   r   )r$   c                    s   g | ]} |k d dqS )r%   r   )r1   ).0cr:   r>   r?   
<listcomp>y   s     zEtest_robustness_to_high_cardinality_noisy_feature.<locals>.<listcomp>)Zaxisr   )Z	test_sizer)   r'   r*   gHz>g?g333333?)r,   r-   r.   arangechoicer2   astypeZfloat32r5   ZconcatenateZrandnr0   r   r	   r3   Zfeature_importances_maxminr   r4   r7   abs)r   r   seedr8   r+   	n_samplesZ	n_classesZn_informative_featuresZn_noise_features
n_featuresclassesr9   ZX_trainZX_testZy_trainZy_testr<   Ztree_importancesZinformative_tree_importancesZnoisy_tree_importancesrZinformative_importancesZnoisy_importancesr>   rM   r?   1test_robustness_to_high_cardinality_noisy_featuref   sR    
    
rZ   c                  C   s$  t jd} d}t dddt jgddddggj}t ddddg}tt td	d
}|	|| t
||||| d}|jj|jd |fkstt |jd |jd d kstt jd} t
||||| d}|jj|jd |fkstt |j|jrtt |jd |jd d ks td S )Nr       r          @      @r   r   r   lbfgsZsolverr+   r)   r%   )r,   r-   r.   arraynanTr   r   r   r3   r   r4   r0   r5   r6   r7   Zallclose)r8   r+   r9   r:   r<   r=   Zresult2r>   r>   r?   'test_permutation_importance_mixed_types   s    ""rd   c            	      C   s   t d} tjd}d}| dddtjgddddgd	}td
dd
dg}tt	 t
 }td|dgfdt dgfg}t|tdd}||| t|||||d}|jj|jd |fkstt|jd |jd d kstd S )NrA   r    r!   r   r\   r]   ab)col1col2r   r   numrg   catrh   r^   r_   r`   r%   )rC   rD   r,   r-   r.   rG   rb   ra   r   r   r   r   r   r   r3   r   r4   r0   r5   r6   r7   )	rH   r8   r+   r9   r:   Znum_preprocess
preprocessr<   r=   r>   r>   r?   .test_permutation_importance_mixed_types_pandas   s    
"rl   c                  C   sf   t dddd\} }t| } t|}t | |}d|jd  }t|| |ddd}t||jd	d
d d S )N  r&   r   rV   rW   r)   r   2   neg_mean_squared_error)r+   scoringg?gư>)ZrtolZatol)r   r   r   r3   Zcoef_r   r   r7   )r9   r:   lrexpected_importancesresultsr>   r>   r?   .test_permutation_importance_linear_regresssion   s$           ru   rm   c           	   	   C   s   t dddd\}}t ||}t|||ddd| d}|d  }|d  }|| d	ks^tt|||ddd
d}t|d |d  td t|||ddd
d}W 5 Q R X t|d |d  d S )Nrm   r&   r   rn   r!   r   r*   r4   333333?r   )r+   r)   r   	threading)	r   r   r3   r   rS   rR   r5   r   r   )	r   r9   r:   rr   Zimportance_sequentialimp_minimp_maxZimportance_processesZimportance_threadingr>   r>   r?   ;test_permutation_importance_equivalence_sequential_parallel   sN                
      rz   c              	   C   sJ  t d}tdddd\}}||}tddd}||d	d
}t||g}|j	j
dksbtt|dr||| }n| }t|j}|||< || j	|j	ksttt|t|_tdddd}	|	|| d}
t|	|||
d| |d}|d  }|d  }|| dkstt|	|||
d| |d}t|d |d  d S )NrA   d   r!   r   rn      Zordinal)Zn_binsencoder%   r   fCategorical)r(   Z	max_depthr)   r*   r4   rv   )rC   rD   r   rG   r   Zfit_transformr1   r,   r2   Zdtypekindr5   hasattrr   ZravellenrB   rO   rQ   strindexr
   r3   r   rS   rR   r   )r   r   rH   r9   r:   ZX_dfZbinnerZ
cat_columnZnew_col_idxrfr+   Zimportance_arrayrx   ry   Zimportance_dataframer>   r>   r?   7test_permutation_importance_equivalence_array_dataframe(  sT    



	 r   
input_typera   Z	dataframec           	      C   s~   t dd }}t||dd\}}|jdks.tt|| }tdd||}d}t||||d	d
}t	||f}t
||j d S )Ng     j@r[   r   rn   g    .AZprior)Zstrategyr!   r   )r+   r   )intr   nbytesr5   r   r   r3   r   r,   zerosr   r4   )	r   rV   rW   r9   r:   r<   r+   rY   rs   r>   r>   r?   /test_permutation_importance_large_memmaped_datan  s      

r   c               	   C   s  t jd} d}d}|d }| dd||f}t |}d|d |df  |d |df  |d |< ||d df d||d df   ||d < tdd}||| t|||dd	d
d}|jd |jd  }|t	
ddkstt |}	t|||dd	d
|	d}|jd |jd  }
|
t	
|dks,tt t d|t d|g}	||||	 t|||dd	d
|	d}|jd |jd  }|| t	
ddkstd S )Nr   rJ   r   g        r"   r   F)Zfit_interceptZneg_mean_absolute_error   r)   rq   r+   g{Gz?r)   rq   r+   Zsample_weightg    _Br   )r,   r-   r.   r/   r   r   r3   r   r7   rC   Zapproxr5   Zonesr2   repeat)r8   rV   rW   Zn_half_samplesxr:   rr   piZx1_x2_imp_ratio_w_nonewZx1_x2_imp_ratio_w_onesZx1_x2_imp_ratio_wr>   r>   r?   )test_permutation_importance_sample_weight  s^    
,,
     
		r   c               
   C   s   dd } t ddgddgg}t ddg}t ddg}t }||| zt|||d| dd W n tk
r   td Y nX tt t|||d| d|d	 W 5 Q R X d S )
Nc                 S   s   dS )Nr   r>   Z	estimatorr9   r:   r>   r>   r?   	my_scorer  s    zJtest_permutation_importance_no_weights_scoring_function.<locals>.my_scorerr   r   r|   r[   r   zpermutation_test raised an error when using a scorer function that does not accept sample_weight even though sample_weight was Noner   )	r,   ra   r   r3   r   	TypeErrorrC   Zfailraises)r   r   r:   r   rr   r>   r>   r?   7test_permutation_importance_no_weights_scoring_function  s,    
      r   z list_single_scorer, multi_scorerr2rp   r   rp   c                 C   s$   t || |t|| | dS )Nr   )r   Zpredictr   r   r>   r>   r?   <lambda>  s    r   c           	   	   C   s   t dddd\}}t ||}t|||d|dd}t| t| ksLt| D ].}|| }t|||d|dd}t|j|j qPd S )Nrm   r&   r   rn   r   r   r   )	r   r   r3   r   setkeysr5   r   r4   )	Zlist_single_scorerZmulti_scorerr   r:   rr   Zmulti_importanceZscorerZmulti_resultZsingle_resultr>   r>   r?   (test_permutation_importance_multi_metric  s,              r   c               	   C   sd   t dgj} t ddddg}t }|| | d}tjt|d t|| |dd W 5 Q R X dS )	zjCheck that a proper error message is raised when `max_samples` is not
    set to a valid input value.
    )r   r\   r]   g      @r   r   z max_samples must be <= n_samples)matchr!   )r   N)	r,   ra   rc   r   r3   rC   r   
ValueErrorr   )r9   r:   r<   err_msgr>   r>   r?   -test_permutation_importance_max_samples_error  s    r   )r    )8Znumpyr,   rC   Znumpy.testingr   Zsklearn.composer   Zsklearn.datasetsr   r   r   r   Zsklearn.dummyr   Zsklearn.ensembler	   r
   Zsklearn.imputer   Zsklearn.inspectionr   Zsklearn.linear_modelr   r   Zsklearn.metricsr   r   r   Zsklearn.model_selectionr   Zsklearn.pipeliner   Zsklearn.preprocessingr   r   r   r   Zsklearn.utilsr   Zsklearn.utils._testingr   markZparametrizer@   rI   rZ   rd   rl   ru   rz   r   r   r   r   r   r   r>   r>   r>   r?   <module>   sf   &O
(D
?
