U
    }hV                  
   @   s  d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZmZmZmZm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z)m*Z*m+Z+ G dd de
Z,G dd de
Z-G dd deZ.e/ Z0e0j1e0j2 Z1Z3ej45d Z6dd Z7dd Z8ej9:de1j;d d e<dfdd e=dfdd e<dfd d e<dfgd!d" Z>ej9:d#d d$e1j;d dgd%d& Z?ej9:d#d'd d(d d)d gd*d+ Z@ej9:d#d,d d$gd-d. ZAej9:d#d/d d0d d1d gd2d3 ZBG d4d5 d5e
ZCd6d7 ZDd8d9 ZEd:d; ZFe+d<d= ZGd>d? ZHej9:d@edAdBdCedBdDeddBdEedgdBdEgdFdG ZIe+dHdI ZJdJdK ZKdLdM ZLdNdO ZMdPdQ ZNdRdS ZOdTdU ZPdVdW ZQdXdY ZRdZd[ ZSd\d] ZTd^d_ ZUej9:d`e#ed dDe dafed dDeUfgdbdc ZVej9:ddeeegdedf ZWdgdh ZXej9:die<djdkge<dldmd gfdndo ZYej9:dpdqdrgdsdt ZZdS )u    N)Mock)datasets)BaseEstimator)CCAPLSCanonicalPLSRegression)make_friedman1)PCA)HistGradientBoostingClassifierRandomForestClassifier)NotFittedError)SelectFromModel)
ElasticNetElasticNetCVLassoLassoCVLogisticRegressionPassiveAggressiveClassifierSGDClassifier)make_pipeline)	LinearSVC)MinimalClassifierassert_allcloseassert_array_almost_equalassert_array_equalskip_if_32bitc                   @   s   e Zd Zdd ZdS )NaNTagc                 C   s   ddiS N	allow_nanT selfr   r   S/tmp/pip-unpacked-wheel-ig1s1lm8/sklearn/feature_selection/tests/test_from_model.py
_more_tags%   s    zNaNTag._more_tagsN__name__
__module____qualname__r#   r   r   r   r"   r   $   s   r   c                   @   s   e Zd Zdd ZdS )NoNaNTagc                 C   s   ddiS )Nr   Fr   r    r   r   r"   r#   *   s    zNoNaNTag._more_tagsNr$   r   r   r   r"   r(   )   s   r(   c                   @   s   e Zd Zdd ZdS )NaNTagRandomForestc                 C   s   ddiS r   r   r    r   r   r"   r#   /   s    zNaNTagRandomForest._more_tagsNr$   r   r   r   r"   r)   .   s   r)   c               
   C   sX   t dddd d d} dD ]<}t| |d}|tt tt |t W 5 Q R X qd S )N皙?
   Talphamax_itershufflerandom_statetol)Zgobbledigookz.5 * gobbledigook	threshold)	r   r   fitdataypytestraises
ValueError	transform)clfr3   modelr   r   r"   test_invalid_input8   s        r=   c                  C   s.   t  } t| d}|tt |j| ks*td S )N	estimator)r   r   r4   r5   r6   r?   AssertionErroresttransformerr   r   r"   test_input_estimator_unchangedC   s    
rD   zmax_features, err_type, err_msg   zmax_features ==c                 C   s   dS )Ng      ?r   Xr   r   r"   <lambda>T       rH   z3max_features must be an instance of int, not float.c                 C   s   t jd d S NrE   )r5   shaperF   r   r   r"   rH   Y   rI   c                 C   s   dS )Nr   rF   r   r   r"   rH   ^   rI   c              	   C   sR   t |}tddd}t|| tj d}tj||d |t	t
 W 5 Q R X d S )N   r   Zn_estimatorsr0   r?   max_featuresr3   match)reescaper   r   npinfr7   r8   r4   r5   r6   )rP   Zerr_typeerr_msgr;   rC   r   r   r"   test_max_features_errorK   s    
  rX   rP      c                 C   s   t ddd}t|| tj d}|tt}| dk	rV|j| ks@t|j	d |jks|tn&t
|drdt|j	d tj	d ks|tdS )z>Check max_features_ and output shape for integer max_features.rM   r   rN   rO   NrE   max_features_)r   r   rU   rV   fit_transformr5   r6   rZ   r@   rK   hasattrrP   r;   rC   X_transr   r   r"   "test_inferred_max_features_integero   s      r_   c                 C   s   dS rJ   r   rF   r   r   r"   rH      rI   c                 C   s
   | j d S rJ   rK   rF   r   r   r"   rH      rI   c                 C   s   t | jd dS NrE   i'  minrK   rF   r   r   r"   rH      rI   c                 C   sT   t ddd}t|| tj d}|tt}|j| tks<t|j	d |jksPtdS )z?Check max_features_ and output shape for callable max_features.rM   r   rN   rO   rE   N)
r   r   rU   rV   r[   r5   r6   rZ   r@   rK   r]   r   r   r"   #test_inferred_max_features_callable   s      rd   c                 C   s   t t| d d S )Nr   rY   )roundlenrF   r   r   r"   rH      rI   c                 C   sr   dddgdddgdddgd	d
dgg}ddddg}t ddd}t|| tj d}|||}|jd |jksntd S )Ngףp=
?gq=
ףpgףp=
?gRQg{Gzg333333gQ޿gffffffgQ?gGz?g?r   rE   rM   rN   rO   )r   r   rU   rV   r[   rK   rZ   r@   )rP   rG   r6   r;   rC   r^   r   r   r"   test_max_features_array_like   s      rg   c                 C   s   t | jd dS ra   rb   rF   r   r   r"   rH      rI   c                 C   s
   | j d S rJ   r`   rF   r   r   r"   rH      rI   c                 C   s   dS rJ   r   rF   r   r   r"   rH      rI   c                 C   sB   t ddd}t| d}t||tj d}|tt |t dS )z7Tests that the callable passed to `fit` is called on X.2   r   rN   )Zside_effectrO   N)	r   r   r   rU   rV   r[   r5   r6   Zassert_called_with)rP   r;   mrC   r   r   r"   test_max_features_callable_data   s
    
rj   c                   @   s   e Zd Zdd ZdddZdS )FixedImportanceEstimatorc                 C   s
   || _ d S N)importances)r!   rm   r   r   r"   __init__   s    z!FixedImportanceEstimator.__init__Nc                 C   s   t | j| _d S rl   )rU   arrayrm   feature_importances_)r!   rG   r6   r   r   r"   r4      s    zFixedImportanceEstimator.fit)N)r%   r&   r'   rn   r4   r   r   r   r"   rk      s   rk   c               	   C   sX  t jdddddddd\} }| jd }tddd	}t|tj d
}t||tj d}|| |}|| |}t|| tt	dddd}|| |}t
|jj}tj| dd}	td|jd d D ]z}
tt	ddd|
tj d}|| |}t
|jj}tj| dd}t| d d |	d |
 f | d d |d |
 f  qt|jj|jj d S )N  r+      r   F	n_samples
n_featuresn_informativen_redundant
n_repeatedr/   r0   rE   rh   rN   r?   r3   rO   g?*   r-   r0   r>   Z	mergesort)kind)r   make_classificationrK   r   r   rU   rV   r[   r   r   abs
estimator_coef_Zargsortrange)rG   r6   rP   rB   transformer1transformer2X_new1X_new2Zscores1Zcandidate_indices1ru   Zscores2Zcandidate_indices2r   r   r"   test_max_features   sL    
	
  

 r   c                  C   s   t jdddddddd\} }| jd }tdddddddd	d	dg
}td|d D ]Z}tt||tj d
}|	| |}t
| d }t|t| |jd |ksRtqRd S )Nrq   r+   rr   r   Frs   rE      rY   )rP   r3   )r   r}   rK   rU   ro   r   r   rk   rV   r[   where_get_support_maskr   aranger@   )rG   r6   rP   Zfeature_importancesru   rC   X_newZselected_feature_indicesr   r   r"   test_max_features_tiebreak   s*    
	
r   c            
   	   C   s   t jdddddddd\} }tddd}t|dtj d	}|| |}t|d
d}|| |}t|dd
d	}|| |}|jd t|jd |jd kst	|
t| jd tjd d f }	t|| d d |	d f  d S )Nrq   r+   rr   r   Frs   rh   rN   rO   g{Gz?ry   rE   )r   r}   r   r   rU   rV   r[   rK   rc   r@   r:   r   Znewaxisr   )
rG   r6   rB   r   r   r   r   Ztransformer3ZX_new3Zselected_indicesr   r   r"   test_threshold_and_max_features   s&    
	$$r   c            	   	   C   s   t jdddddddd\} }tddd}td	d
gtjtjgD ]\}}t||d}|| | t	|j
dsnt|| }|jd | jd k st|j
j}t|||k}t|| d d |f  q>d S )Nrq   r+   rr   r   Frs   rh   rN   meanmedianry   rp   rE   )r   r}   r   ziprU   r   r   r   r4   r\   r   r@   r:   rK   rp   r~   r   )	rG   r6   rB   r3   funcrC   r   rm   feature_maskr   r   r"   test_feature_importances  s&    


r   c               	   C   s   t jdddddddd\} }t|j}||dk  d9  < tddd}t|d	}|j| |d d
 | }|j| ||d
 | }t	||krt
|j| |d| d
 | }t	||kst
d S )Nd   r+   rr   r   Frs   rE   )r0   Zfit_interceptr>   )sample_weight)r   r}   rU   ZonesrK   r   r   r4   r   allr@   )rG   r6   r   rB   rC   maskZweighted_maskZreweighted_maskr   r   r"   test_sample_weight-  s*    

r   r?   r*   rz   r{   r0   )Zl1_ratior0   c              	   C   sh   t jdddddddd\}}t| d}||| ||}t|jjdk}t	||d d |f  d S )	Nr   r+   rr   r   Frs   r>   gh㈵>)
r   r}   r   r4   r:   rU   r~   r   r   r   )r?   rG   r6   rC   r   r   r   r   r"   test_coef_default_thresholdI  s    



r   c            
   
   C   s   t jddddddddd\} }t }tdd	gtjtjgD ]\}}d
dtjfD ]}tt ||d}|	| | t
|jds~t|| }|jd
 | jd
 k st|	| | tjj|jd|d}|||k}	t|| d d |	f  qNq:d S )Nrq   r+   rr   r   Fr   )rt   ru   rv   rw   rx   r/   r0   Z	n_classesr   r   rE   rY   )r?   r3   Z
norm_orderr   )Zaxisord)r   r}   r   r   rU   r   r   rV   r   r4   r\   r   r@   r:   rK   ZlinalgZnormr   r   )
rG   r6   rB   r3   r   orderrC   r   rm   r   r   r   r"   test_2d_coefe  s4    
  
r   c                  C   s   t dddd d} t| d}|jttttd |j}|jttttd |j}||ks^t|	t}|
tttftttf t||	t tt d}t|drtd S )Nr   FrM   )r0   r/   r.   r1   r>   classespartial_fit)r   r   r   r5   r6   rU   uniquer   r@   r:   r4   ZvstackZconcatenater   r   r\   )rB   rC   Z	old_modelZ	new_modelX_transformr   r   r"   test_partial_fit  s"       

 r   c                  C   sN   t ddd} t| d}|tt |jdd |tt |jjdksJtd S )Nautor   )Zdualr0   r>   r   )Zestimator__C)	r   r   r4   r5   r6   
set_paramsr   Cr@   rA   r   r   r"   test_calling_fit_reinitializes  s    
r   c               	   C   s  t ddddd d} t| }|tt |t}| tt t| dd}t|t| |tt |j| k	srtt| dd}|tt t|t| t ddddd d} t| dd}d}t	j
t|d	 |tt W 5 Q R X t	j
t|d	 |tt W 5 Q R X t	j
t|d	 |t W 5 Q R X t dddd d
tt} t| dd}|tt t|jj| j |tt t|jj| j d S )Nr*   r+   Tr   r,   )prefitFzEWhen `prefit=True`, `estimator` is expected to be a fitted estimator.rQ   )r-   r.   r/   r1   )r   r   r4   r5   r6   r:   r   r   r@   r7   r8   r   r   r   r   )r;   r<   r   rW   r   r   r"   test_prefit  s6    
r   c               	   C   s   t ddd} | tt t| ddd d}d}tjt|d	 |t W 5 Q R X d
}|j	|d tjt
dd	 |t W 5 Q R X dS )z:Check the interaction between `prefit` and `max_features`.rM   r   rN   Tc                 S   s
   | j d S rJ   r`   rF   r   r   r"   rH     rI   z*test_prefit_max_features.<locals>.<lambda>r   rP   z[When `prefit=True` and `max_features` is a callable, call `fit` before calling `transform`.rQ   g      @)rP   z!`max_features` must be an integerN)r   r4   r5   r6   r   r7   r8   r   r:   r   r9   )r?   r<   rW   rP   r   r   r"   test_prefit_max_features  s    r   c               	   C   s   t ddd} | tt t| ddd}t|j}d| d}tjt	|d	 |
  W 5 Q R X |tt |
 }|d
gkstdS )z;Check the interaction between prefit and the feature names.rY   r   rN   TrE   r   zThis z_ instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.rQ   Zx3N)r   r4   r5   r6   r   typer%   r7   r8   r   get_feature_names_outr@   )r;   r<   namerW   Zfeature_namesr   r   r"   !test_prefit_get_feature_names_out  s    

r   c                  C   sn   t ddd} t| dd}|tt |t}| tt dt| j }| j|k}t	|td d |f  d S )Nrh   r   rN   z0.5*meanr2         ?)
r   r   r4   r5   r6   r:   rU   r   rp   r   )rB   r<   r   r3   r   r   r   r"   test_threshold_string  s    

r   c                  C   s\   t ddddd d} t| dd}|tt |t}d|_|jd	 |tjd	 ksXtd S )
Nr*   r+   Tr   r,   z
0.1 * meanr2   z
1.0 * meanrE   )	r   r   r4   r5   r6   r:   r3   rK   r@   )r;   r<   r   r   r   r"    test_threshold_without_refitting  s    
r   c                  C   s@   t dd} t| d}t }tj|d< tj|d< |tt d S )Nr   r   r>   rE   )	r
   r   r5   copyrU   nanrV   r4   r6   )r;   r<   nan_datar   r   r"   test_fit_accepts_nan_inf  s    



r   c                  C   sL   t ddd} t }t| d}||t tj|d< tj|d< |	| d S )Nr   r   rN   r>   rE   )
r)   r5   r   r   r4   r6   rU   r   rV   r:   )r;   r   r<   r   r   r"   test_transform_accepts_nan_inf  s    


r   c                  C   sL   t  } t| d}| d dks$tt }t|d}| d dksHtd S )Nr>   r   TF)r   r   Z	_get_tagsr@   r(   )Zallow_nan_estr<   Z
no_nan_estr   r   r"   'test_allow_nan_tag_comes_from_estimator-  s    

r   c                 C   s   t | jS rl   )rU   r~   Zexplained_variance_)Zpca_estimatorr   r   r"   _pca_importances7  s    r   zestimator, importance_getterz$named_steps.logisticregression.coef_c                 C   s6   t | d|d}|tt |tjd dks2td S )Nr   )r3   importance_getterrE   )r   r4   r5   r6   r:   rK   r@   )r?   r   selectorr   r   r"   test_importance_getter;  s      r   PLSEstimatorc                 C   sJ   t dddd\}}| dd}tt||||}|||dksFtdS )	zCheck the behaviour of SelectFromModel with PLS estimators.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12410
    rh   r+   r   rt   ru   r0   rE   )Zn_componentsr   N)r   r   r   r4   Zscorer@   )r   rG   r6   r?   r<   r   r   r"   test_select_from_model_plsM  s    
r   c               	      s   t d tjddd\ } t j} fdd}tt |d | }t	|j
 j t| }||k sntt & tdt | jdd	  W 5 Q R X d
S )zvSelectFromModel works with estimators that do not support feature_names_in_.

    Non-regression test for #21949.
    pandasTas_frameZ
return_X_yc                    s   t  jd S rJ   )rU   r   rK   r>   rF   r   r"   r   c  s    zHtest_estimator_does_not_support_feature_names.<locals>.importance_getter)r   errorrE   rr   N)r7   importorskipr   	load_irissetcolumnsr   r   r4   r   feature_names_in_r   r@   warningscatch_warningssimplefilterUserWarningr:   Ziloc)r6   all_feature_namesr   r   Zfeature_names_outr   rF   r"   -test_estimator_does_not_support_feature_namesZ  s"    

  
r   zerror, err_msg, max_featuresz max_features == 10, must be <= 4r+   zmax_features == 5, must be <= 4c                 C   s   | j d d S rJ   r`   )xr   r   r"   rH   z  rI   c              	   C   sP   t jdddd\}}tj| |d$ tt |dj||ddgd W 5 Q R X d	S )
zDTest that partial_fit from SelectFromModel validates `max_features`.r   r   r   r   rQ   r?   rP   rE   r   N)r   r}   r7   r8   r   r   r   )r   rW   rP   rG   r6   r   r   r"   &test_partial_fit_validate_max_featuresv  s    	
   r   r   TFc                 C   sb   t d tj| dd\}}tt ddj||dddgd	}| rPt|j|j	 nt
|d
r^tdS )zITest that partial_fit from SelectFromModel validates `feature_names_in_`.r   Tr   r   r   r   rE   rY   r   r   N)r7   r   r   r   r   r   r   r   r   r   r\   r@   )r   rG   r6   r   r   r   r"   'test_partial_fit_validate_feature_names  s    
  r   )[rS   r   Zunittest.mockr   ZnumpyrU   r7   Zsklearnr   Zsklearn.baser   Zsklearn.cross_decompositionr   r   r   Zsklearn.datasetsr   Zsklearn.decompositionr	   Zsklearn.ensembler
   r   Zsklearn.exceptionsr   Zsklearn.feature_selectionr   Zsklearn.linear_modelr   r   r   r   r   r   r   Zsklearn.pipeliner   Zsklearn.svmr   Zsklearn.utils._testingr   r   r   r   r   r   r(   r)   r   Zirisr5   targetr6   randomZRandomStaterngr=   rD   markZparametrizerK   r9   	TypeErrorrX   r_   rd   rg   rj   rk   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r"   <module>   s   $		




	+


	

	+




