U
    |hp                     @   s2  d dl mZ d dlZd dlZd dlmZmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZmZ d d	lmZmZ d d
l m!Z!m"Z" G dd deZ#G dd deZ$ej%&dej%&dej%&dej%'deefej%'dddd Z(ej%'deefej%'dddddddddd dgdddd!gfd"dddddddd gddd!gfdd#ddddddd dgd$d%d&d'gfd"d#ddddddd dgd$d%d&d'gfgd(d) Z)ej%'deefej%'d*d+d,d-dddgfd.d,d-dd.d/gfd+d0dddgfd1d,d-d-d&d'gfd1d2d-d-d&d'gfd1d'd-d-d&d'gfd1d3d-d-d4d3gfd1d5d-d-d6d7gfd1d8d-d-d9d8gfd1dd-d-ddgfd1d.dddgfd1ddddgfgd:d; Z*ej%'deefej%'d<d=d>d?d@dAdBdCdDdEg	dFdG Z+ej%'deefdHdI Z,ej%'dJdKdLdMdNdOgdPdQ Z-ej%'dRdSdd-gid-fdSeddidTfgdUdV Z.ej%'deefej%'dWdXdYidZfdSd9d[d\fd,d]d^d_fd`dadbdcfddeddeidffdde idffgdgdh Z/ej%'dWd1d1didjfgdkdl Z0ej%'dmdndodpdqgdrds Z1ej%'dtdudvdw Z2ej%'dxdd dygfd-d dSdygfdd dzd]dSdygfdTd dzd]dSdygfddd{gfd-dd|d{gfdTdd|d{gfdd-d}gfdTd-d~dd}gfg	dd Z3ej%'deefdd Z4ej%'deefdd Z5ej%'deefdd Z6ej%'deegdd Z7ej%'deegdd Z8dd Z9dS )    )ceilN)exponnormrandint)make_classification)DummyClassifier)enable_halving_search_cv)

GroupKFoldGroupShuffleSplitHalvingGridSearchCVHalvingRandomSearchCVKFoldLeaveOneGroupOutLeavePGroupsOutShuffleSplitStratifiedKFoldStratifiedShuffleSplit)_SubsampleMetaSplitter_top_k)check_cv_results_array_typescheck_cv_results_keys)SVC	LinearSVCc                       s`   e Zd ZU dZejdd eededd D Zee	d< d fd
d	Z
d fdd	Z  ZS )FastClassifierzDummy classifier that accepts parameters a, b, ... z.

    These parameter don't affect the predictions and are useful for fast
    grid searching.c                 C   s   i | ]}t |d qS )Zno_validation)chr).0key r   Y/tmp/pip-unpacked-wheel-ig1s1lm8/sklearn/model_selection/tests/test_successive_halving.py
<dictcomp>*   s    zFastClassifier.<dictcomp>az   _parameter_constraints
stratifiedNc                    s   t  j|||d d S N)strategyrandom_stateconstant)super__init__)selfr&   r'   r(   kwargs	__class__r   r   r*   0   s
      zFastClassifier.__init__Fc                    s:   t  j|d}ttdtdd D ]}d|t|< q$|S )N)deepr    r!   r"   Zwhatever)r)   
get_paramsrangeordr   )r+   r/   paramscharr-   r   r   r0   7   s    zFastClassifier.get_params)r$   NN)F)__name__
__module____qualname____doc__r   r#   r1   r2   dict__annotations__r*   r0   __classcell__r   r   r-   r   r   !   s   
	     r   c                       s6   e Zd Zd fdd	Z fdd	Z fd
dZ  ZS )SometimesFailClassifierr$   N
   Fr   c                    s.   || _ || _|| _|| _t j|||d d S r%   )fail_fitfail_predictn_estimatorsr    r)   r*   )r+   r&   r'   r(   r@   r>   r?   r    r-   r   r   r*   ?   s    
  z SometimesFailClassifier.__init__c                    s   | j rtdt ||S )Nzfitting failed)r>   	Exceptionr)   fitr+   Xyr-   r   r   rB   R   s    zSometimesFailClassifier.fitc                    s   | j rtdt |S )Nzpredict failed)r?   rA   r)   predictr+   rD   r-   r   r   rF   W   s    zSometimesFailClassifier.predict)r$   NNr=   FFr   )r5   r6   r7   r*   rB   rF   r;   r   r   r-   r   r<   >   s          r<   z+ignore::sklearn.exceptions.FitFailedWarningz!ignore:Scoring failed:UserWarningz%ignore:One or more of the:UserWarningHalvingSearchfail_at)rB   rF   c           	      C   s   d}t |dd\}}| t d| ddgdtdid	d
ddd}||| |jd|  r`t|jd }|jd }t|	 stt
|t| }|jd dkst|d |k stdS )z\Check the selection of the best scores in presence of failure represented by
    NaN values.  r   	n_samplesr'   Zfail_FTr       r@      r"      )resourcemax_resourcesmin_resourcesfactormean_test_scorerank_test_scoreN)r   r<   r1   rB   best_params_AssertionErrorcv_results_npisnananyuniqueshapeall)	rH   rI   rL   rD   rE   searchZscoresZranksZunique_nan_ranksr   r   r   test_nan_handling]   s$    	

r`   Estzaggressive_elimination,max_resources,expected_n_iterations,expected_n_required_iterations,expected_n_possible_iterations,expected_n_remaining_candidates,expected_n_candidates,expected_n_resources,Tlimited   rM   r"   <            FZ	unlimited%   o   iM  i  c	                 C   s   d}	t |	dd\}
}dttdd}t }|dkr:d}n|	}| ||||d	d
}|jdd | tkrr|jddd ||
| |j|kst|j	|kst|j
|kst|j|kst|j|kst|j|kstt|jd |j |jkstd S )NrJ   r   rK   l1l2   r    brb   rg   rM   )aggressive_eliminationrQ   rS   T)verboserd   exhaustn_candidatesrR   )r   listr1   r   
set_paramsr   rB   n_iterations_rW   n_required_iterations_n_possible_iterations_n_resources_n_candidates_Zn_remaining_candidates_r   rS   )ra   rp   rQ   expected_n_iterationsexpected_n_required_iterationsexpected_n_possible_iterationsZexpected_n_remaining_candidatesexpected_n_candidatesexpected_n_resourcesrL   rD   rE   
param_gridbase_estimatorshr   r   r   test_aggressive_elimination   s2    )r   zfmin_resources,max_resources,expected_n_iterations,expected_n_possible_iterations,expected_n_resources,smallestautorO   2      rm   rr   rJ   iX     iW     iU  i,  d   c                 C   s   d}t |dd\}}ddgdddgd}	t }
| |
|	d||d}| tkrT|jd	d
 ||| d}|j|ksrt|j|kst|j|kst|j	|kst|dkr|j|j  krt
|j	ksn td S )NrJ   r   rK   r"   rO   rM   rn   )rS   rR   rQ   rN   rt   rr   )r   r   r   rw   rB   rx   rW   ry   rz   r{   len)ra   rR   rQ   r}   r   r   rL   rD   rE   r   r   r   r~   r   r   r   test_min_max_resources   s*    &r   z2max_resources, n_iterations, n_possible_iterations)r      	   )   r   r   )i  r      )   r   r   )i  r   rf   )    rc   rc   )   rM   rM   )   rM   rM   )rc   r"   r"   c                 C   s   d}t |dd\}}ddgttdd}t }d}	| ||d|	|dd}
| tkr\|
jd	d
 |
|| |
jdksvt|
j	|kst|
j
|kstd S )Nr   r"   rK   rO   r=   rn   rc   )cvrS   rQ   rR   re   r   r   )r   rv   r1   r   r   rw   rB   ry   rW   rx   rz   )ra   rQ   Zn_iterationsZn_possible_iterationsrL   rD   rE   r   r   rS   r   r   r   r   test_n_iterations  s&    r   c           
   	   C   s@  d}t |dd\}}ddgttdd}t }| ||dddd	d
}||| t|jtdd	dgksltt|j	d |j	d |j	d D ](\}}}	||d   kr|	ksn tqt
jtdd$ t||dddd}||| W 5 Q R X t
jtdd< ddgddgdd	gd}t||dddd}||| W 5 Q R X d S )NrJ   r   rK   r"   rO   r=   rn   crM   )r   rP   rQ   rS   r   n_resourcesr3   param_cz0Cannot use resource=1234 which is not supported matchZ1234)r   rP   rQ   zSCannot use parameter c as the resource since it is part of the searched parameters.)r    ro   r   )r   rv   r1   r   rB   setr{   rW   ziprX   pytestraises
ValueErrorr   )
ra   rL   rD   rE   r   r   r   Zr_ir3   r   r   r   r   test_resource_parameter8  sL              r   z2max_resources, n_candidates, expected_n_candidates)r   rr      )r   rr   r   )r   r   r   )r   rf   rf   )r   r   r   c           	   	   C   sv   d}t |dd\}}ttd}t }t|||d| ddd}||| |jd |ksXt|dkrr|jd	 | ksrtd S )
Nr   r   rK   rn   rO   rc   )rt   r   rQ   rS   rR   rr   ru   )r   r   r   r   rB   r|   rW   r{   )	rQ   rt   r   rL   rD   rE   r   r   r   r   r   r   test_random_search`  s"    
	r   z*param_distributions, expected_n_candidatesr    r=   c                 C   sJ   d}t |dd\}}t }t|| dd}||| |jd |ksFtd S )Nr   r   rK   r=   r   )r   r   r   rB   r|   rW   )param_distributionsr   rL   rD   rE   r   r   r   r   r   )test_random_search_discrete_distributions  s    r   zparams, expected_error_messagerP   Znot_a_parameterz:Cannot use resource=not_a_parameter which is not supported)rP   rQ   z:Cannot use parameter a as the resource since it is part ofro   )rQ   rP   z:resource can only be 'n_samples' when max_resources='auto'      )rR   rQ   z3min_resources_=15 is greater than max_resources_=14r   )shufflezmust yield consistent foldsc              	   C   sT   t  }ddgi}td\}}| ||f|}tjt|d ||| W 5 Q R X d S Nr    r"   r   r   )r   r   r   r   r   rB   )ra   r3   expected_error_messager   r   rD   rE   r   r   r   r   test_input_errors  s    
r   rs   zcannot be both set to 'exhaust'c              	   C   sT   t  }ddgi}td\}}t||f| }tjt|d ||| W 5 Q R X d S r   )r   r   r   r   r   r   rB   )r3   r   r   r   rD   rE   r   r   r   r   test_input_errors_randomized  s    
r   zAfraction, subsample_test, expected_train_size, expected_test_size)      ?T(   r=   )r   Fr   re   )皙?Tr   rc   )r   Fr   re   c           
      C   s   d}t |\}}ttd| |d d}|||D ]r\}}	|jd |ksJt|	jd |ks\t|r|jd |	jd  t||  kstq0|	jd ||j  ks0tq0d S )Nr   r   base_cvfractionsubsample_testr'   r   )	r   r   r   splitr]   rW   intr   Zget_n_splits)
r   r   Zexpected_train_sizeZexpected_test_sizerL   rD   rE   r   Ztraintestr   r   r   test_subsample_splitter_shapes  s    &r   r   )TFc                 C   s   d}t |\}}ttdd| d d}t|j||d d}t|j||d d}t||D ]f\\}}\}	}
t||	krxt| rt||
krtqVt||
kstt|| ||
 ksVtqVd S )Nr   r   r   r   groups)	r   r   r   rv   r   r   rY   r^   rW   )r   rL   rD   rE   r   Zfolds_aZfolds_bZtrain_aZtest_aZtrain_bZtest_br   r   r   #test_subsample_splitter_determinism  s        r   zk, itr, expectedr   defighc                 C   sf   dddddddddg	dddddddd	d
g	dddddddddg	d}t || |d}t||ksbtd S )Nr   r"   rO   rc   rM   r      r=   rN   r   r    ro   r   r   r   r   r   r   r   iterrT   r3   )kitr)r   rY   r^   rW   )r   r   expectedresultsgotr   r   r   
test_top_k  s    r   c                    s>  t d}tjd d}t|dd\}}dttdd}t } fdd	}| ||d
|d}| t	krt|j
ddd ||| t|jd tjstt|jd tjst||j}	t|	d  t|	kst|	d t|	d< |	jdddd}
||
}|j}t|d D ]}|| }|||d  @ |k s<t| ||d  @ }| | @ }| |j|d  ksvt|
| | }|
| | }||k stq|	d  }|	|	d |k d  }|	d  }|j|	j | d kst|	j | d |	j | d k st|	j | d |	j | d ks:td S )Npandasr   rJ   rK   rj   rm   rn   c                    s      S N)Zrand)ZestrD   rE   rngr   r   scorer=  s    ztest_cv_results.<locals>.scorerrO   )rS   Zscoringrd   rr   rs   r   r   rT   r3   Z
params_str)indexcolumnsvaluesr"   )!r   importorskiprY   randomRandomStater   rv   r1   r   r   rw   rB   
isinstancerX   ZndarrayrW   	DataFramer   r\   applystrZpivotZisnarx   r^   sumr|   wheremaxminZidxmaxrV   Ziloc)ra   pdrL   rD   rE   r   r   r   r   cv_results_dftableZnan_maskZn_iteritZalready_discarded_maskZdiscarded_now_maskZ	kept_maskZdiscarded_max_scoreZkept_min_scoreZ	last_iterZidx_best_last_iterZidx_best_all_itersr   r   r   test_cv_results-  s^    
  
r   c                    sx  t d}g  g g G  fdddt}d}d}t|dd\}}dttd	d
}| }| ||d|ddd}	| tkr|	jddd |	|| t	 t	kst
dd t D }
|
d d | }
d d | ||	j}t	t	|
  krt	|ksn t
tj|
dd\}}|	j|k s4t
|	j|k sHt
|d k s^t
|d |
k stt
d S )Nr   c                       s:   e Zd Z fddZ fddZ fddZ  ZS )z=test_base_estimator_inputs.<locals>.FastClassifierBookKeepingc                    s    |jd  t ||S Nr   )appendr]   r)   rB   rC   )r.   passed_n_samples_fitr   r   rB     s    zAtest_base_estimator_inputs.<locals>.FastClassifierBookKeeping.fitc                    s    |jd  t |S r   )r   r]   r)   rF   rG   )r.   passed_n_samples_predictr   r   rF     s    zEtest_base_estimator_inputs.<locals>.FastClassifierBookKeeping.predictc                    s    | t jf |S r   )r   r)   rw   )r+   r3   )r.   passed_paramsr   r   rw     s    
zHtest_base_estimator_inputs.<locals>.FastClassifierBookKeeping.set_params)r5   r6   r7   rB   rF   rw   r;   r   r   r   r   r-   r   FastClassifierBookKeeping  s   r   r   rO   r   rK   rj   rm   rn   F)rS   r   return_train_scoreZrefitrd   rr   rs   c                 S   s   g | ]\}}|| qS r   r   )r   xrE   r   r   r   
<listcomp>  s    z.test_base_estimator_inputs.<locals>.<listcomp>T)Zreturn_countsr3   r   )r   r   r   r   rv   r1   r   rw   rB   r   rW   r   r   rX   rY   r\   r{   r^   r|   )ra   r   r   rL   n_splitsrD   rE   r   r   r   Zpassed_n_samplesr   Zuniquescountsr   r   r   test_base_estimator_inputs  sD    
*r   c              
   C   s   t jd}tdddd\}}|ddd}tddd}dd	gi}t tdtdd
t	ddg}d}|D ]J}	| |||	dd}
t
jt|d |
|| W 5 Q R X |
j|||d qht tddg}|D ]}	| |||	d}
|
|| qd S )Nr   r   rO   )rL   Z	n_classesr'   rM   r   )Zdualr'   Cr"   )r   )r'   z*The 'groups' parameter should not be None.)r   r'   r   r   )r   )rY   r   r   r   r   r   r   r   r	   r
   r   r   r   rB   r   r   )ra   r   rD   rE   r   ZclfZgridZ	group_cvs	error_msgr   ZgsZnon_group_cvsr   r   r   test_groups_support  s(    
r   SearchCVc              	   C   s^   t  }ddgi}tddd}| ||dd}d}tjt|d ||g  W 5 Q R X d	S )
zBCheck that we raise an error if the minimum resources is set to 0.r    r"   r   rM   r   )rR   z;min_resources_=0: you might have passed an empty dataset X.r   N)r   rY   emptyZreshaper   r   r   rB   )r   r   r   rD   r_   err_msgr   r   r   test_min_resources_null  s    
r   c                 C   sr   t dddddddddg	t dddddddd	d
g	t dddddddddg	d}| dd|}|dksntdS )z3Check the selection strategy of the halving search.r   r"   rO   rc   rM   r   r   r=   rN   r   r    ro   r   r   r   r   r   r   r   r   Nr   )rY   arrayZ_select_best_indexrW   )r   r   Z
best_indexr   r   r   test_select_best_index  s    r   c                     s   t dddd\} }dgtddtddd	d
gddgdg}d}d}d}tt d|ddd}|| | t|j}|j t |||| t	||| t
 fddt|D stt
 fddt|D stdS )zqCheck the behaviour of the `HalvingRandomSearchCV` with `param_distribution`
    being a list of dictionary.
    r   rc   *   )rL   Z
n_featuresr'   rbfr=   )Zscaleg?)kernelr   gammapolyrO   rM   )r   Zdegree)param_Cparam_degreeparam_gammaparam_kernel)rT   Zmean_train_scorerU   Zsplit0_test_scoreZsplit1_test_scoreZsplit2_test_scoreZsplit0_train_scoreZsplit1_train_scoreZsplit2_train_scoreZstd_test_scoreZstd_train_scoreZmean_fit_timeZstd_fit_timeZmean_score_timeZstd_score_time)r   r   Tr   )r   r   r   r'   c                 3   sJ   | ]B} d  | dkr d j | o@ d j | o@ d j |  V  qdS )r  r   r   r  r  Nmaskr   r   Z
cv_resultsr   r   	<genexpr>?  s   z;test_halving_random_search_list_of_dicts.<locals>.<genexpr>c                 3   sL   | ]D} d  | dkr d j |  oB d j |  oB d j | V  qdS )r  r   r   r  r  Nr  r  r  r   r   r  H  s   N)r   r   r   r   rB   r   r|   rX   r   r   r^   r1   rW   )rD   rE   r3   Z
param_keysZ
score_keysZ
extra_keysr_   rt   r   r  r   (test_halving_random_search_list_of_dicts  s2        
	r	  ):mathr   ZnumpyrY   r   Zscipy.statsr   r   r   Zsklearn.datasetsr   Zsklearn.dummyr   Zsklearn.experimentalr   Zsklearn.model_selectionr	   r
   r   r   r   r   r   r   r   r   Z2sklearn.model_selection._search_successive_halvingr   r   Z)sklearn.model_selection.tests.test_searchr   r   Zsklearn.svmr   r   r   r<   markfilterwarningsZparametrizer`   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	  r   r   r   r   <module>   s   0


!"""/$
'


	
	







d
B
 

