U
    }hV                     @   sz  d dl Z d dlmZ d dlmZ d dlZd dlZd dlm	Z
 d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZm Z m!Z!m"Z"m#Z# d dl$m%Z% dd Z&d	d
 Z'ej()dg e*dfdddge*dfe+g e*dfe+dddge*dfej,,de*dfgdd Z-ej()di dddgdd Z.dd Z/dd Z0dd Z1dd Z2dd  Z3d!d" Z4d#d$ Z5d%d& Z6d'd( Z7ej()d)d*dd ge+d*dd ge8d*dd ggd+d, Z9d-d. Z:d/d0 Z;d1d2 Z<d3d4 Z=d5d6 Z>d7d8 Z?d9d: Z@e#eAd;d<d= ZBd>d? ZCd@dA ZDdBdC ZEej()dDdEdFgdGdH ZFdIdJ ZGdKdL ZHdMdN ZIdOdP ZJdQdR ZKdSdT ZLdUdV ZMdS )W    N)defaultdict)partial)make_biclusters
make_blobsmake_checkerboardmake_circlesmake_classificationmake_friedman1make_friedman2make_friedman3make_hastie_10_2make_low_rank_matrix
make_moonsmake_multilabel_classificationmake_regressionmake_s_curvemake_sparse_coded_signalmake_sparse_uncorrelatedmake_spd_matrixmake_swiss_roll)assert_allcloseassert_almost_equalassert_array_almost_equalassert_array_equalignore_warnings)assert_all_finitec                  C   sR  ddg} t ddddddddd d | d	d
\}}| ddgks<t|jdksNtd|jdks`tdt|jdksxtdt|d	kdkstdt|dkdkstdt|dkdkstdt dddd	d	ddd	d\}}|jdkstd|jdkstdt|d|jfg|jd  |jd |jd jd	 dksNtd!d S )"N皙?      ?d               Fr   )	n_samples
n_featuresn_informativen_redundant
n_repeated	n_classesn_clusters_per_class	hypercubeshiftscaleweightsrandom_stater   r   X shape mismatchr   y shape mismatchr"   Unexpected number of classes
   z(Unexpected number of samples in class #0   z(Unexpected number of samples in class #1   A   z(Unexpected number of samples in class #2     T      ?)r#   r$   r%   r&   r'   r*   r,   r.   )r9   r:   )r9    z Unexpected number of unique rows)	r   AssertionErrorshapenpuniquesumviewdtypeZreshape)r-   Xy rG   Q/tmp/pip-unpacked-wheel-ig1s1lm8/sklearn/datasets/tests/test_samples_generator.pytest_make_classification'   s^    

" rI   c                  C   s  d} t t| ddddddd}ddgdfddgd dfdd	gd
 dfddgd dfddd	gdfddgd dftddgdffD ]\}}}t|}|| }|d }dD ]}||||||||dd\}	}
|	j||fkst|
j|fkstt|	}|jd	|j
d d}tj|dd\}}t||ks2tdtt}t||
D ]\}}|| | qD| D ]}t||ksftdqft||kstdtt|
t|
 | dg| dd tt|D ]x}|	||k jdd}|rtt||  t|ddd n6tt& tt||  t|ddd W 5 Q R X qqq|tt |ddddd  W 5 Q R X tt |ddddd  W 5 Q R X d!S )"zTest the construction of informative features in make_classification

    Also tests `n_clusters_per_class`, `n_classes`, `hypercube` and
    fully-specified `weights`.
    g    .Ar   r!   F)	class_sepr&   r'   Zflip_yr+   r,   shuffler7   gUUUUUU?r"   r      r;         ?r5   @   2   )FT)r#   r(   r-   r$   r%   r)   r*   r.   z|S{0})rD   T)Zreturn_inversez6Wrong number of clusters, or not in distinct quadrantsz"Wrong number of clusters per classzWrong number of classesz!Wrong number of samples per classerr_msgZaxisr    z/Clusters are not centered on hypercube vertices)decimalrQ   z5Clusters should not be centered on hypercube vertices)r$   r%   r(   r)   N)r   r   intlenr?   r>   r@   signrC   formatstridesrA   r   setzipaddvaluesr   bincountrangeZmeanabsonespytestraises
ValueError)rJ   maker%   r-   r)   r(   
n_clustersr#   r*   rE   rF   ZsignsZunique_signsZcluster_indexZclusters_by_classZclusterclsZclustersZcentroidrG   rG   rH   -test_make_classification_informative_featuresW   s    
	




	rg   zweights, err_type, err_msgz:Weights specified but incompatible with number of classes.r   rM   r   r"   c              	   C   s(   t j||d t| d W 5 Q R X d S )Nmatch)r-   )ra   rb   r   )r-   Zerr_typerQ   rG   rG   rH   %test_make_classification_weights_type   s    rj   kwargs)r(   r%   c                 C   sV   t f ddgdd| \}}t f tddgdd| \}}t|| t|| d S )Nr   g?r   )r-   r.   )r   r@   arrayr   )rk   X1y1X2y2rG   rG   rH   1test_make_classification_weights_array_or_list_ok   s    "
rq   c               	   C   s   t ddD ]\} }tddddd| d\}}|jd	ks<td
| sZtdd |D dksZttdd |D |kstttdd |D dks
tq
d S )NTFr   r!   r   r   r"   r   Fr#   r$   r(   r.   Zreturn_indicatorallow_unlabeledr/   r0   c                 S   s   g | ]}t |qS rG   )max.0rF   rG   rG   rH   
<listcomp>   s     zHtest_make_multilabel_classification_return_sequences.<locals>.<listcomp>r7   c                 S   s   g | ]}t |qS rG   rU   rw   rG   rG   rH   ry      s     c                 S   s   g | ]}t |qS rG   rz   rw   rG   rG   rH   ry      s     )rZ   r   r?   r>   rv   minru   
min_lengthrE   YrG   rG   rH   4test_make_multilabel_classification_return_sequences   s    
r   c                  C   s   t ddD ]^\} }tdddd| d\}}|jdks:td	|jd
ksLtdttj|dd|ks
tq
tdddd| dd\}}}}t|| t|| |jdkstt	| d |jdkstt	|jdddgd  d S )Nrr   rs   r6   r   r"   r   )r#   r$   r(   r.   ru   r6   r   r0   r6   r"   Y shape mismatchrR   T)r#   r$   r(   r.   ru   Zreturn_distributionsr3   r!   )r   r"   )
rZ   r   r?   r>   r@   allrB   r   r   r   )ru   r}   rE   r~   ro   ZY2Zp_cZp_w_crG   rG   rH   4test_make_multilabel_classification_return_indicator   s2    
	

r   c               	   C   sb   t ddD ]R\} }tddddd| d\}}|jd	ks<td
|jdksNtdt|s
tq
d S )Nrr   rs   r6   r   r"   r   sparsert   r   r0   r   r   )rZ   r   r?   r>   spissparser|   rG   rG   rH   ;test_make_multilabel_classification_return_indicator_sparse  s    
r   c                  C   sP   t ddd\} }| jdks"td|jdks4tdt|jdksLtd	d S )
Nr   r   )r#   r.   r   r5   r0   r1   r2   )r7   r4   )r   r?   r>   r@   rA   rE   rF   rG   rG   rH   test_make_hastie_10_2   s    r   c               
   C   s   t ddddddddd	\} }}| jd
ks0td|jdksBtd|jdksTtdt|dkdksltdtt|t| | ddd t ddd\} }| jdkstd S )Nr   r5   r"   r    T              ?r   )r#   r$   r%   effective_rankcoefZbiasnoiser.   r   r0   r1   r2   r5   coef shape mismatch)Unexpected number of informative featuresr!   rS   )r#   r$   )r   r!   )r   r?   r>   rB   r   r@   stddotrE   rF   crG   rG   rH   test_make_regression'  s"     r   c               	   C   s   t dddddddd\} }}| jdks.td	|jd
ks@td|jdksRtdtt|dkdd tt|t| | ddd d S )Nr   r5   r"   Tr   r   )r#   r$   r%   	n_targetsr   r   r.   r   r0   )r   r"   r2   )r5   r"   r   r   r   r!   r   )	r   r?   r>   r   rB   r   r@   r   r   r   rG   rG   rH    test_make_regression_multitarget@  s    
r   c                  C   s   t dddg} t ddgddgddgg}tddd|| d	\}}|jd
ksTtd|jdksftdt |jdks~tdtt|| D ]*\}\}}t|||k | 	 |dd qd S )N皙?皙?皙?r   r   r   rO   r7   )r.   r#   r$   centerscluster_std)rO   r7   r0   )rO   r2   r3   zUnexpected number of blobsr!   Unexpected std)
r@   rl   r   r?   r>   rA   	enumeraterZ   r   r   )cluster_stdsZcluster_centersrE   rF   ictrr   rG   rG   rH   test_make_blobsT  s    
r   c                  C   s\   dddg} t | ddd\}}|jt| dfks6tdttj|t| d| ksXtd	d S )
NrO      r   r7   r   r#   r$   r.   r0   Z	minlength$Incorrect number of samples per blobr   r?   rB   r>   r   r@   r]   rU   )r#   rE   rF   rG   rG   rH   test_make_blobs_n_samples_listf  s    
r   c                  C   s   dddg} t ddgddgddgg}t dddg}t| ||dd\}}|jt| d	fksdtd
tt j|t| d| kstdt	t
||D ]*\}\}}t|||k |  |dd qd S )Nr   r   r   r   r   r   r   )r#   r   r   r.   r7   r0   r   r   r!   r   )r@   rl   r   r?   rB   r>   r   r]   rU   r   rZ   r   r   )r#   r   r   rE   rF   r   r   r   rG   rG   rH   +test_make_blobs_n_samples_list_with_centersp  s"    
   
r   r#   r    c                 C   sV   d }t | |dd\}}|jt| dfks0tdttj|t| d| ksRtdd S )Nr   )r#   r   r.   r7   r0   r   r   r   )r#   r   rE   rF   rG   rG   rH   &test_make_blobs_n_samples_centers_none  s    r   c                  C   s<   ddg} d}t | |ddd\}}}|jt| |fks8td S )Nr5   r   r"   Tr   )r#   r$   Zreturn_centersr.   )r   r?   rU   r>   )r#   r$   rE   rF   r   rG   rG   rH   test_make_blobs_return_centers  s       r   c               	   C   s  dddg} t ddgddgddgg}t dddg}td|  d|d d	  }tjt|d
 t| |d d	 d W 5 Q R X td| d|d d	  }tjt|d
 t| ||d d	 d W 5 Q R X dd}tjt|d
 t| dd W 5 Q R X d S )Nr   r   r   r   r   r   zMLength of `n_samples` not consistent with number of centers. Got n_samples = z and centers = r=   rh   )r   zNLength of `clusters_std` not consistent with number of centers. Got centers = z and cluster_std = )r   r   z8Parameter `centers` must be array-like. Got {!r} insteadr"   )	r@   rl   reescapera   rb   rc   r   rW   )r#   r   r   Zwrong_centers_msgZwrong_std_msgZwrong_type_msgrG   rG   rH   test_make_blobs_error  s$    
 r   c               	   C   s   t ddddd\} }| jdks&td|jdks8td	t|dttj| d d df  | d d d
f   d| d d df d d   d| d d df   d| d d df    d S )Nr    r5   r   r   )r#   r$   r   r.   r    r5   r0   r    r2   r!   r   r7   r;   r"   rL   )r	   r?   r>   r   r@   sinpir   rG   rG   rH   test_make_friedman1  s    .r   c               	   C   s   t dddd\} }| jdks$td|jdks6tdt|| d d df d	 | d d d
f | d d d	f  d
| d d d
f | d d df    d	  d  d S )Nr    r   r   r#   r   r.   r    rL   r0   r   r2   r7   r!   r"   r;   )r
   r?   r>   r   r   rG   rG   rH   test_make_friedman2  s     ^r   c               
   C   s   t dddd\} }| jdks$td|jdks6tdt|t| d d d	f | d d d
f  d	| d d d	f | d d df    | d d df   d S )Nr    r   r   r   r   r0   r   r2   r!   r7   r"   )r   r?   r>   r   r@   Zarctanr   rG   rG   rH   test_make_friedman3  s     Xr   c                  C   sZ   t dddddd} | jdks$tddd	lm} || \}}}t|d d
k sVtdd S )NrO   r6   r    g{Gz?r   )r#   r$   r   Ztail_strengthr.   )rO   r6   r0   )svdr   zX rank is not approximately 5)r   r?   r>   numpy.linalgr   rB   )rE   r   usvrG   rG   rH   test_make_low_rank_matrix  s    r   c                  C   s   t dddddd\} }}| jdks*td|jd	ks<td
|jdksNtd|D ]}tt|dksRtdqRt| ||  tt|d jddt	|jd  d S )Nr       r5   r"   r   )r#   n_componentsr$   n_nonzero_coefsr.   r   r   )r   r5   D shape mismatch)r    r   r0   Non-zero coefs mismatchr7   r!   rR   )
r   r?   r>   rU   r@   flatnonzeror   sqrtrB   r`   )r~   DrE   rowrG   rG   rH   test_make_sparse_coded_signal  s    r   )categoryc                  C   s   t ddddddd\} }}| jdks,td	|jd
ks>td|jdksPtd|jD ]}tt|dksVtdqVt| ||  tt|d j	ddt
|jd  d S )Nr    r   r5   r"   r   Tr#   r   r$   r   r.   Zdata_transposed)r5   r    r   )r5   r   r   )r   r    r0   r   r7   rR   r!   )r   r?   r>   TrU   r@   r   r   r   rB   r`   )r~   r   rE   colrG   rG   rH   (test_make_sparse_coded_signal_transposed  s    
r   c               	   C   s6   d} t jt| d tddddddd W 5 Q R X dS )z)Check the message for future deprecation.z-data_transposed was deprecated in version 1.3rh   r!   r   Tr   N)ra   ZwarnsFutureWarningr   )Zwarn_msgrG   rG   rH   0test_make_sparse_code_signal_deprecation_warning
  s    r   c                  C   s:   t dddd\} }| jdks$td|jdks6tdd S )	Nr    r5   r   r   r   r0   r   r2   )r   r?   r>   r   rG   rG   rH   test_make_sparse_uncorrelated  s    r   c                  C   sb   t ddd} | jdkstdt| | j ddlm} || \}}t|dkt	dgd d d S )	Nr    r   )Zn_dimr.   )r    r    r0   )eigTzX is not positive-definite)
r   r?   r>   r   r   r   r   r   r@   rl   )rE   r   Zeigenvalues_rG   rG   rH   test_make_spd_matrix  s      r   holeFTc                 C   st   t ddd| d\}}|jdks"t|jdks0tt|d d df |t|  t|d d df |t|  d S )Nr    r   r   )r#   r   r.   r   r    r"   r   r7   )r   r?   r>   r   r@   cosr   )r   rE   trG   rG   rH   test_make_swiss_roll-  s
     r   c                  C   s   t dddd\} }| jdks$td|jdks6tdt| d d df t| t| d d d	f t|t|d
   d S )Nr    r   r   r   r   r0   r   zt shape mismatchr7   r!   )r   r?   r>   r   r@   r   rV   r   )rE   r   rG   rG   rH   test_make_s_curve7  s
    r   c                  C   s   t ddddd\} }}| jdks(td|jdks:td|jdksLtd	t|  t| t| t ddddd\}}}t| | d S )
Nr   r   rL   Tr   r?   re   rK   r.   r0   )rL   r   rows shape mismatchcolumns shape mismatch)r   r?   r>   r   r   )rE   rowscolsro   r   rG   rG   rH   test_make_biclusters@  s*          r   c                  C   s   t ddddd\} }}| jdks(td|jdks:td|jdksLtdt dd	ddd\} }}t|  t| t| t dd	ddd\}}}t dd	ddd\}}}t|| d S )
Nr   )r   r    Tr   r   r0   r   r   r7   )r   r?   r>   r   r   )rE   r   r   rm   r   ro   rG   rG   rH   test_make_checkerboardT  sB                r   c                  C   s^   t ddd\} }t| |D ]>\}}|dkr2ddgnddg}|| d  }t|dd	d
 qd S )Nr"   F)rK   r   r   r   r;   r7   z$Point is not on expected unit circlerP   )r   rZ   rB   r   )rE   rF   xlabelcenterdist_sqrrG   rG   rH   test_make_moonso  s      r   c               	   C   s   t dd\} }t|dkdkr2t|dkdks:td| jdksLtd	|jd
ks^tdtjtdd t dd W 5 Q R X d S )N)   r    r#   r   r   r!   r    z$Number of samples in a moon is wrong)   r7   r0   )r   r2   z8`n_samples` can be either an int or a two-element tuple.rh   r   )r   r@   rB   r>   r?   ra   rb   rc   r   rG   rG   rH   test_make_moons_unbalancedy  s    r   c                  C   s   d} dD ]\}}}t |dd | d\}}|j|dfks<td|j|fksPtdddg}t||D ]N\}}|| d  }	|d	krd
n| d }
|d	krd
n| d }
t|	|
dd qb||d	k j|dfkstd||dk j|dfkstdqd S )Ng333333?))r   r"   rL   )r   rL   rL   F)rK   r   factorr7   r0   r2   r   r   r   zPoint is not on expected circlerP   z1Samples not correctly distributed across circles.r!   )r   r?   r>   rZ   rB   r   )r   r#   Zn_outerZn_innerrE   rF   r   r   r   r   Zdist_exprG   rG   rH   test_make_circles  s6      r   c               	   C   s   t dd\} }t|dkdks(tdt|dkdksBtd| jd	ksTtd
|jdksftdtjtdd t dd W 5 Q R X d S )N)r7   r   r   r   r7   z*Number of samples in inner circle is wrongr!   r   z*Number of samples in outer circle is wrong)r5   r7   r0   r   r2   z7When a tuple, n_samples must have exactly two elements.rh   )r   r@   rB   r>   r?   ra   rb   rc   r   rG   rG   rH   test_make_circles_unbalanced  s    r   )Nr   collectionsr   	functoolsr   Znumpyr@   ra   Zscipy.sparser   r   Zsklearn.datasetsr   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   Zsklearn.utils._testingr   r   r   r   r   Zsklearn.utils.validationr   rI   rg   markZparametrizerc   rl   randomrj   rq   r   r   r   r   r   r   r   r   r   tupler   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rG   rG   rG   rH   <module>   s   P0c



 $




		
