U
    }hA                  
   @   s  d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlZ	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZ d dlmZmZmZ d	d
dddddgZdZeeedddZd[ddZ dd Z!dd Z"d\ddZ#dd Z$d]d d!Z%ee	j&fe ee	j&fe$ee	j'fe#ee	j'fe%iZ(d"d# Z)d$d% Z*d&d' Z+d(d) Z,d*d+ Z-d,d- Z.e
j/0d.d/d0d1ge
j/0d2eege
j/0d3e	j&e	j'gd^d5d6Z1e
j/0d.d/d0d1ge
j/0d2eege
j/0d3e	j&e	j'gd_d7d8Z2e
j/0d9ee	j&fee	j'fee	j'fee	j&fgd:d; Z3e
j/0d.d/d1d0ge
j/0d<ddd=d>dd?ge
j/0d2eege
j/0d3e	j&e	j'gd`dAdBZ4e
j/0dCdDdEge
j/0dFd dGge
j/0d<ee
j/0dHdIe
j/0d3e	j&e	j'gdadJdKZ5e
j/0dCdDdEge
j/0dFd dGge
j/0d<ee
j/0dHdIe
j/0d3e	j&e	j'gdbdLdMZ6e
j/0d2eege
j/0d<d=dge
j/0d3e	j&e	j'gdNdO Z7e
j/0dPd4dQge
j/0dCdRd@d4ge
j/0dSddTdUge
j/0d3e	j&e	j'gdVdW Z8dXdY Z9dS )c    N)defaultdict)floorlog10)
csr_matrix)cdist)euclidean_distances)ArgKminArgKminClassMode BaseDistancesReductionDispatcherRadiusNeighborssqeuclidean_row_norms)assert_allcloseassert_array_equalcreate_memmap_backed_dataZ
braycurtisZcanberra	chebyshev	cityblock	euclidean	minkowski
seuclidean   )metric
n_featuresseedc              	   C   sn   t j|}| dkrNtddtddtddtt jdtd||dg}|S | dkrht||dgS i gS )	z5Return list of dummy DistanceMetric kwargs for tests.r   g      ?)p      )r   wr   )V)nprandomRandomStatedictinfrand)r   r   r   rngZminkowski_kwargs r%   [/tmp/pip-unpacked-wheel-ig1s1lm8/sklearn/metrics/tests/test_pairwise_distances_reduction.py_get_metric_params_list+   s    
r'   Hz>c                 C   s"   t ||dd t| |d|d d S )Nz/Query vectors have different neighbors' indiceserr_msgz1Query vectors have different neighbors' distancesr*   rtol)r   r   )ref_distdistref_indicesindicesr,   r%   r%   r&   assert_argkmin_results_equalityD   s    r1   c                 C   s2   | dkrdS t ttt| d }t| || S )zIRound a scalar to a number of significant digits relatively to its value.r           r   )intr   r   absround)Zscalarn_significant_digitsZ	magnituder%   r%   r&   relative_roundingR   s    r7   c                   C   s   t dddkstt dddks$tt dddks6tt dddksHtt dddksZtt dd	d
ksltt dddks~tt dddkstt dddkstt dd	dkstt dddkstt dd	dkstt dddkstt dddkstd S )Nr   r   r2   
   i@ i[r   i 'r   iT   gރB?333333?gGz?g<^@g     ^@	   )r7   AssertionErrorr%   r%   r%   r&   test_relative_roundingZ   s    r=   -C6?c                 C   s\  dd }t ttt|d  }| j|j  krH|j  krH|jksRn td| j\}}t|D ]}	| |	 }
||	 }||
std|	 ||std|	 t|
||d ||	 }||	 }tt	}tt	}t|D ]8}t
|
| |d}|| ||  || ||  qd	|	 d
| d|d}| D ] }|| || ks4t|q4qddS )a  Assert that argkmin results are valid up to:
      - relative tolerance on computed distance values
      - permutations of indices for distances values that differ up to
        a precision level

    To be used for testing neighbors queries on float32 datasets: we
    accept neighbors rank swaps only if they are caused by small
    rounding errors on the distance computations.
    c                 S   s   t | d d | dd  kS Nr   r   allar%   r%   r&   <lambda>~       z7assert_argkmin_results_quasi_equality.<locals>.<lambda>r   z&Arrays of results have various shapes.)Reference distances aren't sorted on row Distances aren't sorted on row r,   r6   Neighbors indices for query - are not matching when rounding distances at & significant digits derived from rtol=.1eN)r3   r   r   r4   shaper<   ranger   r   setr7   addkeys)r-   r.   r/   r0   r,   	is_sortedr6   	n_queriesZn_neighbors	query_idxref_dist_rowdist_rowref_indices_rowindices_rowreference_neighbors_groupseffective_neighbors_groupsneighbor_rankrounded_distmsgrounded_distancer%   r%   r&   %assert_argkmin_results_quasi_equalityn   sT     

ra   c                 C   sj   t | jd D ]V}| | |k s&tt|| || d| dd t| | || d| ddd qd S )Nr   zQuery vector #z! has different neighbors' indicesr)   z# has different neighbors' distancesr(   r+   )rP   rO   rB   r<   r   r   )r-   r.   r/   r0   radiusir%   r%   r&   (assert_radius_neighbors_results_equality   s    

rd   c              	   C   s  dd }t ttt|d  }t| t|  krPt|  krPt|ksZn tdt| }t|D ]}	| |	 }
||	 }||
std|	 ||std|	 t|
t|kr|
n|}tt|
t|}||d }|jdkr4t	
|| |  ko|| kn  s4td	| d
| d| d|
d| }
|d| }t|
||d ||	 }||	 }tt}tt}t|D ]:}t|
| |d}|| ||  || ||  qd|	 d| d|d}| D ] }|| || kst|qqjdS )a  Assert that radius neighborhood results are valid up to:
      - relative tolerance on computed distance values
      - permutations of indices for distances values that differ up to
        a precision level
      - missing or extra last elements if their distance is
        close to the radius

    To be used for testing neighbors queries on float32 datasets: we
    accept neighbors rank swaps only if they are caused by small
    rounding errors on the distance computations.

    Input arrays must be sorted w.r.t distances.
    c                 S   s   t | d d | dd  kS r?   rA   rC   r%   r%   r&   rE      rF   z@assert_radius_neighbors_results_quasi_equality.<locals>.<lambda>r   z'Arrays of results have various lengths.rG   rH   Nr   zThe last extra elements (u   ) aren't in [radius ± rtol]=[u    ± ]rI   rJ   rK   rL   rM   rN   )r3   r   r   r4   lenr<   rP   minsizer   rB   r   r   rQ   r7   rR   rS   )r-   r.   r/   r0   rb   r,   rT   r6   rU   rV   rW   rX   Zlargest_rowZ
min_lengthZlast_extra_elementsrY   rZ   r[   r\   r]   r^   r_   r`   r%   r%   r&   .assert_radius_neighbors_results_quasi_equality   sf     
,ri   c            	      C   s  d} d}d| }d| }d| }d| }t dd|d|g||d||gg}t dddd	d
gdddddgg}t|||||  tt dd|d|ggt dddddggt dddd	d
ggt ddd	d
dgg| d tt ||d||ggt ||d||ggt dddddggt dddddgg| d d}tjt|d` tt dd|d|ggt dd|d|ggt dddd	d
ggt dddd	d
gg| d W 5 Q R X d}tjt|d` tt dd|d|ggt dd|d|ggt dddd	d
ggt ddd	d
dgg| d W 5 Q R X d}tjt|d` tt dd|d|ggt dd|d|ggt dddd	d
ggt ddd	d
dgg| d W 5 Q R X d S )Nr(         ?ffffff@r:         @r   r   r                  r;   r8   rI   .Neighbors indices for query 0 are not matchingmatch Distances aren't sorted on row 0)r   arrayra   pytestraisesr<   	r,   ZepsZ_1mZ_1pZ_6_1mZ_6_1pr-   r/   r_   r%   r%   r&   *test_assert_argkmin_results_quasi_equality7  s        			rz   c            	      C   s  d} d}d| }d| }d| }d| }t dd|d|gt |d||gg}t dddd	d
gt ddddgg}t||||d| d tt t dd|d|ggt t dd|d|ggt t dddd	d
ggt t ddd	d
dggd| d tt t ||d||ggt t ||d||ggt t dddddggt t dddddggd| d d}tjt|dz tt t dd|d|ggt t dd|d|ggt t dddd	d
ggt t dddd	d
ggd| d W 5 Q R X tt t dd|d|ggt t dd|dggt t dddd	d
ggt t dddd	ggd| d td}tjt|df tt t dddggt t ddggt t dddggt t ddggd| d W 5 Q R X d}tjt|dz tt t dd|d|ggt t dd|d|ggt t dddd	d
ggt t ddd	d
dggd| d W 5 Q R X d}tjt|dz tt t dd|d|ggt t dd|d|ggt t dddd	d
ggt t ddd	d
dggd| d W 5 Q R X d S )Nr(   rj   rk   r:   rl   r   r   r   rm   rn   ro   rp   rq   r;   )rb   r,   r8   rr   rs   uH   The last extra elements ([6.]) aren't in [radius ± rtol]=[6.1 ± 1e-07]ru   )r   rv   ri   rw   rx   r<   reescapery   r%   r%   r&   3test_assert_radius_neighbors_results_quasi_equality  s    





r}   c                  C   s  t jd} | dd}| dd}t|}t|}d}t|||sJtt|||s\tt|||sntt|||stt|t j	|t j	|stt|t j
|t j
|stt|t j|t j|rttj||ddrtt|t j
||rtt||t j|r2ttt |||rLttj||ddsbttj||ddsxttj||ddsttj||ddstt|d }t|||rtt|}|jt j|_t|||rtd S )	Nr   d   r8   	manhattanZpyfunc)r   r   Zsqeuclidean)r   r   r    r#   r   r
   Zis_usable_forr<   astypefloat64float32Zint64int32asfortranarrayr0   )r$   XYX_csrY_csrr   ZX_csr_0_nnzZX_csr_int64r%   r%   r&   /test_pairwise_distances_reduction_is_usable_for  s|    
 
 
 
 
 
 
  
 
 
  
      r   c            	   	   C   s  t jd} | dd}| dd}d}d}d}tjt|d  tj|	t j
|||d W 5 Q R X d	}tjt|d  tj||	t j||d W 5 Q R X tjtd
d tj||d|d W 5 Q R X tjtdd tj||d|d W 5 Q R X tjtdd tj|||dd W 5 Q R X tjtdd" tjt ddg|||d W 5 Q R X tjtdd tjt ||||d W 5 Q R X ddi}d}tjt|d tj|||||d W 5 Q R X dt|ddd}d}tjt|d tj|||||d W 5 Q R X dt|ddi}t ( tjdtd tj|||||d W 5 Q R X t|ddt|ddd}t ( tjdtd tj|||||d W 5 Q R X d S ) Nr   r~   r8   rn   r   kOnly float64 or float32 datasets pairs are supported at this time, got: X.dtype=float32 and Y.dtype=float64rs   )r   r   kr   iOnly float64 or float32 datasets pairs are supported at this time, got: X.dtype=float64 and Y.dtype=int32k == -1, must be >= 1.r@   k == 0, must be >= 1.r   Unrecognized metricwrong metric;Buffer has wrong number of dimensions \(expected 2, got 1\)rj          @ndarray is not C-contiguousr   r   4Some metric_kwargs have been passed \({'p': 3}\) but)r   r   r   r   metric_kwargsr   num_threadsr   Y_norm_squared?Some metric_kwargs have been passed \({'p': 3, 'Y_norm_squared'X_norm_squarederrorcategoryr   r   )r   r   r    r#   rw   rx   
ValueErrorr   computer   r   r   rv   r   warnsUserWarningr   warningscatch_warningssimplefilter)	r$   r   r   r   r   r_   unused_metric_kwargsmessager   r%   r%   r&   (test_argkmin_factory_method_wrong_usages7  sl    $$ &"    
 




r   c               
   C   s  t jd} | dd}| dd}d}d}d}| jdddd}t |}d	}tjt|d
& t	j
|t j||||||d W 5 Q R X d}tjt|d
& t	j
||t j|||||d W 5 Q R X tjtdd
 t	j
||d||||d W 5 Q R X tjtdd
 t	j
||d||||d W 5 Q R X tjtdd
 t	j
|||d|||d W 5 Q R X tjtdd
( t	j
t ddg||||||d W 5 Q R X tjtdd
$ t	j
t |||||||d W 5 Q R X d}	d|	 d}
tjt|
d
 t	j
|||||	||d W 5 Q R X d S )Nr   r~   r8   rn   r   uniformr   lowhighrh   r   rs   )r   r   r   r   weightslabelsunique_labelsr   r   r@   r   r   r   r   rj   r   r   non_existent_weights_strategyz[Only the 'uniform' or 'distance' weights options are supported at this time. Got: weights='z'.)r   r   r    r#   randintuniquerw   rx   r   r	   r   r   r   r   rv   r   )r$   r   r   r   r   r   r   r   r_   r   r   r%   r%   r&   2test_argkmin_classmode_factory_method_wrong_usages  s    






 


r   c            	   	   C   st  t jd} | dd}| dd}d}d}d}tjt|d  tj|	t j
|||d W 5 Q R X d	}tjt|d  tj||	t j||d W 5 Q R X tjtd
d tj||d|d W 5 Q R X tjtdd tj|||dd W 5 Q R X tjtdd" tjt ddg|||d W 5 Q R X tjtdd tjt ||||d W 5 Q R X ddi}d}tjt|d tj|||||d W 5 Q R X dt|ddd}d}tjt|d tj|||||d W 5 Q R X t|ddt|ddd}t ( tjdtd tj|||||d W 5 Q R X dt|ddi}t ( tjdtd tj|||||d W 5 Q R X d S )Nr   r~   r8   rn   r   r   rs   )r   r   rb   r   r   zradius == -1.0, must be >= 0.r@   r   r   r   rj   r   r   r   r   r   )r   r   rb   r   r   r   r   r   r   r   r   r   r   )r   r   r    r#   rw   rx   r   r   r   r   r   r   rv   r   r   r   r   r   r   r   )	r$   r   r   rb   r   r_   r   r   r   r%   r%   r&   1test_radius_neighbors_factory_method_wrong_usages  s    
   $           
    


     

    r   zn_samples_X, n_samples_Yr~   r~   )  r~   )r~   r   
Dispatcherdtyper~   c                 C   s   t j| }d}||||| }||||| }	|tkrRd}
i }i }n"dt | }|}
d|i}ddi}|j||	|
fdddd|\}}|j||	|
fd	ddd|\}}t||f ||||f| d
S )z3Check that results do not depend on the chunk size.r~   r8   rb   sort_resultsT   r   )
chunk_sizer   return_distance)   N)	r   r   r    r#   r   r   logr   ASSERT_RESULTglobal_random_seedr   n_samples_Xn_samples_Yr   r   r$   spreadr   r   	parametercheck_parameterscompute_parametersrb   r-   r/   r.   r0   r%   r%   r&   test_chunk_size_agnosticismV  sT    




   r   c              	   C   s   t j| }d}||||| }||||| }	|tkrRd}
i }i }n"dt | }|}
d|i}ddi}|j||	|
fddd|\}}tj	dd	d
& |j||	|
fddd|\}}W 5 Q R X t
||f ||||f| dS )z:Check that results do not depend on the number of threads.r~   r8   rb   r   T   r   r   r   Zopenmp)ZlimitsZuser_apiN)r   r   r    r#   r   r   r   r   threadpoolctlZthreadpool_limitsr   r   r%   r%   r&   test_n_threads_agnosticism  sR    
		
   r   zDispatcher, dtypec                 C   s  t j| }d}d\}}||||| }||||| }t|}	t|}
|tkrjd}i }i }n"dt | }|}d|i}ddi}|j|||fddd|\}}t	
||	f||
fD ]T\}}||kr||krq|j|||fddd|\}}t||f ||||f| qd	S )
zLCheck that results do not depend on the format (dense, sparse) of the input.r~   r   r8   rb   r   T2   r   N)r   r   r    r#   r   r   r   r   r   	itertoolsproductr   )r   r   r   r$   r   	n_samplesr   r   r   r   r   r   r   r   rb   Z
dist_denseZindices_dense_X_Yr.   r0   r%   r%   r&   test_format_agnosticism  s\    
	

r   r   r   Zinfinity	haversiner8   c                 C   s>  t j| }d}||||| }	||||| }
|dkrxt |	ddddf }	t |
ddddf }
|tkrd}i }i }n"dt | }|}d|i}ddi}|j|	|
|f|t	||| d	d
 |d ddd|\}}|j|	|
|f|t	||| d	d
 |d ddd|\}}t
||f ||||f| dS )z:Check that the results do not depend on the strategy used.r~   r   Nr   r8   rb   r   Tr   r   rm   parallel_on_X)r   r   r   strategyr   parallel_on_Y)r   r   r    r#   r   ascontiguousarrayr   r   r   r'   r   )r   r   r   r   r   r   r   r$   r   r   r   r   r   r   rb   Z
dist_par_XZindices_par_XZ
dist_par_YZindices_par_Yr%   r%   r&   test_strategies_consistency  sz      
  

   r   r   r   r   translation    .Ar   )r   r   c                 C   s  t jddft jddfg}|||f|kr0td t j| }	d}
||	||||
  }||	||||
  }t|}t|}|dkrt 	|d d d df }t 	|d d d df }t
||d }|d	krt||}nt||fd
|i|}t j|ddd d d |f }t j|jt jd}t|jd D ]}|||| f ||< q6t||f||fD ]B\}}tj|||||d|d |d\}}tt|f |||| qfd S )Nr   r   r   z;Numerical differences lead to small differences in results.  r   r   r   r   r   r   Zaxis)r   Trm   )r   r   r   r   r   )r   r   rw   Zxfailr   r    r#   r   r   r   r'   r   r   argsortzerosrO   r   rP   r   r   r   r   r   )r   r   r   r   r   r   r   r   Z
edge_casesr$   r   r   r   r   r   r   dist_matrixZargkmin_indices_refZargkmin_distances_refZrow_idxr   r   Zargkmin_distancesZargkmin_indicesr%   r%   r&   test_pairwise_distances_argkminT  sR    





r   c                 C   s0  t j| }d}|t | }	||||||  }
||||||  }t||| dd }|dkrxt|
|}nt|
|fd|i|}g }g }|D ]T}t 	|j
d ||	k }|| }t |}|| ||  }}|| || qtj|
||	||d|d |dd	\}}tt|f |||||	 d S )	Nr   r   r   r   r   Trm   )r   r   r   r   r   r   )r   r   r    r   r#   r   r'   r   r   ZarangerO   r   appendr   r   r   )r   r   r   r   r   r   r   r$   r   rb   r   r   r   r   Zneigh_indices_refZneigh_distances_refrowindr.   sortZneigh_distancesZneigh_indicesr%   r%   r&   (test_pairwise_distances_radius_neighbors  sT      



    r   c                 C   s   t jd}d}d\}}||||| }||||| }t||g\}	}
|tkrjd}i }i }n"dt | }|}d|i}ddi}|j|||f| dd|\}}|j|	|
|f| dd|\}}t	||f ||||f| d	S )
zACheck that the results do not depend on the datasets writability.r   r~   )   r8   r8   rb   r   T)r   r   N)
r   r   r    r#   r   r   r   r   r   r   )r   r   r   r$   r   r   r   r   r   ZX_mmZY_mmr   r   r   rb   r-   r/   Zdist_mmZ
indices_mmr%   r%   r&   test_memmap_backed_data  sT    	
	
	
   r   r   r   rn   r   r   rq   c              	   C   s   t j| }d}||||| }t|}t jj|ddd }	t||d}
t||d}t	|	|
 t	|	| t
t t |}t||d W 5 Q R X d S )Nr~   r   r   r   r   )r   r   r    r#   r   r   ZlinalgZnormr   r   rw   rx   r   r   )r   r   r   r   r   r$   r   r   r   Zsq_row_norm_referenceZsq_row_normZsq_row_norm_csrr%   r%   r&   test_sqeuclidean_row_norms  s    


r   c            
   
   C   s   t jd} | dd}| dd}d}d}d}| jdddd}t |}tj|||||||d	d
}tj|||||||dd
}	t||	 d S )Nr   r~   r8   rn   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   )	r   r   r    r#   r   r   r	   r   r   )
r$   r   r   r   r   r   r   r   Z	results_XZ	results_Yr%   r%   r&   *test_argkmin_classmode_strategy_consistent/  s:    


r   )r   )r(   )r>   )r>   )r~   )r~   )r8   )r~   r8   )r~   ):r   r{   r   collectionsr   mathr   r   Znumpyr   rw   r   Zscipy.sparser   Zscipy.spatial.distancer   Zsklearn.metricsr   Z-sklearn.metrics._pairwise_distances_reductionr   r	   r
   r   r   Zsklearn.utils._testingr   r   r   Z1CDIST_PAIRWISE_DISTANCES_REDUCTION_COMMON_METRICSstrr3   r'   r1   r7   r=   ra   rd   ri   r   r   r   rz   r}   r   r   r   r   markZparametrizer   r   r   r   r   r   r   r   r   r%   r%   r%   r&   <module>   s   
 
C 
c  SlAMv\  2  1	
8  G  E :1