U
    |h                     @   s   d dl Zd dlZd dlmZ d dlmZ d dlmZ	 dd Z
dd Zejjd	d
dgdd dD gddgddd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! ZdS )"    N)assert_array_equal)FeatureHasher)	transformc                  C   st   t dd} d| jkstdddddd	d
g}t dd|}dd |D }t ddd|}t| |  d S )N   )
n_featuresdictbar*   %   )fooZdadaZtzarabazZstring1)r   Zgagac                 s   s   | ]}t | V  qd S Niteritems.0d r   X/tmp/pip-unpacked-wheel-ig1s1lm8/sklearn/feature_extraction/tests/test_feature_hasher.py	<genexpr>   s     z,test_feature_hasher_dicts.<locals>.<genexpr>pairr   
input_type)r   r   AssertionErrorr   r   toarray)feature_hasherraw_XZX1genZX2r   r   r   test_feature_hasher_dicts	   s    
r   c                  C   s   dddd dgd dddgg} dD ]}d| }dd	 | D }t|d
dd}||}|jd t| ksnt|jd |kst|d  dkst|d  dkst|jdks&tq&d S )Nr   r   r   asciiquux)   	      r         c                 s   s   | ]
}|V  qd S r   r   r   xr   r   r   r      s     z.test_feature_hasher_strings.<locals>.<genexpr>stringF)r   r   alternate_signr               )encoder   r   shapelenr   sumZnnz)r   Zlg_n_featuresr   itr   Xr   r   r   test_feature_hasher_strings   s"      
r5   r   	my_stringanother_stringc                 c   s   | ]
}|V  qd S r   r   r'   r   r   r   r   2   s     r   )r6   r7   list	generator)idsc              	   C   s8   d}t ddd}tjt|d ||  W 5 Q R X dS )zhFeatureHasher raises error when a sample is a single string.

    Non-regression test for gh-13199.
    z"Samples can not be a single string
   r)   r   )matchN)r   pytestraises
ValueErrorr   )r   msgr   r   r   r   !test_feature_hasher_single_string.   s    rA   c               	   C   s   dddd dgd dddgg} dd | D }t|dtd	\}}}d
d | D }t|dtd	dd\}}}t|| t|| dd | D }t|dtd	dd\}}}tt t|| W 5 Q R X d S )Nr   r   r   r    r!   c                 s   s   | ]}d d |D V  qdS )c                 s   s   | ]}|d fV  qdS r+   Nr   r   fr   r   r   r   I   s     8test_hashing_transform_seed.<locals>.<genexpr>.<genexpr>Nr   r'   r   r   r   r   I   s     z.test_hashing_transform_seed.<locals>.<genexpr>   Fc                 s   s   | ]}d d |D V  qdS )c                 s   s   | ]}|d fV  qdS rB   r   rC   r   r   r   r   L   s     rE   Nr   r'   r   r   r   r   L   s     r   )seedc                 s   s   | ]}d d |D V  qdS )c                 s   s   | ]}|d fV  qdS rB   r   rC   r   r   r   r   Q   s     rE   Nr   r'   r   r   r   r   Q   s     r+   )r/   _hashing_transformstrr   r=   r>   r   )r   Zraw_X_indicesZindptr_Z	indices_0Zindptr_0Z	indices_1r   r   r   test_hashing_transform_seedB   s    

rL   c                  C   s   dd ddddddd	fD } t d
dd}||  \}}tt||dk }tt||dk }ddg|ksztdddg|kstd S )Nc                 s   s   | ]}t | V  qd S r   r   r   r   r   r   r   X   s   z,test_feature_hasher_pairs.<locals>.<genexpr>r+   r&   r   r   r-   r,   r   r!   r   r   r   r   r   )r   r   r   sortednpabsr   r   r   x1Zx2Zx1_nzZx2_nzr   r   r   test_feature_hasher_pairsW   s    rU   c                  C   s  dd ddddddd	fD } t d
dd}||  \}}tt||dk }tt||dk }ddg|ksztdddg|kstdd ddiddifD } ||  \}}t||dk }t||dk }dg|kstdg|kstt|| d S )Nc                 s   s   | ]}t | V  qd S r   r   r   r   r   r   r   e   s   z?test_feature_hasher_pairs_with_string_values.<locals>.<genexpr>r+   arM   abcr,   rN   rO   r   r   r   r   c                 s   s   | ]}t | V  qd S r   r   r   r   r   r   r   p   s     Zbax)r   r   r   rP   rQ   rR   r   r   rS   r   r   r   ,test_feature_hasher_pairs_with_string_valuesd   s     rX   c                  C   sL   d} g dt tdg}t| dd}||}t| tt|| f d S )Nr   r   r   r)   r   )	r   ranger   r   r   r   rQ   zerosr1   )r   r   r   r4   r   r   r   test_hash_empty_inputy   s
    
r[   c                  C   s&   t  ddig} | jjdks"td S )Nr   r   )r   )r   r   datar0   r   )r4   r   r   r   test_hasher_zeros   s    r]   c                  C   sd   t dg} tddd| }|j dk r8|j dks<ttddd| }|j dks`td S )NThequickbrownfoxjumpedTr)   )r*   r   r   F)r8   r   fit_transformr\   minmaxr   r4   ZXtr   r   r   test_hasher_alternate_sign   s
    
 rc   c                  C   sn   t dg} tdddd| }t|jd t| d k s<ttdddd| }|jd t| d ksjtd S )Nr^   Tr+   r)   )r*   r   r   r   F)r8   r   r_   rR   r\   r1   r   rb   r   r   r   test_hash_collisions   s"    
    rd   )ZnumpyrQ   r=   Znumpy.testingr   Zsklearn.feature_extractionr   Z(sklearn.feature_extraction._hashing_fastr   rH   r   r5   markZparametrizerA   rL   rU   rX   r[   r]   rc   rd   r   r   r   r   <module>   s*   


