U
    }hJK                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	Z
d dlZd dlmZ d dlZd dlmZmZmZ d dlmZmZmZmZ d dlmZmZ dZd	Zd
ZdZdZ eZ!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*dd  Z+d!d" Z,d#d$ Z-d%d& Z.ej/0d'd(d) Z1d*d+ Z2d,d- Z3d.d/ Z4d0d1 Z5d2d3 Z6d4d5 Z7d6d7 Z8d8d9 Z9d:d; Z:d<d= Z;d>d? Z<ej/=d@d dAdBdCdDgej/=dEdFdGgej/=dHdIdJdKgdLdM Z>dNdO Z?dPdQ Z@dRdS ZAdS )T    N)BZ2File)BytesIO)NamedTemporaryFile)dump_svmlight_fileload_svmlight_fileload_svmlight_files)assert_allcloseassert_array_almost_equalassert_array_equalfails_if_pypy)_open_binary_pathzsklearn.datasets.tests.datazsvmlight_classification.txtzsvmlight_multilabel.txtzsvmlight_invalid.txtzsvmlight_invalid_order.txtc              
   K   s.   t t| }t|f|W  5 Q R  S Q R X dS )zG
    Helper to load resource `filename` with `importlib.resources`
    N)r   TEST_DATA_MODULEr   )filenamekwargsf r   O/tmp/pip-unpacked-wheel-ig1s1lm8/sklearn/datasets/tests/test_svmlight_format.py_load_svmlight_local_test_file   s    r   c                  C   s  t t\} }| jjd dks t| jd dks2t| jd dksDt|jd dksVtdD ]\}}}| ||f |ksZtqZ| d dkst| d dkst| d	 dkst| d
 dkst| d dkst| d  d9  < | d dkstt|ddddddg d S )Nr               )r      g      @r   
   g)r      g      ?r            ?r      )r         )r      )r   r   )r      )r      )r      )r   r   r   r   r&      )r   datafileindptrshapeAssertionErrorr
   Xyijvalr   r   r   test_load_svmlight_file'   s    r5   c               
   C   sr   t tt^} t| } t| \}}t| tj}z(t|\}}t	|j
|j
 t	|| W 5 t| X W 5 Q R X d S N)r   r   r+   strr   osopenO_RDONLYcloser	   data)	data_pathX1y1fdX2y2r   r   r   test_load_svmlight_file_fdJ   s    rC   c               	   C   sN   t tt"} tt| \}}t| \}}W 5 Q R X t|j|j t|| d S r6   )r   r   r+   r   r7   r   r<   )r=   r>   r?   rA   rB   r   r   r   test_load_svmlight_pathlib\   s
    rD   c                  C   s(   t tdd\} }|ddddgks$td S )NTZ
multilabel)r   r   )r   r   )r   r   )r   	multifiler.   )r0   r1   r   r   r   "test_load_svmlight_file_multilabelf   s    rG   c               	   C   s   t tt&} tt| gd tjd\}}}}W 5 Q R X t| |  t	|| |j
tjksbt|j
tjksrtt tt*} tt| gd tjd\}}}}}	}
W 5 Q R X |j
|j
kst|j
|	j
kst|	j
tjkstd S )Nr   )dtyper&   )r   r   r+   r   r7   npfloat32r
   toarrayr	   rH   r.   float64)r=   ZX_trainZy_trainZX_testZy_testr>   r?   rA   rB   ZX3Zy3r   r   r   test_load_svmlight_filesk   s"     
 rM   c               	   C   s   t tdd\} }| jjd dks$t| jd dks6t| jd dksHtdD ]\}}}| ||f |ksLtqLtt t tdd W 5 Q R X d S )	N   )
n_featuresr   r   r   r   )r   r   r   r!   r$   )r   r+   r,   r-   r.   pytestraises
ValueErrorr/   r   r   r   "test_load_svmlight_file_n_features~   s    rS   c            	      C   s2  t t\} }tdddd}|  ttt,}t|jd}t	
|| W 5 Q R X W 5 Q R X t|j\}}t|j W 5 Q R X t|  |  t|| tdddb}|  ttt*}t|jd}t	
|| W 5 Q R X W 5 Q R X t|j\}}t|j W 5 Q R X t|  |  t|| d S )Nzsklearn-testz.gz)prefixsuffixwbz.bz2)r   r+   r   r;   r   r   gzipr9   nameshutilcopyfileobjr   r8   remover	   rK   r   )	r0   r1   tmpr   Zfh_outZXgzZygzZXbzZybzr   r   r   test_load_compressed   s&     
 r]   c                	   C   s"   t t tt W 5 Q R X d S r6   )rP   rQ   rR   r   invalidfiler   r   r   r   test_load_invalid_file   s    r_   c                	   C   s"   t t tt W 5 Q R X d S r6   )rP   rQ   rR   r   invalidfile2r   r   r   r   test_load_invalid_order_file   s    ra   c               	   C   s.   t d} tt t| dd W 5 Q R X d S )Ns   -1 4:1.
1 0:1
F
zero_based)r   rP   rQ   rR   r   )r   r   r   r   test_load_zero_based   s    rd   c            
      C   sv   d} d}t | }t|dd\}}|jdks.tt | }t |}t||gdd\}}}}	|jdksdt|jdksrtd S )Ns   -1 1:1 2:2 3:3
s   -1 0:0 1:1
autorb   )r   r&   )r   r*   )r   r   r-   r.   r   )
Zdata1Zdata2f1r0   r1   f2r>   r?   rA   rB   r   r   r   test_load_zero_based_auto   s    rh   c                  C   s   d} t t| dd\}}t|dddg t| ddgd	d
gddgg tt| gdd}t t| dd}||fD ]J\}}}t|dddg t|dddg t| ddgd	d
gddgg qrd S )NsM   
    3 qid:1 1:0.53 2:0.12
    2 qid:1 1:0.13 2:0.1
    7 qid:2 1:0.87 2:0.12Fquery_idr&   r   r   g(\?gQ?gp=
ף?皙?gףp=
?Tr   )r   r   r
   rK   r   )r<   r0   r1   Zres1Zres2qidr   r   r   test_load_with_qid   s     rm   zPtesting the overflow of 32 bit sparse indexing requires a large amount of memoryc                  C   sf   d dd tddD } tt| dd\}}}t|dd	 d
dd
dg tt|tdd d	S )zU
    load large libsvm / svmlight file with qid attribute. Tests 64-bit query ID
       
c                 s   s   | ]}d  | V  qdS )z.3 qid:{0} 1:0.53 2:0.12
2 qid:{0} 1:0.13 2:0.1N)formatencode).0r2   r   r   r   	<genexpr>   s   z&test_load_large_qid.<locals>.<genexpr>r   i ZbTri   Nr&   r   )joinranger   r   r
   rI   uniquearange)r<   r0   r1   rl   r   r   r   test_load_large_qid   s    rx   c                  C   s`   t tL ttt6} ttt }tt| t|t| g W 5 Q R X W 5 Q R X W 5 Q R X d S r6   )	rP   rQ   rR   r   r   r+   r^   r   r7   )r=   Zinvalid_pathr   r   r   test_load_invalid_file2   s     ry   c                	   C   s"   t t td W 5 Q R X d S )NgzG?)rP   rQ   	TypeErrorr   r   r   r   r   test_not_a_filename   s    r{   c                	   C   s"   t t td W 5 Q R X d S )Nztrou pic nic douille)rP   rQ   OSErrorr   r   r   r   r   test_invalid_filename   s    r}   c                  C   s  t t\} }|  }t|}| t| jd  }|t|jd  }| ||fD ]}|||fD ]n}dD ]b}tjtj	tj
tjfD ]F}	t }
t|r|jd dkr|j}||	}t|||
d|d |
d |
 }t|d}dtj |kst|
 }t|d}dd	g| d
 |ks$tt|
|	|d\}}|j|	ksFtt| j|j | }t|rv| }n|}|	tjkrt||d t|j|	dd|d qt||d t|j|	dd|d qqjq`qPd S )Nr   )TFr   testcommentrc   utf-8zscikit-learn %soneZzeroz-based)rH   rc   r*   F)copyr   )r   r+   rK   sp
csr_matrixrI   rw   r-   rJ   rL   Zint32Zint64r   issparseTastyper   seekreadliner7   sklearn__version__r.   r   rH   r
   Zsorted_indicesindicesr	   )ZX_sparsey_denseZX_densey_sparseZX_slicedZy_slicedr0   r1   rc   rH   r   ZX_inputr   rA   rB   ZX2_denseZX_input_denser   r   r   	test_dump  s^    


    



    r   c                  C   s   dddddgdddddgdddddgg} dddgdddgdddgg}t |}||fD ]T}t }t| ||dd |d | dkst| dkst| d	ksVtqVd S )
Nr   r   r&   r   TrE   s   1 0:1 2:3 4:5
s   0,2 
s   0,1 1:5 3:1
)r   r   r   r   r   r   r.   )r0   r   r   r1   r   r   r   r   test_dump_multilabelF  s    (

r   c            
   	   C   s   d} d}d}d}d}| ||||gdddd	d	gd	d	d	d	d	gd	d	d	d	d	gd	d	d	d	d	gg}| ||||g}t  }t||| |d	 | d
kst| dkst| dkst| dkst| dkst|d	 t|\}}	t||  t||	 d S )Nr   g @gGz@g     ?r    g    eAg NgmCgkcEr   s+   1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1
s!   2.1 0:1000000000 1:2e+18 2:3e+27
s   3.01 
s   1.000000000000001 
s   1 
)r   r   r   r   r.   r   r	   rK   )
r   ZtwoZthreeexactZalmostr0   r1   r   rA   rB   r   r   r   test_dump_conciseT  s0    

r   c               	   C   s  t t\} }|  } t }d}t| |||dd |d t|dd\}}t| |  t|| d}t }t	t
 t| |||d W 5 Q R X |d}t }t| |||dd |d t|dd\}}t| |  t|| t }t	t t| ||d	d W 5 Q R X d S )
Nz*This is a comment
spanning multiple lines.Fr   r   rb   s   It is true that
½² = ¼)r   r   zI've got a  .)r   r+   rK   r   r   r   r   r	   rP   rQ   UnicodeDecodeErrordecoderR   )r0   r1   r   Zascii_commentrA   rB   Zutf8_commentZunicode_commentr   r   r   test_dump_comments  s.    




r   c               	   C   sn   t t\} }t }|g}tt t| || W 5 Q R X t }tt t| |d d | W 5 Q R X d S )N)r   r+   r   rP   rQ   rR   r   )r0   r1   r   Zy2dr   r   r   test_dump_invalid  s    r   c                  C   s   t t\} }|  } t| jd d }t }t| |||dd |d t	|ddd\}}}t
| |  t
|| t
|| d S )Nr   r   Trj   rc   )r   r+   rK   rI   rw   r-   r   r   r   r   r	   )r0   r1   rj   r   r>   r?   Z	query_id1r   r   r   test_dump_query_id  s    

r   c                  C   s  d} t t| dd\}}}dddgddd	gddd	gddd	gg}dd
d
dg}d
dddg}t|| t| | t|| t }t||||dd |d
 t |ddd\}}}t|| t| | t|| |d
 t |ddd\}}t|| t| | d S )Ns   
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807  0:1440446648 1:72048431380967004 2:236784985Tri   r   r   r&   ixUl   \.,N^iYr   l l    r   F)r   r   r
   rK   r   r   )r<   r0   r1   rl   true_Xtrue_yZtrueQIDr   r   r   r   test_load_with_long_qid  s.    






r   c                  C   sz   t  } ttjdd}tdddg}t|||  dD ]<}| d t| d|d\}}t	|| t	|
 |
  q8d S )N)r&   r*   r-   r   r   )re   TFr*   )rO   rc   )r   r   r   rI   zerosarrayr   r   r   r	   rK   )r   r   r   rc   r0   r1   r   r   r   test_load_zeros  s    

r   sparsityrk   g      ?gGz?r   	n_samples   e   rO   r   r   )   c                 C   s  t jd}|jdd||fd}| r0d||| k < t|}|jdd|d}t }t||| |	d t
| }d}|d }	|	| }
d| d }||	 }t||||
d	\}}t|||	|d	\}}t|||d
\}}t |||g}t|||g}t|| t| |  d S )Nr           r    lowhighsizer   r&   r*   r   )rO   offsetlength)rO   r   )rI   randomRandomStateuniformr   r   randintr   r   r   lengetvaluer   concatenatevstackr	   rK   )r   r   rO   rngr0   r1   r   r   Zmark_0Zmark_1Zlength_0Zmark_2Zlength_1X_0y_0X_1y_1ZX_2Zy_2y_concatX_concatr   r   r   test_load_with_offsets  s@    

   
   

r   c                  C   sr  t jd} t ddddddgddddddgddddddgddddddgddddddgddddddgddddddgg}t|}|j\}}| jdd|d}t |d }t	 }t
||||d |d t| }t|D ]}|d t||d	d|d
\}	}
}t||d	|dd
\}}}t ||g}t |
|g}t|	|g}t|| t|| t| |  qd S )Nr   r   r   r&   r*   r   r   ri   T)rO   rj   r   r   r   )rI   r   r   r   r   r   r-   r   rw   r   r   r   r   r   ru   r   r   r   r	   r
   rK   )r   r0   r   rO   r1   rj   r   r   markr   r   Zq_0r   r   Zq_1Zq_concatr   r   r   r   r   "test_load_offset_exhaustive_splits  sR    



        

r   c                	   C   s,   t jtdd ttddd W 5 Q R X d S )Nzn_features is required)matchr&   )r   r   )rP   rQ   rR   r   r+   r   r   r   r   test_load_with_offsets_error2  s    r   c                 C   s   t | d }tjd}|ddtj}tddddg}tddddddg}tddddddg}tj	|||fd	d
}t
|||dd t|dd\}}	dddg}
|	|
kstdS )z
    Ensure that if y contains explicit zeros (i.e. elements of y.data equal to
    0) then those explicit zeros are not encoded.
    Zsvm_explicit_zero*   r&   r   r   r   r   r   )r&   r&   r   TrE   )g       @)r   r    N)r7   rI   r   r   Zrandnr   rL   r   r   r   r   r   r.   )Ztmp_pathZ	save_pathr   r0   r,   r   r<   r1   _Zy_loadZy_truer   r   r    test_multilabel_y_explicit_zeros7  s    
r   )BrW   r8   rY   bz2r   ior   tempfiler   ZnumpyrI   rP   Zscipy.sparsesparser   r   Zsklearn.datasetsr   r   r   Zsklearn.utils._testingr   r	   r
   r   Zsklearn.utils.fixesr   r   r   r+   rF   r^   r`   Z
pytestmarkr   r5   rC   rD   rG   rM   rS   r]   r_   ra   rd   rh   rm   r   skiprx   ry   r{   r}   r   r   r   r   r   r   r   r   Zparametrizer   r   r   r   r   r   r   r   <module>   sj   #

E!$$*