U
    qh2                     @  s   d dl mZ d dlmZ d dlZd dlmZ G dd deZ	G dd dZ
G d	d
 d
eZddddddZG dd deZdS )    )annotations)IntEnumN)OpRunc                   @  s0   e Zd Zdd Zdd Zdd Zedd Zd	S )
IntMapc                 C  s   t |  g | _d S N)dict__init__
added_keysself r   J/tmp/pip-unpacked-wheel-xnis5xre/onnx/reference/ops/op_tfidf_vectorizer.pyr      s    
zIntMap.__init__c                 C  sd   t |ttfs"tdt| dt |ts@tdt| d|| kr\| j| || |< | | S )Nzkey must be a int or str not .zvalue must be a NGramPart not )
isinstanceintstr	TypeErrortype	NgramPartr	   appendr   keyvaluer   r   r   emplace   s    
zIntMap.emplacec              	   C  s   dd |   D }dg}t|  D ]\}}d|kr|d}t|D ]|\}}|dkr|dkrv|d| d|  q|d| d| d qF|t|d	 kr|d|  qF|d
|  qFq$|d| d| d q$|d d|S )Nc                 S  s   i | ]\}}|t |qS r   )repr).0kvr   r   r   
<dictcomp>   s      z#IntMap.__repr__.<locals>.<dictcomp>{
r   z  =,   z    })itemssortedsplit	enumerater   lenjoin)r   valsZrowsr   r   vsiliner   r   r   __repr__   s     

zIntMap.__repr__c                 C  s   t | dkrtd| jd S )Nr   zIntMap is empty.)r)   
ValueErrorr	   r
   r   r   r   	first_key1   s    zIntMap.first_keyN)__name__
__module____qualname__r   r   r/   propertyr1   r   r   r   r   r      s
   
r   c                   @  s^   e Zd ZddddZdd Zdd Zd	d
 Zdd Zedd Z	dd Z
dd Zdd ZdS )r   r   )nidc                 C  s   || _ d | _d S r   )id__leafs_)r   r6   r   r   r   r   9   s    zNgramPart.__init__c                 C  s   t  | _d S r   )r   r8   r
   r   r   r   init=   s    zNgramPart.initc                 C  s,   |   rd| j dS d| j d| jdS )Nz
NgramPart()z, )emptyr7   leafs_r
   r   r   r   r/   @   s    zNgramPart.__repr__c                 C  s
   | j d kS r   r8   r
   r   r   r   r;   E   s    zNgramPart.emptyc                 C  s   | j d k	ot| j dkS )Nr   )r8   r)   r
   r   r   r   
has_leavesH   s    zNgramPart.has_leavesc                 C  s   | j d krtd| j S )NzNgramPart was not initialized.)r8   RuntimeErrorr
   r   r   r   r<   K   s    
zNgramPart.leafs_c                 C  s   |   sd S || jkr|S d S r   )r>   r8   r   r   r   r   r   findQ   s
    
zNgramPart.findc                 C  s   | j ||S r   )r<   r   r   r   r   r   r   X   s    zNgramPart.emplacec                 C  s
   | j | S r   r=   r@   r   r   r   __getitem__[   s    zNgramPart.__getitem__N)r2   r3   r4   r   r9   r/   r;   r>   r5   r<   rA   r   rB   r   r   r   r   r   8   s   
r   c                   @  s   e Zd ZdZdZdZdZdS )WeightingCriteriar   r#         N)r2   r3   r4   NONETFIDFTFIDFr   r   r   r   rC   _   s   rC   r   )n_ngrams
ngram_sizengram_idc           
      C  s   t |ddD ]t}d}|}|t| k r|| | td}	||krX||	_|d7 }|d7 }q|	 rh|	  |	j}|d7 }|d7 }qq|S )Nr   r#   )ranger)   r   r   r7   r;   r9   r<   )
ZelsZ	els_indexrJ   rK   rL   cZ_ngramsnmpr   r   r   populate_gramsf   s     rS   c                	   @  s\   e Zd Zdd ZdddddddZddd	d
ddZdd	dddddddZdddZdS )TfIdfVectorizerc                 C  sv  t | || | j}|dkr&tj| _n"|dkr8tj| _n|dkrHtj| _| j| _	| j
| _| j| _| j| _| j
| _| j| _t| jd | _| j| _| j| _| j| _td| _| j  t| jp| j}d}d}tt| jD ]}| j| }|d t| jk r
| j|d  n|}	|	| }
|
dkrh|
| }|| j	kr`|| jkr`t| jpN| j||||| j}n||7 }|d7 }qd S )NrG   rH   rI   r#   ir   ) r   r   moderC   rG   weighting_criteria_rH   rI   min_gram_lengthZmin_gram_length_max_gram_lengthZmax_gram_length_max_skip_countZmax_skip_count_ngram_countsZngram_counts_ngram_indexesngram_indexes_maxoutput_size_weightsweights_pool_int64sZpool_int64s_pool_stringsZpool_strings_r   
int64_map_r9   r)   rN   rS   )r   Z	onnx_nodeZ
run_paramsrU   total_itemsrL   rK   r-   Z	start_idxZend_idxr%   Zngramsr   r   r   r      s\    





	zTfIdfVectorizer.__init__r   z	list[int]None)rL   row_numfrequenciesreturnc                 C  s0   |d8 }|| j  | j|  }||  d7  < d S )Nr#   )r^   r\   )r   rL   rf   rg   Z
output_idxr   r   r   increment_count   s    zTfIdfVectorizer.increment_countz
np.ndarray)Brg   rh   c                 C  s  g }|dkr| | j d}n| | | | j t|}| j}t|}tj|ftjd}| j}| jt	j
krt|D ]\}	}
|
||	< qxn| jt	jkrt|dkrd}t|D ]6}t|D ](}	|| dkr||	 nd||< |d7 }qqn,d}|D ]"}
|
dkrdnd||< |d7 }qn| jt	jkrt|dkrzd}t|D ]2}t|D ]"}	||	 ||  ||< |d7 }qPqDn d}|D ]}
|
||< |d7 }qntd||S )Nr   r#   ZdtypezUnexpected weighting_criteria.)r   r^   tuplenpprodr;   Zfloat32r`   rV   rC   rG   r(   rH   r)   rN   rI   r?   Zreshape)r   rj   rg   Zl_output_dimsZoutput_dimsrow_sizeZ
total_dimsYwr-   frR   Z_batchr   r   r   output_result   sL    

zTfIdfVectorizer.output_resultN)Xrf   ro   rg   rh   c                 C  s"  t |jdkr|| }n|}d}|| }|d }|}td|d D ]}|}|}||k r|||d   }||krpq|}| j}d}| r||kr||k r|| }||}|d krq|| j}||kr|dkr| ||| || }|d7 }||7 }q~|d7 }qN|dkrB|d7 }||krB qqBd S )Nr#   r   )r)   shaperN   rc   r>   rA   r7   ri   )r   rt   rf   ro   rg   rX   rY   rW   rU   rZ   r[   ra   rb   r_   ZX_flatZ	row_beginZrow_endZmax_skip_distanceZstart_ngram_sizeZskip_distanceZngram_startZngram_row_endZat_least_thisZ
ngram_itemZint_maprK   valhitr   r   r   compute_impl   sL    




zTfIdfVectorizer.compute_implc                   sh  t j}d}d}d j}t|dkrLd}d |dkrtd| dnpt|dkrfd}|d  nVt|dkr|d }|d  |}|dk rtd| d  dntd| d  d|  |krtd|   d	| dt j|j ft jd
|dksj	 r
|fS  	
fdd}t|D ]}|| qJ
|fS )Nr   r#   zUnexpected total of items r   rD   zCInput shape must have either [C] or [B,C] dimensions with B > 0, B=z, C=z*Unexpected total of items, num_rows * C = z != total_items = rk   c                   s(   j |  	
d d S )N)	rX   rY   rW   rU   rZ   r[   ra   rb   r_   )rx   )rf   Crt   rg   rX   rY   rW   rU   rZ   r[   ra   rb   r   r_   r   r   fnj  s    z TfIdfVectorizer._run.<locals>.fn)rm   rn   ru   r)   r0   zerosr^   Zint64rc   r;   rs   rN   )r   rt   rX   rY   rW   rU   rZ   r[   ra   rb   r_   rd   Znum_rowsrj   Z
input_dimsr{   r-   r   ry   r   _run/  sF    
$zTfIdfVectorizer._run)	NNNNNNNNN)	NNNNNNNNN)r2   r3   r4   r   ri   rs   rx   r}   r   r   r   r   rT      s.   7	5         B         rT   )
__future__r   enumr   Znumpyrm   Zonnx.reference.op_runr   r   r   r   rC   rS   rT   r   r   r   r   <module>   s   +'