
    h                     J    S SK rS SKrS SKrS SKrS SKJrJr   " S S\5      r	g)    N)OpRunRuntimeTypeErrorc                   h    \ rS rSrSr    S	S jr\     S
S j5       r\S 5       r\S 5       r	Sr
g)StringNormalizer   z
The operator is not really threadsafe as python cannot
play with two locales at the same time. stop words
should not be implemented here as the tokenization
usually happens after this steps.
Nc                    UnUc  [        5       n[        5       nOb[        U5      nUS:X  a   U V	s1 s H  oR                  5       iM     nn	O1US:X  a   U V	s1 s H  oR                  5       iM     nn	O[        U5      n[        R                  " UR
                  UR                  S9n
[        UR
                  5      S:X  aD  [        SUR
                  S   5       H&  nU R                  US S 2U4   U
S S 2U4   UUUUUS9  M(     O:[        UR
                  5      S:X  a  U R                  UU
UUUUUS9  O[        S5      e[        U
R
                  5      S:X  a  U
R
                  S   S:X  ax  [        R                  " U
R                  5       S    V	s/ s H  n	[        U	5      S:  d  M  U	PM     sn	/5      n
U
R
                  S   S:X  a  [        R                  " S	//5      n
U
4$ [        U
R
                  5      S:X  al  [        R                  " U
R                  5        V	s/ s H  n	[        U	5      S:  d  M  U	PM     sn	5      n
[        U
5      S:X  a  [        R                  " S	/5      n
U
4$ s  sn	f s  sn	f s  sn	f s  sn	f )
NLOWERUPPER)dtype   r      )slocalestops	raw_stopsis_case_sensitivecase_change_actionzx must be a matrix or a vector. )setloweruppernpemptyshaper   lenrange_run_columnr   arraytolist)selfxr   r   locale	stopwordsr   r   r   wresis               a/var/www/fran/franai/venv/lib/python3.13/site-packages/onnx/reference/ops/op_string_normalizer.py_runStringNormalizer._run   s'    IEEII!W,,56IqI6#w.,56IqI6Ihhqwwagg.qww<11aggaj)  adG1I#'&7'9 !  * \Q#"3#5   ##DEEsyy>Q399Q<1#4((

QF13q6A:QFGHCyy|q hhv&
 v	 ^q ((szz|B|!s1vzA|BCC3x1}hhtnvK 768 G Cs#   I)I.I3%I3I83I8c           
      F   [         R                  " 5       U:w  a&   [         R                  " [         R                  U5        U S S  US S & [        SU R                  S   5       H9  n[        X   [        5      (       a  SX'   M   [        R                  X   5      X'   M;     U(       aI  [        U5      S:  a:  [        SU R                  S   5       H  n[        R                  X   U5      X'   M     US	:X  a5  [        SU R                  S   5       H  nX   R                  5       X'   M     OQUS
:X  a5  [        SU R                  S   5       H  nX   R!                  5       X'   M     OUS:w  a  [#        SU< S35      eU(       dI  [        U5      S:  a:  [        SU R                  S   5       H  n[        R                  X   U5      X'   M     U$ ! [         R                   a@  n[
        R                  " SU< S[         R                  " 5       < SU< S3SS9   S nAGNS nAff = f)NzUnknown local setting z (current: z) - .r   )
stacklevelr   r   r	   r
   NONEz'Unknown option for case_change_action: )pylocale	getlocale	setlocaleLC_ALLErrorwarningswarnr   r   
isinstancefloatr   strip_accents_unicoder   _remove_stopwordsr   r   RuntimeError)	cincoutr   r   r   r   r   er%   s	            r&   r   StringNormalizer._run_columnM   s    7*""8??G< a&Qq#))A,'A$'5))*@@I ( Ua1ciil+*<<TWiP , (1ciil+'--/ ,7*1ciil+'--/ ,6)9:L9OqQ  !SZ!^1ciil+*<<TWeL , C >> ,WK{8CUCUCWBZZ^_`^ccde  s   %G H  5HH c                 d   ^ U R                  S5      nSR                  [        U4S jU5      5      $ )N c                    > U T;  $ )N )sr   s    r&   <lambda>4StringNormalizer._remove_stopwords.<locals>.<lambda>   s	    %    )splitjoinfilter)textr   spls    ` r&   r7   "StringNormalizer._remove_stopwords}   s'    jjoxx7=>>rD   c           	          U R                  SSS9  U $ ! [         aa    [        R                  " SU 5      nSR	                  U Vs/ s H"  n[        R
                  " U5      (       a  M   UPM$     Os  snf sn5      n U s $ f = f)a  
Transforms accentuated unicode symbols into their simple counterpart.
Source: `sklearn/feature_extraction/text.py
<https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/
feature_extraction/text.py#L115>`_.

:param s: string
    The string to strip
:return: the cleaned string
ASCIIstrict)errorsNFKDr   )encodeUnicodeEncodeErrorunicodedata	normalizerF   	combining)rA   
normalizedcs      r&   r6   &StringNormalizer.strip_accents_unicode   sn    	 HHWXH.H! 	$..vq9JJOJqk6K6KA6NJOPAH	s!    0A?A.
'A.
-A?>A?r@   )NNNN)NNNNN)__name__
__module____qualname____firstlineno____doc__r'   staticmethodr   r7   r6   __static_attributes__r@   rD   r&   r   r      si      4l  - -^ ? ?  rD   r   )
r!   r-   rR   r2   numpyr   onnx.reference.op_runr   r   r   r@   rD   r&   <module>ra      s$        9Gu GrD   