U
    {h~(                     @   s   d dl Z d dlZdjZG dd dZG dd dZG dd dZG d	d
 d
eZdd Z	dd Z
G dd deZdd Zdd Zdd ZddddddddjZdeeeed  Zd!d" Zd#d$ Ze Zd%d& Zd'd( Zd.d*d+Zd,d- ZdS )/    N    c                   @   sT   e Zd ZdZdd Zdd Zejdkr0dd Znd	d Zd
d Z	dd Z
dd ZdS )UnicodeLiteralBuilderzAssemble a unicode string.
    c                 C   s
   g | _ d S N)charsself r   B/tmp/pip-unpacked-wheel-fhl22ezh/Cython/Compiler/StringEncoding.py__init__   s    zUnicodeLiteralBuilder.__init__c                 C   s,   t |tstdt| | j| d S )NzExpected str, got )
isinstancestrAssertionErrortyper   appendr   
charactersr   r   r	   r      s    zUnicodeLiteralBuilder.append  c                 C   sV   |dkrB|d8 }| j t|d d  | j t|d d  n| j t| d S )Nr               r   r   chrr   char_numberr   r   r	   append_charval   s
    z$UnicodeLiteralBuilder.append_charvalc                 C   s   | j t| d S r   r   r   r   r   r	   r   !   s    c                 C   s   |  | d S r   )r   r   r   Zescape_stringr   r   r	   append_uescape$   s    z$UnicodeLiteralBuilder.append_uescapec                 C   s   t d| jS )N )EncodedStringjoinr   r   r   r   r	   	getstring'   s    zUnicodeLiteralBuilder.getstringc                 C   s   d |   fS r   r!   r   r   r   r	   
getstrings*   s    z UnicodeLiteralBuilder.getstringsN)__name__
__module____qualname____doc__r
   r   sys
maxunicoder   r   r!   r#   r   r   r   r	   r      s   


r   c                   @   sH   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dS )BytesLiteralBuilderz*Assemble a byte string or char value.
    c                 C   s   g | _ || _d S r   )r   target_encodingr   r+   r   r   r	   r
   1   s    zBytesLiteralBuilder.__init__c                 C   s@   t |tr|| j}t |ts0ttt|| j| d S r   )	r   r   encoder+   bytesr   r   r   r   r   r   r   r	   r   5   s    
zBytesLiteralBuilder.appendc                 C   s   | j t|d d S )N
ISO-8859-1)r   r   r   r-   r   r   r   r	   r   ;   s    z"BytesLiteralBuilder.append_charvalc                 C   s   |  | d S r   )r   r   r   r   r	   r   >   s    z"BytesLiteralBuilder.append_uescapec                 C   s   t t| j| jS r   )bytes_literal
join_bytesr   r+   r   r   r   r	   r!   A   s    zBytesLiteralBuilder.getstringc                 C   s   |   S r   r"   r   r   r   r	   getcharE   s    zBytesLiteralBuilder.getcharc                 C   s   |   d fS r   r"   r   r   r   r	   r#   I   s    zBytesLiteralBuilder.getstringsN)r$   r%   r&   r'   r
   r   r   r   r!   r2   r#   r   r   r   r	   r*   .   s   r*   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )StrLiteralBuilderzDAssemble both a bytes and a unicode representation of a string.
    c                 C   s   t || _t | _d S r   )r*   _bytesr   _unicoder,   r   r   r	   r
   P   s    
zStrLiteralBuilder.__init__c                 C   s   | j | | j| d S r   )r4   r   r5   r   r   r   r	   r   T   s    zStrLiteralBuilder.appendc                 C   s   | j | | j| d S r   )r4   r   r5   r   r   r   r	   r   X   s    z StrLiteralBuilder.append_charvalc                 C   s   | j | | j| d S r   )r4   r   r5   r   r   r   r   r	   r   \   s    z StrLiteralBuilder.append_uescapec                 C   s   | j  | j fS r   )r4   r!   r5   r   r   r   r	   r#   `   s    zStrLiteralBuilder.getstringsN)	r$   r%   r&   r'   r
   r   r   r   r#   r   r   r   r	   r3   M   s   r3   c                   @   sL   e Zd ZdZdd Zdd Zdd Zedd	 Zd
d Z	dd Z
dd ZdS )r   Nc                 C   s   | S r   r   r   memor   r   r	   __deepcopy__j   s    zEncodedString.__deepcopy__c                 C   s   | j d k	st| | j S r   encodingr   r-   r   r   r   r	   
byteencodem   s    zEncodedString.byteencodec                 C   s   | j d kst| dS )NzUTF-8r9   r   r   r   r	   
utf8encodeq   s    zEncodedString.utf8encodec                 C   s
   | j d kS r   )r:   r   r   r   r	   
is_unicodeu   s    zEncodedString.is_unicodec                 C   s   t | S r   )string_contains_surrogatesr   r   r   r	   contains_surrogatesy   s    z!EncodedString.contains_surrogatesc                 C   s   t |  dS )Nutf8)r0   r<   r   r   r   r	   as_utf8_string|   s    zEncodedString.as_utf8_stringc                 C   s,   | j d kr|  }nt|  | j }| S r   )r:   rA   r0   r;   as_c_string_literal)r   sr   r   r	   rB      s    

z!EncodedString.as_c_string_literal)r$   r%   r&   r:   r8   r;   r<   propertyr=   r?   rA   rB   r   r   r   r	   r   d   s   
r   c                 C   s@   t t| D ]0}|dkr dS d|  kr0dkr
n q
 dS q
dS )z
    Check if the unicode string contains surrogate code points
    on a CPython platform with wide (UCS-4) or narrow (UTF-16)
    Unicode, i.e. characters that would be spelled as two
    separate code units on a narrow platform.
    r   Tr     F)mapord)ustringcr   r   r	   r>      s    r>   c                 C   sn   d}t jdk}tt| D ]P}|dk s,|dkr8|rh dS q|sB dS |dkrZ|rT dS d}q|sd dS d}q|S )a  
    Check if the unicode string contains lone surrogate code points
    on a CPython platform with wide (UCS-4) or narrow (UTF-16)
    Unicode, i.e. characters that would be spelled as two
    separate code units on a narrow platform, but that do not form a pair.
    Fr   r   rE   T  )r(   r)   rF   rG   )rH   Zlast_was_startZunicode_uses_surrogate_encodingrI   r   r   r	   string_contains_lone_surrogates   s     
rK   c                   @   s<   e Zd ZdZdd Zdd Zdd Zdd	 Zd
Zdd Z	dS )BytesLiteralNc                 C   s   | S r   r   r6   r   r   r	   r8      s    zBytesLiteral.__deepcopy__c                 C   s   t | S r   )r.   r   r   r   r	   r;      s    zBytesLiteral.byteencodec                 C   s   dst d|  d S )NFz this is not a unicode string: %r)r   r   r   r   r	   r<      s    zBytesLiteral.utf8encodec                 C   s
   |  dS )zcFake-decode the byte string to unicode to support %
        formatting of unicode strings.
        r/   )decoder   r   r   r	   __str__   s    zBytesLiteral.__str__Fc                 C   s   t t| }d| S )Nz"%s")split_string_literalescape_byte_string)r   valuer   r   r	   rB      s    z BytesLiteral.as_c_string_literal)
r$   r%   r&   r:   r8   r;   r<   rN   r=   rB   r   r   r   r	   rL      s   rL   c                 C   s    t | tstt| } || _| S r   )r   r.   r   rL   r:   rC   r:   r   r   r	   r0      s    r0   c                 C   s,   t | ttfstt| } |d k	r(|| _| S r   )r   r   r.   r   r   r:   rR   r   r   r	   encoded_string   s
    rS   c                 C   s"   t | trt| |S t| |S d S r   )r   r.   r0   rS   rR   r   r   r	   encoded_string_or_bytes_literal   s    

rT   
	)z\az\bz\fz\nz\rz\tz\v)\z??"    c                 C   sH   | dkrt | dd S | dkr$dS | dkr0dS dd	d
 | D S d S )Nz
	   r]   z\"r\   \\r   c                 S   s   g | ]}d t |dqS )r\   Z03o)rG   .0rI   r   r   r	   
<listcomp>   s     z'_to_escape_sequence.<locals>.<listcomp>)reprr    rC   r   r   r	   _to_escape_sequence   s    rg   c                     s   g } i t D ]:}ddd |D }| | t|d|d< qtdd|  djfdd  fd	d
}|S )Nr   c                 S   s   g | ]}d | dd qS )z[%s]r\   ra   )replacerb   r   r   r	   rd      s     z,_build_specials_replacer.<locals>.<listcomp>ASCIIz(%s)|c                    s    |  d S )Nr_   )group)m)replacementsr   r	   replace_specials  s    z2_build_specials_replacer.<locals>.replace_specialsc                    s
    | S r   r   rf   )rn   subr   r	   rh     s    z)_build_specials_replacer.<locals>.replace)
_c_specialr    r   rg   r-   recompilero   )Zsubexpsspecialregexprh   r   )rn   rm   ro   r	   _build_specials_replacer   s    
ru   c                 C   sV   |  d} | dkr"t| dd S | dkr.dS t| }|dk sF|dkrNd	| S | S d S )
Nr/   z
	\r_   r`   'z\'r^      z\x%02X)rM   re   rG   )rI   nr   r   r	   escape_char  s    
ry   c                 C   sp   t | } z| dW S  tk
r(   Y nX t }|j|j }}| D ]"}|dkr\|d|  qB|| qB|dS )zEscape a byte string so that it can be written into C code.
    Note that this returns a Unicode string instead which, when
    encoded as ASCII, will result in the correct byte sequence
    being written.
    ri   rw   s   \%03o)_replace_specialsrM   UnicodeDecodeError	bytearrayr   extend)rC   Zs_newr   r}   br   r   r	   rP     s    
rP     c                 C   s   t | |k r| S d}g }|t | k r|| }t | |d krd| |d | kr|d| |d | d 8 }| |d  dkr|d8 }||krn|| |d  d }qqn|| ||  |}qd|S d S )Nr      r\   r_      z"")lenfindr   r    )rC   limitstartchunksendr   r   r	   rO   /  s     $rO   c                 C   s&  t tt| dg } tjdkrvg |  }}| D ]F}|dkrht|d d\}}||d  ||d  q,|| q,n| g  }}| D ]r}d|  krdkrn nL|rd|d   krdkrn n,|d | }}|d	@ d
> |d	@  d |d< q|| q||krg }dtt|dtt|fS )zBCreate Py_UNICODE[] representation of a given unicode string.
    r   r   r   r   r   rE   r`   rJ   i  
   ,)	listrF   rG   r(   r)   divmodr   r    r   )rC   utf16utf32
code_pointhighlowZ	code_unitr   r   r	   encode_pyunicode_stringD  s$    


8
r   )r   )rq   r(   r    r1   r   r*   r3   r   r   r>   rK   r.   rL   r0   rS   rT   getZchar_from_escape_sequencetuplerF   r   rangerp   rg   ru   rz   ry   rP   rO   r   r   r   r   r	   <module>   s8   "$

