
    hC                         S SK r S SKrS SKrS SKJr  S SKr\R                  \R                  \R                  \R                  S.rS r " S S5      r     SS jrg)	    N)AutoTokenizer)ztorch.int32ztorch.int64ztorch.float32ztorch.float16c                     SSK Jn  UR                  U R                  5       UR                  5       UR	                  5       UR                  5       -  UR                  R                  5        g )Nr   )cudart)cudar   
cudaMemcpydata_ptrelement_sizenelementcudaMemcpyKindcudaMemcpyDeviceToDevice)dstsrcr   s      p/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/models/phi2/inference_example.pycuda_memcpyr      sK    
S\\^+66	    c                   r    \ rS rSrS rS rS rS\R                  S\	S\	4S jr
 SS	 jrSS
 jrS rS rSrg)ORTGenerator!   c                     Xl         SU l        SU l        SU l        SU l        SU l        SU l        SU l        0 U l        g )N    P   i   r   F)	onnx_decoder_path	num_heads	head_size
num_layersmax_sequence_length	device_iduse_cuda_graphuse_traced_inputsstatic_inputs_map)selfdecoder_paths     r   __init__ORTGenerator.__init__"   sD    !-#' #!&!#r   c                    XR                   ;   a  g [        R                  " S5      n[        R                  " SU R                  5      n0 n[        R                  " US4[        R
                  US9US'   [        R                  " S/[        R                  US9US'   [        R                  " US/-  [        R
                  US9US'   [        R                  " S/[        R
                  US9US	'   XR                  U R                  U R                  4n[        U R                  5       Hk  n[        R                  " XS[        R                  S
9nUR                  SU 3UR                  5       SU 3UR!                  5       R                  5       05        Mm     [        R                  " USS4[        R                  US9US'   X@R                   U'   g )Ncpur      )dtypedevice	input_idsr   step	seqlens_ktotal_sequence_lengthr)   r(   	past_key_past_value_   logits)r    torchr)   r   zerosint32tensorint64r   r   r   ranger   float16update
contiguousclone)r!   
batch_size
cpu_devicecuda_device	static_iocache_shapeicaches           r   append_static_inputs!ORTGenerator.append_static_inputs-   sm   ///\\%(
ll64>>:	!&j!_EKKXc!d	+!LL!EKKT	&!&jA3.>ekkZe!f	+-2\\1#U[[Yc-d	)*!>>43K3KT^^\t'AKKu}}UE	!ou/?/?/A[QRPSCTV[VaVaVcVnVnVpqr ( $kk:q%*@^ij	(-6z*r   c           	      p   U R                   (       a  [        R                  O[        R                  U l        [        R
                  " US   U R                  [        R                  S9n[        R
                  " US   U R                  [        R                  S9nUR                  u  pEU R                  =(       a9    X@R                  ;   =(       a$    U R                  =(       a    U R                  (       + U l        U R                  (       d/  [        R
                  " S/U R                  [        R                  S9OU R                  U   S   nU R                  (       d2  [        R
                  " US/-  U R                  [        R                  S9OU R                  U   S   n[        XsR!                  S5      R#                  S5      R%                  [        R                  5      5        U R                  (       d9  [        R
                  " S/[        R                  " S5      [        R                  S9OU R                  U   S	   nXXS'   UR'                  5       UR'                  5       S
.n	U R(                  (       a  UR'                  5       U	S'   U R                  (       a)  UR'                  5       U	S'   UR'                  5       U	S	'   U	S	 U R                  (       a  U R*                  OSn
U R                  (       a  SX@R,                  XR.                  4OX@R,                  XR.                  4nU R                  (       d  [1        U R2                  5       H  n[        R4                  " XR                  U R                  S9nU R                  (       dG  U	R7                  SU 3UR'                  5       SU 3UR9                  5       R'                  5       05        M  U	R7                  SU 3UR'                  5       05        M     Oy[1        U R2                  5       H`  nU	R7                  SU 3U R                  U   SU 3   R'                  5       SU 3U R                  U   SU 3   R'                  5       05        Mb     [        R4                  " XESU R                  U R                  S9nSUR'                  5       0nU R                  (       d  U R                  (       a  SX@R,                  XPR.                  4OX@R,                  XPR.                  4n[1        U R2                  5       H  n[        R4                  " UU R                  U R                  S9nU R                  (       d9  UR7                  SU 3UR'                  5       SU 3UR'                  5       05        Mw  UR7                  SU 3UR'                  5       05        M     X4$ )Nr*   r.   attention_maskr   r+   r,   r'   r&   r-   )r*   rG      r/   r0   past_r1   r2   present_key_present_value_present_)use_fp16r3   r9   float32torch_dtyper6   r)   r5   shaper   r    use_buffer_share	packed_kvr   r7   r   sumsubtor;   use_stepr   r   r   r8   r   r4   r:   r<   )r!   encodings_dictr*   rG   r=   sequence_lengthr+   r,   total_seq_lengthinputspast_seq_length
past_shaperB   pastr2   outputspresent_shapepresents                     r   get_initial_inputs_and_outputs+ORTGenerator.get_initial_inputs_and_outputsD   s   ,0MM5==u}}LL!<T[[X]XcXcd	n5E&Ft{{bgbmbmn&/oo#
  #555#%%# NN"	 	 )) LL!T[[D''
3F; 	 )) LLqc)$++U[[Q''
3K@ 	
 	I11!488;>>u{{KL )) LL!U\\%%8L''
34KL 	
 . #--/,779

 ==!__.F6N"+"6"6"8F;.>.I.I.KF*+'(6:6K6K$22QR ~~ 
NNO^^Lnno~~N 	 %%4??+{{:kkIYIYZ  >> MMYqc?DOO4EUVTWGXZ^ZdZdZfZqZqZs"tu%sT__5F'GH , 4??+#A3)?)?
)KiXYWZO)\)g)g)i%aS)4+A+A*+MP[\][^N_+`+k+k+m , Z%[_[k[klV..01$$ >> JP ../>>R 
 4??+++mDKKtO_O_`
  >> NN's+W-?-?-A^TUSVCWY`YkYkYmn !8A39K9K9M(NO , r   modelrZ   r^   c           
      r   UR                  5       nS nUR                  5        H  u  pgUR                  UUR                  R                  UR                  R                  S:X  a  SOUR                  R
                  [        [        UR                  5         [        UR                  5      UR                  5       S9  UR                  nM     UR                  5        GHW  nUR                  n	U R                  (       a  SU	;   a  X)R                  SS5         nUR!                  U	UR                  R                  UR                  R
                  U R"                  (       a  [$        R&                  O[$        R(                  [        UR                  5      UR                  5       S9  M  X9   nUR!                  U	UR                  UR                  S:X  a  SOUR
                  U R"                  (       a  [$        R&                  O[$        R(                  [        UR                  5      UR                  5       S9  GMZ     U$ )Nr&   r   )namedevice_typer   element_typerP   
buffer_ptrr`   r]   )
io_bindingitems
bind_inputr)   typeindexpt_to_npreprr(   tuplerP   r   get_outputsre   rQ   replacebind_outputrM   npr9   rN   )
r!   rc   rZ   r^   ri   r)   kvoutputre   s
             r   apply_io_bindingORTGenerator.apply_io_binding   s   %%'
LLNDA!!HHMM xx}}5!188>>%d177m4AGGn::< "  XXF # '')F;;D$$d):<<	6:;&& !hhnn04"**2::. zz| '  M&& &#);;%#7aV\\04"**2::. zz| '  *. r   c                 ~   Xl         [        R                  " 5       nSUl        SUl        X`l        U R                   S:  a  SU R                   U R
                  S.4OSn[        R                  " U R                  Xx/S9U l        [        R                  " 5       U l
        [        R                  R                  5       (       a!  [        R                  " SU R                   5      O[        R                  " S5      U l        X l        X0l        X@l        XPl        [&        R(                  " S	S
S9U l        SU R*                  l        g )N   r   CUDAExecutionProvider)r   enable_cuda_graphCPUExecutionProvider)sess_options	providersr   r&   zmicrosoft/phi-2T)trust_remote_codez[PAD])r   ortSessionOptionslog_verbosity_levellog_severity_levelr   InferenceSessionr   sess
RunOptionsror3   r   is_availabler)   rM   rQ   rR   rV   r   from_pretrained	tokenizer	pad_token)	r!   r   rM   rQ   rR   rV   r   r   eps	            r   create_sessionORTGenerator.create_session   s     #))++,(*+', ~~" %DNNY]YlYl&mn' 	
 (()?)?lfjk	..">Cjj>U>U>W>Well64>>:]b]i]ijo]p  0" &667H\`a#* r   c                    U R                  U5      u  pVUS   R                  5       nUR                  u  pU	n
[        R                  " XR
                  [        R                  S9nU(       a  / nSnX:  Ga  U R                  U R                  XV5      nU(       a  [        R                  " 5       nUR                  5         U(       a  U R                  (       a  U R                  R                  SS5        U R                  R                  XR                  5        U R                  (       a8  U R                  R                  SU R                  (       a  [!        U5      OS5        SnO%U R                  R                  XR                  5        UR#                  5         U(       a)  [        R                  " 5       nWR%                  UW-
  5        US   S S 2SS S 24   n[        R&                  " USS	9nUU-  U R(                  R*                  :H  nUR-                  XR(                  R*                  5      R/                  US
/5      n[        R0                  " UU/SS	9n[        R2                  " U5      (       a  GO	U
S
-  n
UR5                  [        R6                  5      US'   U R                  (       a4  [9        U R:                  U   S   US   5        U R:                  U   S   US'   U R<                  (       az  [        R>                  " U
S
-
  /U R
                  [        R@                  S9US'   U R                  (       a4  [9        U R:                  U   S   US   5        U R:                  U   S   US'   U R                  (       a  US   nUU) R/                  US
5      -   R5                  [        R6                  5      US'   XS   S'   U R                  (       ad  [9        U R:                  U   S   US   5        U R:                  U   S   US'   US   S   U R:                  U   S   S'   U R:                  U   S   US'   OM[        R0                  " US   U) R/                  US
5      /S
5      R5                  [        R6                  5      US'   US   R                  S
   S
:w  aH  US   S S 2S S
2S S 24   RC                  5       US'   U R                  (       a  U R:                  U   S   US'   US   RE                  5         U RF                  (       Gdq  [I        U RJ                  5       H@  nU RL                  (       d  USU 3   USU 3'   USU 3   USU 3'   M2  USU 3   USU 3'   MB     US   R                  S
   nU RL                  (       a  SXRN                  UU RP                  4OXRN                  UU RP                  4n[I        U RJ                  5       H  n[        R                  " UU R
                  U RR                  S9nU RL                  (       dG  URU                  SU 3URC                  5       SU 3UR                  5       RC                  5       05        M  URU                  SU 3URC                  5       05        M     X:  a  GM  U(       aI  [W        SU SU	 SX)-
   35        [W        SSWS   -   SS[X        RZ                  " US
S  5      -   S35        g U R(                  R]                  USS9nU$ )Nr*   r.   Tgpu_graph_idz-1Fr2   )dimr'   r+   r,   r-   r   rG   rJ   r/   rK   r0   rL   rI   rH   zBatch size: z, Sequence length: z, Token num: zPrompt letency: i  zms, Token latency: ms)skip_special_tokens)/ra   r<   rP   r3   r4   r)   boolrx   r   timesynchronize_inputsr   r   add_run_config_entryrun_with_iobindingr   strsynchronize_outputsappendargmaxr   eos_token_idmasked_fillreshapecatallrU   r5   r   r    rV   r6   r7   r;   zero_rQ   r8   r   rR   r   r   rO   r:   printrt   meanbatch_decode)r!   rW   
max_lengthcuda_graph_annotation	benchmarkrZ   r^   all_token_idsr=   rX   current_lengthhas_eoslatency
prompt_runri   startendnext_token_logitsnext_tokenstokens_to_addprevious_seqlens_krB   new_sequence_lengthr_   r`   textss                             r   generate_implORTGenerator.generate_impl   sN   ==nM{+113&3&9&9#
(++jEJJOG
)..tyy&JJ		))+&&GG00F		,,ZA&&GG00&dF\F\,A(Bbf #
		,,ZA**,iiksU{+ !( 1!R( ;,,'8bAK +t~~/J/JJG (33G^^=X=XYaacmopbqrM!II}m&D"MM yy!! aN"/"2"25;;"?F;%%D22:>{KVT_M`a&*&<&<Z&H&U{#}}!&~/A.B4;;^c^i^i!jv)) 6 6z B6 JFSYN[%)%;%;J%G%OF6N""%+K%8"'9gX<N<Nz[\<]']&a&abgbmbm&n{#5C./2)) 6 6z B; OQWXcQde*.*@*@*L[*YF;'U[\sUtuvUwD**:67NOPQR6:6L6LZ6XYp6qF23+099,-/A/A*a/PQST,"U[[/ '(
 x &&q)Q.$+H$5a!Qh$?$J$J$L!))(,(>(>z(J8(TGH%H##%(((t/A>>29L:L2M1#/4;nQC<P4QQC01.5n.Eqc{+ 0 '--=&>&D&DQ&G# ~~ 
NN4GX$nn6I4>>Z 
 t/A#kk-SWScScdG  $~~  ".qc 2G4F4F4H"0 4gmmo6P6P6R %^^xs^W=O=O=Q,RS 0w )P zl*=o=Nm\f\x[yz $TGAJ%6$77J4RTRYRYZabcbdZeRfKfJggijk++Mt+Tr   c                 Z    U R                   R                  USS9nU R                  XBU5      $ )NT)padding)r   batch_encode_plusr   )r!   promptr   r   rW   s        r   generateORTGenerator.generatea  s/    99&$9O!!.>STTr   c                 4   Uu  pEXR-   n0 n[         R                  " SSXE4[         R                  S9R                  5       US'   [         R                  " XE4[         R                  S9R                  5       US'   U R                  XvUSS9  U R                  XvUSS9  g )	Nr   iX  )r(   r*   rG   F)r   T)r3   randintr5   tolistonesr   )r!   prompt_shape	token_numr   r=   rX   r   rW   s           r   generate_benchmarkORTGenerator.generate_benchmarkf  s    &2#
$0
&+mmAuz>[chcncn&o&v&v&x{#+0::z6S[`[f[f+g+n+n+p'( 	>7LX]^ 	>7LX\]r   )r)   r   r   r   r   r   r   rR   r   r   r    r   rO   rQ   r   rM   rV   r   N)TTFFF)F)__name__
__module____qualname____firstlineno__r#   rD   ra   r   r   dictrx   r   r   r   r   __static_attributes__ r   r   r   r   !   sT    	$7.[z&c&:&: &D &SW &R pu+2}~U
^r   r   c                    ^^ [        U 5      mTR                  X$XUT5        UU4S jnS/n	U(       d  U" U	5        U(       a9  Sn
S H0  nTR                  U5        S H  nX4nTR                  XUS9  M     M2     g g )Nc                    > [        U 5      nT(       a  TR                  US9  TR                  U SUS9n[        [        U5      5       H  n[	        SX   5        [	        SX#   5        M!     g )N)r=      )r   r   zPrompt: zTexts: )lenrD   r   r8   r   )r   example_batch_sizer   rB   	generatorr   s       r   
simple_runrun_phi2.<locals>.simple_run  se     [**6H*I""6cQc"ds5z"A*fi()UX& #r   zV```python
    def print_prime(n):
    """
    Print all primes between 1 and n
    """r   )r'   rH   r{      )   i   )r   )r   r   rD   r   )onnx_model_pathrQ   r   rR   rM   rV   r   run_benchmarkr   r   r   r=   rX   r   r   s         `       @r   run_phi2r   u  s     _-IY2BxYgh'	F 6 	&J**:6#, *<,,\\f,g $- ' r   )FTFFF)r   numpyrt   r3   transformersr   onnxruntimer   r5   r7   rN   r9   rn   r   r   r   r   r   r   <module>r      sf       &  8888ZZZZ	Q^ Q^p
 )hr   