
    h&1                        S r SSKrSSKr\" S5      rSS jrS rSS jrSS jrS r	S	 r
\S
:X  aO  \" 5       r\" S\5        SSKJr  \" \R                   5        \R"                  r\
" \\5      r\ H  r\" \5        M     gg)zThis profiler result processor print out the kernel time spent on each Node of the model.
Example of importing profile result file from onnxruntime_perf_test:
    python profile_result_processor.py --input profile_2021-10-25_12-02-41.json
    N)ScanLoopIfc                 \   [         R                  " 5       nUR                  SSS[        SS9  UR                  SS[        SSS	9  UR                  S
S[        SSS	9  UR                  SSSSS9  UR                  SS9  UR                  SSSSS9  UR                  SS9  UR                  U 5      $ )Nz-iz--inputFz2Set the input file for reading the profile results)requiredtypehelpz--thresholdg{Gz?zfThreshold of run time ratio among all nodes. Nodes with larger ratio will show in top expensive nodes.)r   r   defaultr	   z
--providercudazExecution provider to usez--kernel_time_only
store_truez.Only include the kernel time and no fence time)r   actionr	   )kernel_time_onlyz-vz	--verbose)r   r   )verbose)argparseArgumentParseradd_argumentstrfloatset_defaults
parse_args)argvparsers     k/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/profile_result_processor.pyparse_argumentsr      s    $$&F
A   u   (   =	   /
kE,O
&T""    c                     [        SU  S35        [        U 5       n[        R                  " U5      nS S S 5        [	        W[
        5      (       d   eU$ ! , (       d  f       N'= f)Nzloading profile output z ...)printopenjsonload
isinstancelist)profile_fileopened_file	sess_times      r   load_profile_jsonr&   ;   sU    	#L>
67	l	{IIk*	 
 i&&&&	 
	s   A
A!c                 "   0 n0 n0 nSnSnU  H  nUS   S:X  a  US   S:X  a  SnU(       d  M   US   S:X  d  M+  S	U;   d  M3  S
U;   d  M;  SUS
   ;   d  MF  US   nUS
   S   n	U	[         ;   a  M_  U	(       d  SU S3n	X;   a  X8==   US	   -  ss'   XH==   S-  ss'   OUS	   X8'   SXH'   XU'   XWS	   -  nM     U(       d  S/$ / n
U
R                  SUS-  S S35        U
R                  S5        U
R                  S5        [        UR                  5       S SS9 HH  u  pX-  nX:  a  M  XH   nU[	        U5      -  nU
R                  US SUS-  S SUS SUS SU 3	5        MJ     0 nUR                  5        H   u  pX8   nX;   a  X==   U-  ss'   M  XU	'   M"     U
R                  S5        U
R                  S5        U
R                  S5        [        UR                  5       S  SS9 H'  u  pX-  nU
R                  US SUS-  S SU	 35        M)     U
$ )!a$  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

Args:
    sess_time (List[Dict]): profile data
    threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

Returns:
    List[str]: lines of string for output.
r   FcatSessionnamesession_initializationTKerneldurargsop_name()   zNo kernel record found!z%
Top expensive kernels with Time% >= d   .2f:@----------------------------------------------------------------u&   Total(μs)	Time%	Calls	Avg(μs)	Kernelc                     U S   $ Nr2    xs    r   <lambda>&parse_kernel_results.<locals>.<lambda>x   s    1Q4r   keyreverse10d	      Y@5.2f5d8.1fz
Group kernel time by operator:u   Total(μs)	Time%	Operatorc                     U S   $ r8   r9   r:   s    r   r<   r=      s    1Q4r   )_NODES_TYPE_CONTAINING_SUBGRAPHappendsorteditemsr   )r%   	thresholdkernel_name_to_op_namekernel_timekernel_freqtotalsession_inititemkernel_namer/   linesdurationratiocallsavg_timeop_times                   r   parse_kernel_resultsrZ   E   s~     KKEL;)#V8P(PL;("u}4IY]^dYeLev,K6l9-G99 k]!,)(DK7((A-(+/;(+,(6={3%[ E5 8 )** E	LL9)c/#9NaPQ	LL	LL=>!'(9(9(;Y]!^ (eEl*~Rd';2eBZr(SWXZ[fZghi "_ G 6 < < >+('G !? 
LL34	LL	LL./#GMMOQUV ~Rd';2gYGH W Lr   c                    / n0 n0 n0 nSnU  GH
  nUS   S:X  d  M  SU;   d  M  SU;   d  M  SUS   ;   d  M*  US   R                  SS	5      R                  S
S	5      R                  SS	5      n	SUS   ;   aA  US   S   S:X  a  Sn
OUS   S   S:X  a  Sn
OUS   S   S:X  a  Sn
X;  a  W
Xi'   OXi   W
:X  d   eO	U(       a  M  US   S   nU[        ;   a  M  X;   a  XI==   US   -  ss'   XY==   S-  ss'   OUS   XI'   SXY'   UR                  U	5        XxS   -  nGM     / SQnSnU Hb  n	XI   nXY   nU[        U5      -  nX-  S-  nUR	                  U	S	5      nUU-  nUR                  US SUS SUS SUS SUS SUS SU	 35        Md     UR                  SUS-  S S 35        UR                  S!5        UR                  S"5        [        UR                  5       S# S$S%9 Hc  u  pX-  nUU:  a  M  XY   nU[        U5      -  nX-  S-  nUR	                  U	S	5      nUR                  US SUS SUS SUS SUS SU	 35        Me     U$ )&a  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

Args:
    sess_time (List[Dict]): profile data
    kernel_time_only (bool, optional): Only include items for kernel time. Defaults to False.
    threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

Returns:
    List[str]: lines of string for output.
r   r(   Noder-   r.   r/   r*   _kernel_time _fence_before_fence_afterproviderCPUExecutionProviderCPUCUDAExecutionProviderCUDADmlExecutionProviderDMLr2   )z
Nodes in the original order:r6   u3   Total(μs)	Time%	Acc %	Avg(μs)	Calls	Provider	Nodeg        rC   rA   rB   rD   rF   rE   8sz#
Top expensive nodes with Time% >= r3   r4   r5   r6   u-   Total(μs)	Time%	Avg(μs)	Calls	Provider	Nodec                     U S   $ r8   r9   r:   s    r   r<   $parse_node_results.<locals>.<lambda>   s    qtr   Tr>   )replacerH   rI   r   getrJ   rK   )r%   r   rL   node_name_list	node_time	node_freqnode_providerrP   rR   	node_namedevicer/   rT   before_percentagerU   rW   rX   
percentagera   rV   s                       r   parse_node_resultsru      s    NIIME;& Ud]v~)W[\bWcJcV$$^R8@@RTU]]^lnpq  T&\)<
+/EE"F&\*-1HH#F&\*-1GG"F1/5M,(3v===!6l9-G99%$U3$$)$'+E{	$'(	$%%i0%[ EC HE
 #	'$eEl*&%/
 $$Y3Z'nBz$/r2CD1IHUY?Z\]bce\ffhiqrthuuw  yB  xC  D	
 $ 
LL7	C7LANO	LL	LLEF%ioo&7^UYZ	 9$eEl*&%/
 $$Y3~R
4'88D/ERT:UWX`acWddfgpfqrs  [ Lr   c                 4   0 n0 nSn0 n0 n0 n0 nSn0 n	U  GH"  n
U
S   S:X  d  M  SU
;   d  M  SU
;   d  M  SU
S   ;   d  M*  U
S   S   nU[         ;   a  M>  SU
S   ;  a.  SU
S	   ;   a#  X;   a  X{==   U
S   -  ss'   OU
S   X{'   XS   -  nMu  U
S   R                  SS
5      nX;   a  X==   S-  ss'   OSX'   U SU 3nX;   a  XM==   U
S   -  ss'   X]==   S-  ss'   OU
S   XM'   SX]'   X;   a  Xl==   U
S   -  ss'   OU
S   Xl'   X;   a  X==   U
S   -  ss'   X+==   S-  ss'   OU
S   X'   SX+'   X:S   -  nGM%     S
S/nUR                  S5        UR                  S5        [        UR	                  5       S SS9 He  u  pUR                  US5      nX-  nUU-   nUX8-   -  nX+   nUU-  nUR                  US SUS-  S SUS SUS-  S SUS SUS SUS SU 35        Mg     US
S/-  nUR                  S5        UR                  S5        [        UR	                  5       S SS9 Hl  u  pUR                  S5      nUS   nUS   nUR                  SS
5      nX]   nUU-  nXU   -  nUR                  US SUS-  S SUS SUS SUS SU 35        Mn     U$ ) zGroup results by operator name.

Args:
    sess_time (List[Dict]): profile data

Returns:
    List[str]: lines of string for output.
r   r(   r\   r-   r.   r/   ra   fencer*   r^   r2   r5   zGrouped by operatorr6   uM   Total(μs)	Time%	Kernel(μs)	Kernel%	Calls	AvgKernel(μs)	Fence(μs)	Operatorc                     U S   $ r8   r9   r:   s    r   r<   $group_node_results.<locals>.<lambda>0  s    QqTr   Tr>   rA   rB   rC   rD   11drE   z14.1fzGrouped by provider + operatoru<   Kernel(μs)	Provider%	Calls	AvgKernel(μs)	Provider	Operatorc                     U S   $ r8   r9   r:   s    r   r<   ry   >  s    RSTURVr   ExecutionProviderz9.2frh   )rH   rl   rI   rJ   rK   splitrk   )r%   op_kernel_timeop_kernel_recordstotal_kernel_timeprovider_op_kernel_timeprovider_op_kernel_recordsprovider_kernel_timeop_fence_timetotal_fence_timeprovider_counterrR   r/   ra   r?   rT   rN   
fence_timekernel_time_ratio
total_time
time_ratiokernel_callsavg_kernel_timepartsshort_eprW   provider_time_ratios                             r   group_node_resultsr      s    N !#M;& Ud]v~)W[\bWcJc6l9-G 99f-d6l*/%.$u+=.15e.$U3$F|''
B7H+ *a/*-. *Jay)C-',U;,*/14//3E{',23*//$.$u+=.15e$.('4;6'!*a/**.u+'-.!*e,W Z &'E	LL	LLgh &~';';'=>[_ `"&&w2
'; :-
#4#GH
(1%4#be!3D 9K;LBO`chOhimNnnpq}  A  qB  BD  ET  UZ  D[  []  ^h  il  ]m  mo  pw  ox  y	
 !a 
b233E	LL	LLTU"#:#@#@#B`de		#8(##$7<*/%-),JJ3r"5"=d!C2eBZrRabgQhhjkstvjwwy  {B  zC  D	
 f Lr   c                     [        U 5      n[        X!R                  5      nU[        X!R                  UR                  5      -  nU[        U5      -  nU$ N)r&   rZ   rL   ru   r   r   )r#   r.   profile_recordsrT   s       r   process_resultsr   M  sK    '5O ..AE	1F1FWWE	00ELr   __main__	Arguments)setup_loggerr   )r   )Fr   )__doc__r   r   	frozensetrH   r   r&   rZ   ru   r   r   __name__	argumentsr   benchmark_helperr   r   inputr#   resultsliner9   r   r   <module>r      s   
  "+,B"C '#TL^Tn_D	 z!I	+y!-""#??LlI6Gd  r   