
    hFU                     4   S r SSKrSSKrSSKrSSKrSSKJr  SSKJr  SSKrSSK	J
r
  SSKJr  SSKJrJrJr  SSKJrJrJr  SS	KJr   " S
 S5      rS"S\S\4S jjrS"S\S\4S jjr " S S5      rS\4S jrS\4S jr S"S\R:                  S\R<                  S-  4S jjrS r S r!\"S:X  a  \!" 5       r#\$" S\# 35        \#RJ                  c  \#RL                  S:X  a  SOS\#l%        \#RN                  (       aF  \RP                  RS                  5       (       d   e\#RT                  S:X  a  S \" 5       ;   d   eS!\#l+        O&\#RX                  (       a   e\#RV                  (       a   e\#RX                  (       d  \#RV                  (       a	  \" \#5        g\ " \#5        gg)#z]
Benchmark performance of SAM2 encoder with ORT or PyTorch. See benchmark_sam2.sh for usage.
    N)Mapping)datetime)SAM2ImageDecoder)SAM2ImageEncoder)decoder_shape_dictencoder_shape_dictload_sam2_model)InferenceSessionSessionOptionsget_available_providers)CudaSessionc            +          \ rS rSrSSSSSSSSSSSS\R
                  SS	SSSS
S4S\S\S\S\R                  S\S\S\S\S\S\S\S\	S\	S\	S\	S\S\	S\	S\	S\S\	4*S  jjr
S! rS"\\\\   4   4S# jrS"\\\R                  4   4S$ jrS%rg&)'
TestConfig   image_encoderCPUExecutionProvidermax-autotune      FT     
model_type	onnx_pathsam2_dirdevice	component
batch_sizeheightwidth
num_labels
num_points	num_masksmulti_mask_outputuse_tf32enable_cuda_graphprefer_nhwcwarm_upenable_nvtx_profileenable_ort_profileenable_torch_profilerepeatsverbosec                    US;   d   eU	S:  a  U	S::  d   eU
S:  a  U
S::  d   eXl         X l        X0l        XPl        X`l        Xpl        Xl        Xl        Xl        Xl	        Xl
        Xl        Xl        X@l        Xl        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        U R                  S:X  a(  U R                  S:X  a  U R                  S:X  d   S5       eg g )Nsam2_hiera_tinysam2_hiera_smallsam2_hiera_largesam2_hiera_base_plus   i   r   r   z7Only image size 1024x1024 is allowed for image encoder.)r   r   r   r   providertorch_compile_moder   r   r   r    r!   r"   r#   r   r$   r%   dtyper&   r'   r(   r)   r*   r+   r,   )selfr   r   r   r   r   r4   r5   r   r   r   r    r!   r"   r#   r$   r%   r6   r&   r'   r(   r)   r*   r+   r,   s                            m/var/www/fran/franai/venv/lib/python3.13/site-packages/onnxruntime/transformers/models/sam2/benchmark_sam2.py__init__TestConfig.__init__   s   6 pppp}4//|--$" " "4$
$$"!2 !2
&#6 "4$8!>>_,;;$&4::+=x?xx=+= -    c                     [        U 5       $ N)varsr7   s    r8   __repr__TestConfig.__repr__V   s    t*r;   returnc                     U R                   S:X  a+  [        U R                  U R                  U R                  5      $ [        U R                  U R                  U R                  U R                  U R                  5      $ )Nr   )	r   r   r   r   r   r   r    r!   r"   r?   s    r8   
shape_dictTestConfig.shape_dictY   sV    >>_,%doot{{DJJOO%dkk4::tPTP_P_aeaoaoppr;   c                    U R                   nU R                  S:X  aB  S[        R                  " U R                  SU R
                  U R                  XR                  S90$ [        R                  " SSSSXR                  S9[        R                  " SSS	S	XR                  S9[        R                  " SSSSXR                  S9[        R                  " S
SU R                  U R                  S4XR                  S9[        R                  " S
SU R                  U R                  4[        R                  U R                  S9[        R                  " U R                  SSSXR                  S9[        R                  " U R                  XR                  S9[        R                  " U R
                  U R                  /[        R                  U R                  S9S.$ )Nr   image   )r6   r   r          @      r   r      )image_features_0image_features_1image_embeddingspoint_coordspoint_labelsinput_maskshas_input_masksoriginal_image_size)r6   r   torchrandnr   r   r   r   randrandintr    r!   int32zerosonestensor)r7   r6   s     r8   random_inputsTestConfig.random_inputs_   s^   

>>_,U[[!T[[$**\ajujuvww %*JJq"c3eT_T_$`$)JJq"c3eT_T_$`$)JJq#r2US^S^$_ %tdootB%XcXc! !&q4??DOO<EKKX\XcXc!  %{{4??AsCu]h]hi#(::dooUS^S^#_',||T[[$**4MUZU`U`imitit'u r;   )r   r   r   r6   r%   r(   r)   r*   r   r   r#   r    r"   r!   r   r&   r4   r+   r   r5   r$   r,   r'   r   N)__name__
__module____qualname____firstlineno__rV   float32strr   intboolr9   r@   r   listrD   Tensorr^   __static_attributes__ r;   r8   r   r      sn    )')"'"'mm!$)#(%*39y9y 9y 	9y
 9y 9y 9y 9y 9y 9y 9y 9y  9y  !9y"  #9y& '9y( )9y* "+9y, !-9y. #/9y0 19y2 39yvqGCcN3 qwsELL'89 r;   r   configrB   c                    U R                   (       a  [        S[        U 5       35        U R                  S:X  a  [	        U R
                  [        5      (       a  [        R                  R                  5       OU R
                  R                  n[        R                  " X R                  5      n[        U R                  5      US'   U R                   (       a  SUS'   U R                  U4S/nOS/n[#        U R$                  XS9nU$ )Nzcreate session for CUDAExecutionProviderr$   r   r&   r   )	providers)r,   printr>   r4   
isinstancer   re   rV   cudacurrent_deviceindexr   get_cuda_provider_optionsr%   rf   r$   r&   r
   r   )rl   session_options	device_idprovider_optionsro   ort_sessions         r8   create_ort_sessionrz   t   s    ~~#DL>23113=fmmS3Q3QEJJ--/W]WdWdWjWj	&@@LdLde'*6??';$./]+oo'78:PQ	+,	"6#3#3_ZKr;   c                     [        X5      n[        X R                  U R                  5      nUR	                  U R                  5       5        U$ r=   )rz   r   r   r%   allocate_buffersrD   )rl   rv   ry   cuda_sessions       r8   create_sessionr~      s?    $V=K{MM6;S;STL!!&"3"3"56r;   c                   0    \ rS rSrSrSS\4S jjrS rSrg)	OrtTestSession   z;A wrapper of ORT session to test relevance and performance.Nrl   c                 N    [        X5      U l        UR                  5       U l        g r=   )r~   ry   r^   	feed_dict)r7   rl   rv   s      r8   r9   OrtTestSession.__init__   s    )&B--/r;   c                 L    U R                   R                  U R                  5      $ r=   )ry   inferr   r?   s    r8   r   OrtTestSession.infer   s    %%dnn55r;   )r   ry   r=   )	r`   ra   rb   rc   __doc__r   r9   r   rj   rk   r;   r8   r   r      s    E0z 06r;   r   r}   c                     [         R                   " 5       nU R                  U5      n[         R                   " 5       nXB-
  $ r=   )timer   )r}   
input_dictstart_ends        r8   measure_latencyr      s0    IIKE:&A
))+C;r;   c                    U R                   R                  nUS:H  nU(       a  [        R                  R	                  S5      R
                  S:  aY  U R                  (       aH  S[        R                  R                  R                  l	        S[        R                  R                  l	        U=(       a    U R                  [        R                  :g  nU R                  5       n[        R                  " 5          [        R                  " XR                  US9   [!        U R"                  U R$                  U R                   S9nU R&                  S:X  Ga  U(       aT  U R(                  S:w  aD  [        R*                  " UR,                  R.                  U R(                  SS	S
9UR,                  l        U R1                  5       S   n[        R2                  " U5      R5                  U R                   U R                  S9n[7        U5      nU(       a)  U R(                  S:w  a  [9        SU R(                   S35        [;        U R<                  5       H  n	U" U5      u  pnM     U(       ag  U R>                  (       aV  SS K nSSKJ!n  URE                  5         [9        S5        URG                  S5         U" USS9  S S S 5        URI                  5         U(       a  U RJ                  (       a  [        RL                  RO                  [        RL                  RP                  RR                  [        RL                  RP                  RT                  /SS9 n[9        S5        [        RL                  RW                  S5         U" U5        S S S 5        S S S 5        [9        WRY                  5       R[                  SSS95        UR]                  S5        U R^                  S:X  a   S S S 5        S S S 5        g [9        SU R^                   S35        [`        R`                  " 5       n[;        U R^                  5       H5  n	U" U5      u  pnU(       d  M  [        R                  Rc                  5         M7     GOUS   US   US   US   US    US!   US"   US#   4n[e        UU Rf                  S$9nU(       a@  U R(                  S:w  a0  [        R*                  " UR.                  U R(                  SS	S
9Ul        [;        U R<                  5       H  n	U" U6 u  nnnM     U(       ah  U R>                  (       aW  SS K nSSKJ!n  URE                  5         [9        S%5        URG                  S5         U" US&S06  S S S 5        URI                  5         U(       a  U RJ                  (       a  [        RL                  RO                  [        RL                  RP                  RR                  [        RL                  RP                  RT                  /SS9 n[9        S'5        [        RL                  RW                  S(5         U" U6   S S S 5        S S S 5        [9        WRY                  5       R[                  SSS95        UR]                  S)5        U R^                  S:X  a   S S S 5        S S S 5        g [9        SU R^                   S35        [`        R`                  " 5       n[;        U R^                  5       H3  n	U" U6 u  nnnU(       d  M  [        R                  Rc                  5         M5     [`        R`                  " 5       nUU-
  U R^                  -  sS S S 5        sS S S 5        $ ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GNl= f! , (       d  f       GNv= f! , (       d  f       O= fS S S 5        g ! , (       d  f       g = f)*Nrr   r      T)device_typer6   enabled)r   r   noneF)mode	fullgraphdynamicrG   )r   r6   zBRunning warm up. It will take a while since torch compile mode is .cudartz#Start nvtx profiling on encoder ...one_run)r(   )
activitiesrecord_shapesz$Start torch profiling on encoder ...encodercuda_time_total
   )sort_by	row_limitztorch_image_encoder.jsonzStart z runs of performance tests...rN   rO   rP   rQ   rR   rS   rT   rU   )multimask_outputz"Start nvtx profiling on decoder...r(   z$Start torch profiling on decoder ...decoderztorch_image_decoder.json)4r   typerV   rr   get_device_propertiesmajorr$   backendsmatmul
allow_tf32cudnnr6   rd   r^   inference_modeautocastr	   r   r   r   r5   compiler   forwardrD   rW   tor   rp   ranger'   r(   nvtxr   cudaProfilerStartannotatecudaProfilerStopr*   profilerprofileProfilerActivityCPUCUDArecord_functionkey_averagestableexport_chrome_tracer+   r   synchronizer   r#   )rl   r   is_cudaenabled_auto_cast
ort_inputs
sam2_modelimage_shapeimgsam2_encoderr   _image_features_0_image_features_1_image_embeddingsr   r   profr   torch_inputssam2_decoder_masks_iou_predictions_low_res_masksr   s                          r8   	run_torchr      s   --$$KV#G 5::33A6<<Afoo04""-*.'AFLLEMM$A%%'J				K||ev!w$V__f6G6GPVP]P]^
.644>38==,,4422"!	4
((0 !++-g6K++k*--V]]&,,-WC+J7L644>Z[a[t[tZuuvwx6>>*JVWZJ[G!6G + 655'((*;<]]9- $? .'')666^^++ % ? ? C CU^^EdEdEiEij"& ,  @A77	B$S) C d'')//8IUW/XY(()CD~~"W "x		Z F6>>**GHIIIKE6>>*JVWZJ[G!6G7JJ**, + -.-.-.>*>*=),-01	L ,!'!9!9L
 644>',}} ((22"!	($ 6>>*;G;V8(. + 655'((*:;]]9- ,IDI .'')666^^++ % ? ? C CU^^EdEdEiEij"& ,  @A77	B$l3 C d'')//8IUW/XY(()CD~~"M "x		P F6>>**GHIIIKE6>>*;G;V8(.7JJ**, +
 iikev~~-a "x!w		6 .- CB l .- CB w "x!w			s   4 ^>F^$%\8-B^$+],	]
	5]=A^$^>A^$9D^$	].B^$*+^^ 	^#A^$4^>A^$A^$%	^>8
]^$

]]
]+&^$.
]=8^$ 
^
^
^!^$$
^2	.^>>
_args
csv_writerc                 	   U R                   nU R                  nU R                  nU(       a8  [        R                  R                  5       n[        R                  " SU5      nSnOSn[        R                  " S5      nSnSn[        R                  [        R                  [        R                  S.n[        S+0 SU R                  _S	U R                  _S
U R                  _SU R                  _SU_SU R                  _SU R                   _SU R"                  _SU_SS_SU_SXR$                     _SU R&                  _SU R                  _SU R(                  _SU R*                  _SU R,                  _SU R.                  _SU R0                  _SS_6n	U R2                  S:X  Gad  [5        5       n
U R6                  U
l        U	R,                  (       a  SU
l        SU
l        SU
l        [?        X5      nU	RA                  5       n [C        U	R(                  5       H  n[E        X5      nM     U	R*                  (       aU  SS K%nSS!KJ&n  URO                  5         URQ                  S"5         URS                  U5      nS S S 5        URU                  5         U	R,                  (       a  URV                  RY                  5         US:X  a  g / n[C        U5       H  n[E        X5      nUR[                  U5        M!     [\        R^                  " U5      nAO2[        R`                  " 5           [c        U	5      n S S S 5        US:X  a  g U R2                  S#-   U(       a  SOS-   n0 SU R                  _SU R                  _SU R$                  _S$U_SU_SU	R&                  _SU	Rd                  _SU R                  _SU R                   _SU R"                  _S%U Rf                  _S&U	Rh                  _S'U	Rj                  _S(U	Rl                  _S)U R6                  _SU	R(                  _SU_U R*                  U R0                  UWS*.EnUb  URo                  U5        [I        [q        U	5       5        [I        U 5        g ! [F         a  n[I        SU	< S U 35         S nAg S nAff = f! , (       d  f       GN!= f! [F         a$  n[I        SU	< S U 35         S nAS S S 5        g S nAff = f! , (       d  f       GN= f),Nrr   rn   r   cpuFr   fp32fp16bf16r   r   r   r   r4   r   r   r   r   r$   Tr%   r6   r&   r+   r'   r(   r)   r*   r5   r,   ort   zFailed to run config=z. Exception: r   r   :use_gpur#   r    r!   r"   intra_op_num_threads)r(   r5   engineaverage_latencyrk   )9r   use_cuda_graphr+   rV   rr   rs   r   rd   float16bfloat16r   r   r   r   r   r   r   r   r6   r&   r'   r(   r)   r*   r5   r   r   r   enable_profilinglog_severity_levellog_verbosity_levelr~   r^   r   r   	Exceptionrp   r   r   r   r   r   r   ry   end_profilingappend
statisticsmeanno_gradr   r$   r   r    r!   r"   writerowr>   )r   r   r   r%   r+   rw   r   r4   dtypesrl   sess_optionssessionr   r   er   r   latency_listlatencyr   r   rows                         r8   run_testr     s    LLG"11<<GJJ--/	fi0*	e$!)mmU]]ENNSF ??..  ..	
  ?? {{ jj   , ZZ  $$    !44!"  22#$ "66%&  22'( )F. {{e%',0,E,E)$$,0L)./L+/0L, 6))+
	6>>*#G8 + %%#$$&y)MM*- *##%$$--/a<wA%g:G(   %//,7]]_"+F"3  a<[[3G&?FdooT^^ 	 	7	
 	. 	v)) 	FOO 	doo 	$++ 	 	T22 	f'' 	f'' 	V%% 	 9 9  	6>>!" 	7#$  $77"55*+C0 C 	T&\N	SEOE  	*6)=<=	 *)*  .vi}QC@A _ _sT   ;'Q! R
SR!
R+RR

R
S
&S8SS

S
Sc                 \   U R                   (       a  SOSnSR                  UU R                  [        R                  " 5       R                  S5      5      n[        USSS9 n/ SQn[        R                  " X4S	9nUR                  5         [        X5        S S S 5        g ! , (       d  f       g = f)
Ngpur   zbenchmark_sam_{}_{}_{}.csvz%Y%m%d-%H%M%Sa )r   newline)r   r   r6   r   r%   r&   r$   r   r   r   r#   r    r!   r"   r   r'   r+   r(   r5   r   r   )
fieldnames)r   formatr   r   nowstrftimeopencsv
DictWriterwriteheaderr   )r   featurescsv_filenamecsv_filecolumn_namesr   s         r8   run_perf_testr    s    u%H/660L
 
lb	1X
. ^^HF
 "7 
2	1	1s    4B
B+c            	         [         R                  " SS9n U R                  SSSS/SSS9  U R                  S	S/ S
QSSS9  U R                  SSSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS9  U R                  SS[        / SQSSS9  U R                  SS[        SSS9  U R                  SS[        SS S9  U R                  S!S[        SS"S9  U R                  S#S[        S$S%S9  U R                  S&S[        S'S(S9  U R                  S)S[
        S*S*S+/S,S-9  U R                  S.SSSS/S09  U R                  S1SSSS2S09  U R                  S3SSSS4S09  U R                  S5SSSS6S09  U R                  S7SSSS8S09  U R                  S9S[
        S:/ S;QS<S-9  U R                  S=S[
        S>S?S9  U R                  S@S[
        SASBS9  U R                  SCS[
        S / SDQSES-9  U R                  5       nU$ )FNz,Benchmark SMA2 for ONNX Runtime and PyTorch.)descriptionz--componentFr   image_decoderzDcomponent to benchmark. Choices are image_encoder and image_decoder.)requiredchoicesdefaulthelpz--dtyper   r   zData type for inference.z	--use_gpu
store_truezUse GPU for inference.)r
  actionr  )r   z--use_cuda_graphzUse cuda graph in onnxruntime.)r   z--intra_op_num_threads)r   r   rM   r   r      r   z&intra_op_num_threads for onnxruntime. )r
  r   r  r  r  z--batch_sizer   z
batch size)r
  r   r  r  z--heightr   zimage heightz--widthzimage widthz	--repeatsr   z8number of repeats for performance test. Default is 1000.z	--warm_upr   z)number of runs for warm up. Default is 5.z--enginer   rV   zengine for inference)r
  r   r  r  r  z--multimask_outputz:Export mask_decoder or image_decoder with multimask_output)r
  r  r  r  z--prefer_nhwcz;Use prefer_nhwc=1 provider option for CUDAExecutionProviderz--enable_nvtx_profilezVEnable nvtx profiling. It will add an extra run for profiling before performance test.z--enable_ort_profilezEnable ORT profiling.z--enable_torch_profilezYEnable PyTorch profiling. It will add an extra run for profiling before performance test.z--model_typer1   r.   zsam2 model namez
--sam2_dirz./segment-anything-2z6The directory of segment-anything-2 git root directoryz--onnx_pathz6./sam2_onnx_models/sam2_hiera_large_image_encoder.onnxzpath of onnx modelz--torch_compile_mode)zreduce-overheadr   zmax-autotune-no-cudagraphsr   z4torch compile mode. none will disable torch compile.)argparseArgumentParseradd_argumentset_defaultsrf   re   
parse_args)parserr   s     r8   _parse_argumentsr    s&   $$1_`F
 /2S   E+CVZt   %	   &
-	   u-
 #5            G   8    #   I   J   e   $    h   "c   &E   H!   YC   DKr;   __main__z
arguments:r   r   r   r   rn   Fr=   )-r   r  r   r   r   collections.abcr   r   rV   r	  r   r   r   
sam2_utilsr   r   r	   onnxruntimer
   r   r   *onnxruntime.transformers.io_binding_helperr   r   rz   r~   r   r   r   	Namespacer   r   r  r  r`   r   rp   r5   r   r   rr   is_availabler   r*   r(   rk   r;   r8   <module>r     s    
   #   * * N N Q Q BW Wtz DT $:  6 6+ |.j |.B )-{


{%{|"#JeP zD	Jtf
&48NNo4U.[a||zz&&((((;;%*.E.GGGG(-D% ++++,,,,4#<#<d+ r;   