
    7h86                       S SK Jr  S SKrS SKJr  S SKJrJrJrJ	r	J
r
  S SKrS SKJrJr  S SKJrJr  S SKJr  \	(       a  S SKJr  \R,                  R/                  \S	5      r\R,                  R/                  \S
5      r\\\
\\R:                  4         r\\\   /\4   r\R@                  " SS9 " S S5      5       r!\R@                  " SS9 " S S5      5       r"\R@                  " SS9 " S S5      5       r#    S+S jr$S,S jr%S-S jr&S.S jr'S/S jr(      S0S jr)        S1S jr*S2S jr+    S3S jr,    S3S jr-S4S jr.\R@                   " S S5      5       r/          S5S  jr0S6S! jr1 " S" S#\5      r2            S7S$ jr3      S8S% jr4\R@                  " SS9 " S& S'5      5       r5\R@                  " SS9 " S( S)5      5       r6      S9S* jr7g):    )annotationsN)Enum)AnyCallableOptionalTYPE_CHECKINGUnion)countersget_metrics_context)GraphPartitionMap	InputType)
OrderedSet)Sequence
perf_hintscudagraph_static_inputsT)frozenc                  $    \ rS rSr% SrS\S'   Srg)
FunctionID   z9Unique counter of a function wrapped in cudagraphify_implintid N__name__
__module____qualname____firstlineno____doc____annotations____static_attributes__r       Y/var/www/fran/franai/venv/lib/python3.13/site-packages/torch/_inductor/cudagraph_utils.pyr   r      s
    ?Gr!   r   c                  B    \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   S
rg)PlaceholderInfo#   z
A serializable version of torch.fx.Node that contains information
pertinent to placeholder stack traces. We use these in logging and error messages
related to cudagraphs, and will cache these results.
strnameOptional[str]stack_tracelist[PlaceholderInfo]usersmutating_use_stack_tracer   Nr   r   r!   r"   r$   r$   #   s      I  ++r!   r$   c                  V    \ rS rSr% SrS\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   Srg)WrappedFunction2   z
Represents a function that you want to record for CUDA graph replay,
with a little more metadata so we can identify if we have an applicable
CUDA graph in our CUDA graph tree for it.
zCallable[..., Any]modelSequence[int]static_input_idxsr   r   ztuple[torch.Tensor, ...]	constantsSequence[PlaceholderInfo]placeholdersmutated_input_idxsr   Nr   r   r!   r"   r.   r.   2   s,     $$N''++%%r!   r.   c                   [        U R                  5      S:X  a8  [        [        U R                  5      5      R                  R                  SS 5      $ U R                   Hh  nUR                  [        R                  R                  R                  R                  :X  d  MA  UR                  R                  SS 5      =n(       d  Mf  Us  $    g )N   r)   )lenr+   nextitermetagettargettorchopsatencopy_default)placeholder_nodeuser)   s      r"   &get_mutating_use_stack_trace_from_noderF   B   s     !!"a'D)//0166::=$OO%%::--555!hhll=$??{?"" &
 r!   c                    U R                   $ N)r,   )placeholder_infos    r"   get_mutating_use_stack_tracerJ   Q   s    444r!   c                    U R                   nU R                  R                  SS 5      n/ nS nU R                  S:X  a0  U R                   Vs/ s H  n[        U5      PM     nn[        U 5      n[        XX45      $ s  snf )Nr)   placeholder)r'   r<   r=   opr+   to_placeholder_inforF   r$   )rD   r'   r)   r+   r,   is         r"   rN   rN   U   s      D"''++M4@KE#m+1A1G1GH1GA$Q'1GH#I$
  4eNN Is   A9c                z    U R                    Vs/ s H  oR                  S:X  d  M  [        U5      PM!     sn$ s  snf )NrL   )nodesrM   rN   )graphnodes     r"   get_placeholder_inforT   d   s9    .3kk.9dWW=U!D!k  s   88c                    SU  3$ )Nzskipping cudagraphs due to r   )reasons    r"   format_default_skip_messagerW   j   s    (11r!   c                    SnU H  nX   n[        U5      =n(       d  M    O   [        S[        U5       S35      nU(       a  U SU 3$ U$ )N zmutated inputs (z instances). Found from : 
 )rJ   rW   r9   )r5   mutation_indicesr)   idxrL   msgs         r"   get_mutation_stack_tracer^   n   si     "$K"'6{CC;C  
 &
3/01=C (66Jr!   c                   [         R                  R                  R                  R                  (       a?  U R
                   Vs/ s H'  nX0R                  ;   a  M  U" X   5      (       a  M%  UPM)     nnOU R
                  n[        R                  SU R                  5        [        R                  SU5        U(       a  [        U R                  U5      $ S $ s  snf )Nz'check mutation static input indices: %sz#check mutation mutation indices: %s)r?   	_inductorconfigtritoncudagraph_treesr6   r2   static_inputs_logdebugr^   r5   )funcinputsis_cuda_graph_recorded_tensorr\   r[   s        r"   check_for_mutationri      s     $$44 ..+
.---  1=	 . 	 +
  22143I3I ACST  	!!2!24DE !+
s   CC*Cc                x    U R                    H*  nUR                  R                  SS 5      =n(       d  M(  Us  $    g )Nr)   )r+   r<   r=   )rS   rE   r)   s      r"   _get_use_stack_tracerk      s5    zz((,,}d;;;;  r!   c                   U R                  [        R                  " S5      S 5        [        R                  R                  R
                  (       a&  U R                  [        R                  " S5      S 5        U R                  [        R                  " S5      5      =n(       a=  SUR                   S3n[        U5      =n(       a  [        U SU 35      $ [        U5      $ [        U 5      S:X  a1  [        [        U R                  5       5      5      R                  S:X  a  g S U R                  5        5       n[        S	S
R                  U5       35      $ )Nr<   cpuzcpu device ()rZ   r8   cudac              3  8   #    U  H  n[        U5      v   M     g 7frH   )repr).0keys     r"   	<genexpr>:check_multiple_devices_or_any_cpu_nodes.<locals>.<genexpr>   s     A&@sc&@s   zmultiple devices: z, )popr?   devicer`   ra   graph_partitionr=   r'   rk   rW   r9   r:   r;   keystypejoin)device_node_mappingcpu_noder]   r)   	keys_reprs        r"   'check_multiple_devices_or_any_cpu_nodesr      s    ELL0$7--U 3T:&**5<<+>??x?X]]O1-.x88;8.#6H/VWW*3// 	 A%)..012776AA&9&>&>&@AI&);DIIi<P;Q'RSSr!   c                    [        U 5      $ rH   )r   )r|   s    r"    check_lowering_disable_cudagraphr      s     33FGGr!   c                    [         R                  U 5        [        S   S==   S-  ss'   [        5       nUR	                  5       (       a  UR                  SU SS9  g g )Ninductorcudagraph_skipsr8   cudagraph_skip_reasonT)	overwrite)perf_hint_logwarningr
   r   in_progressset)r]   metrics_contexts     r"   #log_cudagraph_skip_and_bump_counterr      sX    #Z*+q0+)+O""$$3SDI %r!   c                  *    \ rS rSr% S\S'   SS jrSrg)BoxedDeviceIndex   Optional[int]valuec                D    Ub  [        U[        5      (       d   eXl        g rH   )
isinstancer   r   )self
device_idxs     r"   r   BoxedDeviceIndex.set   s    !Z
C%@%@@@
r!   )r   N)r   r   returnNone)r   r   r   r   r   r   r    r   r!   r"   r   r      s     r!   r   c                n   [        S5      n[        R                  R                  R                  R
                  (       aZ  [        U5      nU Vs/ s H  ofU;  d  M
  UPM     nn[        U5      S:g  nU(       d  g [        U R                  5      n	[        X5      $ [        U5      S:g  nU(       d  S $ U$ s  snf )Nzmutated inputsr   )rW   r?   r`   ra   rb   rc   r   r9   rT   rR   r^   )
gmmutated_inputsr6   r2   default_msgunique_idxsr\   r[   has_mutationr5   s
             r"   3check_for_mutation_ignore_cuda_graph_managed_tensorr      s     ..>?K $$44 !23+=X+=CKAWC+=X+,1+BHH5'GG >*a/'t8[8 Ys   	B2B2c                    U R                   (       a  U R                   $ U R                   H"  nUR                   (       d  M  UR                   s  $    g)zE
Gets the first non-empty stack trace of a placeholder or its users.
N)r)   r+   )rL   users     r"   get_placeholder_stack_tracer      sF     &&&!!### " r!   c                  .    \ rS rSrSrSrSrSrS	S jrSr	g)
CheckInvariantStatus   r8            c                    U R                   S:X  a  gU R                   S:X  a  gU R                   S:X  a  gU R                    SU R                   3$ )NCudagraphManagedIdxMismatchz-cudagraph managed tensor data pointer changedStaticInputIdxMismatchz!static input data pointer changed&ExpectedDeadIndicesBeforeGraphMismatchz+expected dead indices before graph are livez: )r'   r   )r   s    r"   __str__CheckInvariantStatus.__str__  sK    9955BYY226YYBB@ii[4::,//r!   r   Nr   r&   )
r   r   r   r   SUCCESSr   r   r   r   r    r   r!   r"   r   r      s$    G #$  ./*0r!   r   c                   [        U5      [        U5      :X  a  [        U5      [        U 5      :X  d   S5       eU Vs/ s H  oQU   PM	     nnU Vs/ s H  oRU   PM	     nnU S3n[        [        Xg5      5       Hy  u  nu  p[        U	[        R
                  5      (       d   eX5   nU	R                  5       U
:w  d  MC  X   nU SUR                   SU
 SU	R                  5        S[        U5       S3
nM{     U$ s  snf s  snf )zq
Logs the mismatch between input data pointers and recorded data pointers.
This checks only idxs in target_idxs.
zClength mismatch between inputs, recorded_data_ptr, and placeholdersz.
zinput name: z. data pointer changed from z to z. input stack trace: 
)	r9   	enumeratezipr   r?   Tensordata_ptrr'   r   )r5   rg   recorded_data_ptrtarget_idxsmismatchrO   	t_tensorst_data_ptrs	error_msgtensorr   indexrL   s                r"   log_data_ptr_mismatchr     s    v;#/00S[CDU5U MU %00KqKI01<=AQ'K=*C I!*3y+F!GF&%,,////??(&-K+\+*:*:); <--5Jd6??;L:M N&&A+&N%OrS  "H  1=s   C8C=c                N  ^ [        U R                  5       5      S-   mSU4S jjn[        R                  R                  R
                  R                  (       aM  T[        R                  R                  R
                  R                  :  a  [        R                  U" 5       5        gg)Nr8   c                    > ST  S3$ )NzCUDAGraph supports dynamic shapes by recording a new graph for each distinct input size. Recording too many CUDAGraphs may lead to extra overhead. We have observed a0   distinct sizes. Please consider the following options for better performance: a) padding inputs to a few fixed number of shapes; or b) set torch._inductor.config.triton.cudagraph_skip_dynamic_graphs=True. Set torch._inductor.config.triton.cudagraph_dynamic_shape_warn_limit=None to silence this warning.r   )num_cudagraphss   r"   warn_msg4maybe_warning_due_to_dynamic_shape.<locals>.warn_msg<  s    00>/? @''		
r!   TFr   )	r9   ry   r?   r`   ra   rb   "cudagraph_dynamic_shape_warn_limitr   r   )fn_cachenew_int_keyr   r   s      @r"   "maybe_warning_due_to_dynamic_shaper   6  st     )A-N

 	%%HH
//
 
 
'
'
J
JK 	hj)r!   c                  8    \ rS rSr% SrS\S'   S\S'   S\S'   S	rg
)CudagraphCachedInfoiS  z
Info needed to realign inputs
r4   r5   list[Optional[str]]stack_tracesz	list[str]cudagraph_fail_reasonsr   Nr   r   r!   r"   r   r   S  s     ,+%%%%r!   r   c                  L    \ rS rSr% SrS\S'   S\S'   S\S'   S\S	'   S
\S'   Srg)CudagraphMetadatai^  z&
Metadata for recording a CUDA graph.
r4   r5   OrderedSet[int]r2   r6   r   r   zdict[str, torch.Tensor]r3   r   Nr   r   r!   r"   r   r   ^  s'     ,+&&''%%&&r!   r   c                v   / n[        5       n[        5       n[        U R                  5       H  u  pVXaR                  ;   a  UR	                  U5        XaR
                  ;   a  UR	                  U5        Ub  UR                  U   nO[        SU R                   SU 3S/ SS9nUR                  U5        M     / nU R                   H7  n	U	b   UR                  UR                  U	   5        M&  UR                  S5        M9     U R                   V
s0 s H  oUR                  U
   _M     nn
[        UUUUU5      $ s  sn
f )z
Convert the cudagraph metadata at the graph level to the graph partition level,
given the graph partition info (i.e., mapping from partition input/output index
to graph input/output index).
N
partition__placeholder_)r'   r)   r+   r,   )r   r   input_index_mappingr2   addr6   r5   r$   r   appendoutput_index_mappingr   constant_namesr3   r   )partition_mapmetadatapartition_placeholderspartition_static_input_idxspartition_mutated_input_idxspartition_input_idxgraph_input_idxrL   partition_stack_tracesgraph_output_idxr'   partition_constantss               r"    get_partition_cudagraph_metadatar   k  s`     3=<4>L 09))1, 888'++,?@999(,,-@A&"//@K *!-"2"2!3=AT@UV )-	K 	%%k2'1*  )>>'"))(*?*?@P*QR"))$/	 ? 4A3O3O3O4h  &&3O   #$ 	s   D6)rD   torch.fx.Noder   r(   )rI   r$   r   r(   )rD   r   r   r$   )rR   ztorch.fx.Graphr   r*   )rV   r&   r   r&   )r5   r4   r[   r1   r   r&   )rf   r.   rg   list[InputType]rh   zCallable[[torch.Tensor], bool]r   r(   )rS   r   r   r(   )r|   z!dict[torch.device, torch.fx.Node]r   r(   )r]   r&   r   r   )
r   ztorch.fx.GraphModuler   zOrderedSet[str]r6   r   r2   r1   r   r(   )rL   r$   r   r(   )r5   r4   rg   r   r   zSequence[Optional[int]]r   r1   r   r   r   r&   )r   z)dict[tuple[int, ...], Callable[..., Any]]r   r   r   bool)r   r   r   r   r   r   )8
__future__r   dataclassesenumr   typingr   r   r   r   r	   r?   torch._dynamo.utilsr
   r   torch._inductor.utilsr   r   torch.utils._ordered_setr   collections.abcr   _logginggetArtifactLoggerr   r   rd   listr   r   
OutputType	ModelType	dataclassr   r$   r.   rF   rJ   rN   rT   rW   r^   ri   rk   r   r   r   r   r   r   r   r   r   r   r   r   r   r!   r"   <module>r      s   "   @ @  = > / ( 00<HNN44' 
 (5ell!2345
d9o&
23	 d#  $ d#, , $, d#& & $&#5O2+?L&
 $B 	>T:TT6H:HHJ      99#9 (9 %	9
 9004 00+ / 	
 # 	>7 
: d#& & $& d#	' 	' $	'3$33 3r!   