
from collections import defaultdict
from collections.abc import Sequence
from typing import cast, Optional

import torch
import torch.distributed._functional_collectives as funcol
import torch.distributed.tensor._api as dtensor
from torch._prims_common import ShapeType
from torch.distributed.device_mesh import DeviceMesh
from torch.distributed.tensor._dtensor_spec import DTensorSpec
from torch.distributed.tensor.placement_types import (
    _StridedShard,
    Partial,
    Placement,
    Replicate,
    Shard,
)


def _explicit_order_placements(
    mesh_shape: ShapeType, placements: Sequence[Placement]
) -> list[tuple[int, Placement]]:
    """
    Replace Strided Shards with regular shards in an adjusted order.

    Returns a list of (mesh_dim, placement) tuples where the list order is the sharding order.

    ex.
    [Shard(0), _StridedShard(0, split_factor=2), Shard(0)] ->
    [(0, Shard(0)), (2, Shard(0)), (1, Shard(0))]

    """
    if len(placements) != len(mesh_shape):
        raise RuntimeError(
            f"Expected one placement per mesh dim, "
            f"but found {len(placements)} placements and {len(mesh_shape)} mesh dims."
        )
    ordered = []
    # strided shards are deferred, per tensor dim, until the strided part of
    # that dim ends; they are then emitted as regular shards in sharding order
    deferred_strided_placements = defaultdict(list)
    strided_part_ended_for_dim = set()
    for mesh_dim, p in enumerate(placements):
        if isinstance(p, _StridedShard):
            deferred_strided_placements[p.dim].append((mesh_dim, p))
        else:
            ordered.append((mesh_dim, p))
            if isinstance(p, Shard):
                if p.dim in strided_part_ended_for_dim:
                    raise NotImplementedError(
                        f"Strided sharding does not allow Shard() to appear after "
                        f"the strided part has ended. {p} at mesh dim {mesh_dim} in "
                        f"{placements} violates this assumption."
                    )
                if p.dim in deferred_strided_placements:
                    strided_part_ended_for_dim.add(p.dim)
                    strided_placements = deferred_strided_placements.pop(p.dim)
                    aggregate_size = mesh_shape[mesh_dim]
                    while len(strided_placements) > 0:
                        strided_mesh_dim, strided = strided_placements.pop()
                        if strided.split_factor != aggregate_size:
                            raise RuntimeError(
                                f"Can only convert _StridedShard to ordered Shard if "
                                f"split_factor({strided.split_factor}) == "
                                f"aggregate mesh size ({aggregate_size})"
                            )
                        aggregate_size *= mesh_shape[strided_mesh_dim]
                        ordered.append((strided_mesh_dim, Shard(strided.dim)))

    return ordered


def compute_local_shape_and_global_offset(
    global_shape: ShapeType, mesh: DeviceMesh, placements: Sequence[Placement]
) -> tuple[tuple[int, ...], tuple[int, ...]]:
    """
    Compute the local tensor shape and the global offsets into the original tensor
    of a DTensor on its current global rank. This is useful for checkpointing purposes.

    Example:
    global_tensor = [[0,  1,  2,  3,  4], sharded on mesh (DP=2, TP=2) with (Shard(1), Shard(1))
                     [10, 11, 12, 13, 14]]

    This table shows the return value of local_shape and global_offset for each rank.
    (`local_tensor` is for illustration only).

    Note how the first coordinate of global_offset is always 0, corresponding to tensor dim 0 being replicated.

    Rank        local_tensor        local_shape     global_offset
    -------------------------------------------------------------
    0           [[0, 1],            (2, 2)          (0, 0)
                 [10, 11]]

    1           [[2],               (2, 1)          (0, 2)
                 [12]]

    2           [[3],               (2, 1)          (0, 3)
                 [13]]

    3           [[4],               (2, 1)          (0, 4)
                 [14]]

    Args:
        global_shape (ShapeType): The global shape of the DTensor.
        mesh (:class:`DeviceMesh`): The device mesh this DTensor is distributed on.
        placements (Sequence[:class:`Placement`]): The placements of the DTensor.

    Return:
        local_shape: the shape of the DTensor's _local_tensor on the current rank.
        global_offset: a tuple of offsets for each dimension of the global tensor shape,
        identifying how this shard fits into the global tensor in each dimension.

    """
    return _compute_local_shape_and_global_offset(
        global_shape, mesh.shape, mesh.get_coordinate(), placements
    )


def _compute_local_shape_and_global_offset(
    global_shape: ShapeType,
    mesh_shape: ShapeType,
    my_coordinate: Optional[list[int]],
    placements: Sequence[Placement],
) -> tuple[tuple[int, ...], tuple[int, ...]]:
    ordered_placements = _explicit_order_placements(mesh_shape, placements)

    if my_coordinate is None:
        # if this rank is not participating in the mesh, return an empty result
        return ((), ())

    local_shape = list(global_shape)
    global_offset = [0] * len(global_shape)
    for mesh_dim, placement in ordered_placements:
        mesh_dim_size = mesh_shape[mesh_dim]
        if isinstance(placement, Shard):
            shard_dim = placement.dim
            local_offset = [0] * len(global_shape)
            assert shard_dim < len(local_shape), (
                f"Sharding dim {shard_dim} greater than tensor ndim {len(local_shape)}"
            )
            shard_size, shard_offset = placement._local_shard_size_and_offset(
                local_shape[shard_dim],
                mesh_dim_size,
                my_coordinate[mesh_dim],
            )
            local_shape[shard_dim] = shard_size
            local_offset[shard_dim] = shard_offset

            if shard_size == 0:
                # Special case to fill in a standardized non-garbage value for the
                # global_offset of zero-size shards: point past the end of the
                # tensor dim, so the offset cannot collide with any real shard.
                global_offset[shard_dim] = global_shape[shard_dim]
            else:
                # On a given dimension, if local_offset is smaller than the
                # accumulated global_offset, the dimension has already been
                # sharded by an earlier placement, so the offsets must be
                # accumulated rather than overwritten.
                if global_offset[shard_dim] <= local_offset[shard_dim]:
                    global_offset[shard_dim] = local_offset[shard_dim]
                else:
                    global_offset[shard_dim] += local_offset[shard_dim]

    return tuple(local_shape), tuple(global_offset)
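

# Hedged usage sketch (illustrative, not part of the upstream module): the
# private helper above is pure, so the docstring table of
# compute_local_shape_and_global_offset can be reproduced without initializing
# a mesh by passing a rank's coordinate directly. For rank 1 (coordinate
# [0, 1]) of the 2x2 mesh sharding a (2, 5) tensor twice on dim 1:
#
#     _compute_local_shape_and_global_offset(
#         (2, 5), (2, 2), [0, 1], [Shard(1), Shard(1)]
#     )
#     # -> ((2, 1), (0, 2)), matching the rank-1 row of the table above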


def compute_global_tensor_info(
    tensor: torch.Tensor, mesh: DeviceMesh, placements: Sequence[Placement]
) -> tuple[list[int], list[int]]:
    """
    Compute the global size and stride of a DTensor from the given local tensor.
    The local size is multiplied by `world_size` per sharding dim.
    The local stride is multiplied by `world_size` per sharding dim, for every
    dimension whose stride is at least that of the sharding dim.

    For example, if we have a local tensor with size (4, 8, 2) and stride (16, 1, 8),
    and the DTensor placements are [Shard(2)] with world_size 2, then the global
    size is (4, 8, 4) and the global stride is (16 * 2, 1, 8).

    Args:
        tensor (:class:`torch.Tensor`):
            Local tensor which DTensor will be constructed from.
        mesh (:class:`DeviceMesh`):
            Object which describes the mesh topology
            of devices for the DTensor.
        placements (Sequence[:class:`Placement`]):
            The attribute of the DTensor that describes its layout
            on the mesh topology.

    Return:
        tensor_shape: A List of int which specifies the size of the DTensor which is
            built on top of the local tensor.
        tensor_stride: A List of int which specifies the stride of the DTensor.
    """
    tensor_shape = list(tensor.size())
    tensor_stride = list(tensor.stride())
    for idx, placement in enumerate(placements):
        mesh_dim_size = mesh.size(idx)
        if placement.is_shard():
            shard_placement = cast(Shard, placement)
            if shard_placement.dim < 0:
                raise AssertionError(
                    "Shard placements should have negative dims normalized in "
                    f"the user-facing APIs: {shard_placement}"
                )
            shard_dim = shard_placement.dim

            assert shard_dim < tensor.ndim, (
                f"Sharding dim {shard_dim} greater than tensor ndim {tensor.ndim} "
                f"for placement number {idx}."
            )

            local_dim_size = tensor_shape[shard_dim]
            tensor_shape[shard_dim] = local_dim_size * mesh_dim_size

            # recover the global stride by rescaling every stride that is at
            # least as large as the stride of the sharded dimension
            for i in range(len(tensor_stride)):
                if i != shard_dim and tensor_stride[i] >= tensor_stride[shard_dim]:
                    # rescale the stride by the shard size
                    tensor_stride[i] = tensor_stride[i] * mesh_dim_size
        elif not isinstance(placement, (Replicate, Partial)):
            raise RuntimeError(f"placement type {type(placement)} not supported!")
    return tensor_shape, tensor_stride


def compute_global_tensor_shape(
    shape: torch.Size, mesh: DeviceMesh, placements: Sequence[Placement]
) -> torch.Size:
    """
    Compute the global size of a DTensor from the given local tensor shape,
    the mesh and placements. Different from `compute_global_tensor_info`,
    which assumes sharding is even, this util allgathers local shards' shapes
    from all ranks and thus can support uneven sharding.
    NOTE: Currently this function only supports 1D mesh.

    Args:
        shape (:class:`torch.Size`):
            Shape of the local tensor
        mesh (:class:`DeviceMesh`):
            Object which describes the mesh topology
            of devices for the DTensor.
        placements (Sequence[:class:`Placement`]):
            The attribute of the DTensor that describes its layout
            on the mesh topology.

    Return:
        tensor_shape: Shape of the global DTensor.
    """
    if len(placements) != 1:
        raise NotImplementedError(
            "compute_global_tensor_shape only supports 1 placement for now."
        )
    if len(placements) != mesh.ndim:
        raise RuntimeError(
            f"Expected one placement per mesh dim, "
            f"but found {len(placements)} placements and {mesh.ndim} mesh dims."
        )

    if isinstance(placements[0], Replicate):
        return shape
    elif isinstance(placements[0], Shard):
        local_shape = torch.tensor(list(shape))
        gathered_shaped_tensors = [
            torch.empty_like(local_shape, device=local_shape.device)
            for _ in range(mesh.size())
        ]
        funcol.all_gather_inplace(gathered_shaped_tensors, local_shape, mesh)
        sharded_dim_sum = 0
        shard_dim = placements[0].dim
        other_dims = [d for d in range(len(shape)) if d != shard_dim]
        for shape_tensor in gathered_shaped_tensors:
            # every dim except the sharded one must agree across ranks
            if not torch.equal(local_shape[other_dims], shape_tensor[other_dims]):
                raise RuntimeError(
                    "Non-sharded dimensions should have identical size across ranks."
                )
            sharded_dim_sum += int(shape_tensor[shard_dim].item())
        global_shape = list(shape)
        global_shape[shard_dim] = sharded_dim_sum
        return torch.Size(global_shape)
    else:
        raise NotImplementedError(
            f"Placement type {type(placements[0])} not supported."
        )


def try_find_mesh_from_args(
    op_call: torch._ops.OpOverload, args: Sequence[object]
) -> DeviceMesh:
    """
    Find the device mesh object from args.
    It raises a ValueError if no mesh is found.
    NOTE: we can optimize this search if needed
    """
    for arg in args:
        if isinstance(arg, (dtensor.DTensor, DTensorSpec)):
            return arg.device_mesh
        elif (
            isinstance(arg, (list, tuple))
            and len(arg) > 0
            and isinstance(arg[0], (dtensor.DTensor, DTensorSpec))
        ):
            return arg[0].device_mesh

    raise ValueError(f"Cannot find device mesh from args for op : {op_call}.")


def compute_local_stride(
    global_stride: ShapeType, mesh: DeviceMesh, placements: Sequence[Placement]
) -> tuple[int, ...]:
    """
    Compute the stride of a local tensor shard, given the global stride of the DTensor.
    NOTE: Currently this function is assuming the DTensor is evenly shardable.
    """
    stride_divisors = [1] * len(global_stride)
    for mesh_idx, p in enumerate(placements):
        if p.is_shard():
            i = cast(Shard, p).dim
            # tensor dimension i is sharded on mesh dimension mesh_idx,
            # so we need to divide all the strides larger than stride[i]
            # (by the submesh size)
            for j in range(len(global_stride)):
                if global_stride[j] > global_stride[i]:
                    stride_divisors[j] *= mesh.size(mesh_idx)
    return tuple(
        global_stride[i] // stride_divisors[i] for i in range(len(global_stride))
    )


def normalize_to_torch_size(size) -> torch.Size:
    """
    Unify variable types of the size argument to torch.Size.
    Acceptable types include:
        int, Sequence[int], Tuple[int], Tuple[Sequence[int]],
        or torch.Size
    """
    if isinstance(size, torch.Size):
        return size

    if isinstance(size, int):
        torch_size = [size]
    elif len(size) == 1 and isinstance(size[0], Sequence):
        torch_size = list(size[0])
    else:
        torch_size = list(size)
    return torch.Size(torch_size)