Skip to content

Commit

Permalink
add nccl group names (#2761)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #2761

Pull Request resolved: #2760

Add descriptive names (via `group_desc`) to the intra-node and cross-node process groups created for grid sharding.

Reviewed By: iamzainhuda

Differential Revision: D69970233

fbshipit-source-id: 515770e311d7104ee2c62e3e5f908a49c306a9b5
  • Loading branch information
jiayulu authored and facebook-github-bot committed Feb 22, 2025
1 parent e00868c commit 7500a0f
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions torchrec/distributed/comm.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,11 @@ def intra_and_cross_node_pg_2D(
group * devices_per_node : (group + 1) * devices_per_node
]
intra_pg_groups[group_rank].append(intra_pg_peers)
curr_intra_pg = dist.new_group(backend=backend, ranks=intra_pg_peers)
curr_intra_pg = dist.new_group(
backend=backend,
ranks=intra_pg_peers,
group_desc="sharding_intra_pg",
)
if my_rank in intra_pg_peers:
logger.warning(
f"[Connection] 2D rank {my_rank} -> intra_pg_peers {intra_pg_peers}"
Expand All @@ -256,7 +260,11 @@ def intra_and_cross_node_pg_2D(
intra_pg_group[j][cross_group_rank]
for j in range(len(intra_pg_group))
]
curr_cross_pg = dist.new_group(backend=backend, ranks=cross_pg_peers)
curr_cross_pg = dist.new_group(
backend=backend,
ranks=cross_pg_peers,
group_desc="sharding_cross_pg",
)
if my_rank in cross_pg_peers:
logger.warning(
f"[Connection] 2D rank {my_rank} -> cross_pg_peers {cross_pg_peers}"
Expand Down

0 comments on commit 7500a0f

Please sign in to comment.