Skip to content

Commit

Permalink
Update all hf examples to have dist.barrier (#1139)
Browse files Browse the repository at this point in the history
Without `dist.barrier()`, all of the HF examples end up hanging: in these
small examples the process group is destroyed before all communications
have completed. This PR adds `dist.barrier()` just before
`dist.destroy_process_group()` to fix this.
  • Loading branch information
muellerzr authored Aug 21, 2024
1 parent b8e01c2 commit 1bcb2bf
Show file tree
Hide file tree
Showing 18 changed files with 18 additions and 0 deletions.
1 change: 1 addition & 0 deletions examples/huggingface/pippy_bert.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_blenderbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_camemBert.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_convBert.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_deberta.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")
Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_debertaV2.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_electra.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_fnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_gpt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_gptNeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_layoutLM.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_mbart.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_megatronBert.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_mobileBert.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_trOCR.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_unet.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down
1 change: 1 addition & 0 deletions examples/huggingface/pippy_xlnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def run(args):
else:
out = schedule.step()

dist.barrier()
dist.destroy_process_group()
print(f"Rank {args.rank} completes")

Expand Down

0 comments on commit 1bcb2bf

Please sign in to comment.