Disable CustomProgressBar when trainer.enable_progress_bar is False #8663

Merged: 2 commits, Mar 20, 2024
8 changes: 5 additions & 3 deletions examples/nlp/language_modeling/megatron_bart_pretraining.py
@@ -67,9 +67,11 @@ def main(cfg) -> None:
     if cfg.get('cluster_type', None) == 'BCP':
         plugins.append(TorchElasticEnvironment())
 
-    trainer = Trainer(
-        plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[ModelSummary(max_depth=3), CustomProgressBar()]
-    )
+    callbacks = [ModelSummary(max_depth=3)]
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
+    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
 
     exp_manager(trainer, cfg.exp_manager)
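The guard added in each script is the same: append CustomProgressBar only when enable_progress_bar is missing from cfg.trainer or set to a truthy value, so the callback never conflicts with enable_progress_bar=False being forwarded through **cfg.trainer. Below is a minimal sketch of how the check evaluates for the three possible config states; it is illustrative only (not part of the diff) and uses a plain string as a stand-in for the callback so it runs without NeMo installed.

from omegaconf import OmegaConf

for trainer_cfg in ({}, {"enable_progress_bar": True}, {"enable_progress_bar": False}):
    cfg = OmegaConf.create({"trainer": trainer_cfg})
    callbacks = []
    # Same condition as in the PR: add the bar when the key is absent or truthy.
    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
        callbacks.append("CustomProgressBar")  # stand-in for the real callback
    print(trainer_cfg, "->", callbacks)

# Expected output:
# {} -> ['CustomProgressBar']
# {'enable_progress_bar': True} -> ['CustomProgressBar']
# {'enable_progress_bar': False} -> []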
@@ -160,7 +160,11 @@ def main(cfg) -> None:
     if cfg.get('cluster_type', None) == 'BCP':
         plugins.append(TorchElasticEnvironment())
 
-    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()])
+    callbacks = []
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
+    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
 
     exp_manager(trainer, cfg.exp_manager)
8 changes: 5 additions & 3 deletions examples/nlp/language_modeling/megatron_gpt_eval.py
@@ -167,11 +167,13 @@ def remove_padded_prompts(response, nb_paddings):
 @hydra_runner(config_path="conf", config_name="megatron_gpt_inference")
 def main(cfg) -> None:
 
+    callbacks = []
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
     # trainer required for restoring model parallel models
     trainer = Trainer(
-        strategy=NLPDDPStrategy(timeout=datetime.timedelta(seconds=18000)),
-        **cfg.trainer,
-        callbacks=[CustomProgressBar()],
+        strategy=NLPDDPStrategy(timeout=datetime.timedelta(seconds=18000)), **cfg.trainer, callbacks=callbacks,
     )
 
     if cfg.gpt_model_file is not None:
6 changes: 5 additions & 1 deletion examples/nlp/language_modeling/megatron_retro_cal_shape.py
@@ -61,7 +61,11 @@ def main(cfg) -> None:
     if cfg.get('cluster_type', None) == 'BCP':
         plugins.append(TorchElasticEnvironment())
 
-    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()])
+    callbacks = []
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
+    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
 
     # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams
     with open_dict(cfg):
6 changes: 5 additions & 1 deletion examples/nlp/language_modeling/megatron_retro_fine_tune.py
@@ -106,7 +106,11 @@ def main(cfg) -> None:
     if cfg.get('cluster_type', None) == 'BCP':
         plugins.append(TorchElasticEnvironment())
 
-    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()])
+    callbacks = []
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
+    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
     exp_manager(trainer, cfg.exp_manager)
 
     logging.info(f'Resuming training from checkpoint: {trainer.ckpt_path}')
@@ -70,7 +70,11 @@ def main(cfg) -> None:
     if cfg.get('cluster_type', None) == 'BCP':
         plugins.append(TorchElasticEnvironment())
 
-    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()])
+    callbacks = []
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
+    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
 
     exp_manager(trainer, cfg.exp_manager)
6 changes: 5 additions & 1 deletion examples/nlp/language_modeling/megatron_retro_pretraining.py
@@ -69,7 +69,11 @@ def main(cfg) -> None:
     if cfg.get('cluster_type', None) == 'BCP':
         plugins.append(TorchElasticEnvironment())
 
-    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()])
+    callbacks = []
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
+    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
 
     exp_manager(trainer, cfg.exp_manager)
@@ -68,9 +68,11 @@ def main(cfg) -> None:
     if cfg.get('cluster_type', None) == 'BCP':
         plugins.append(TorchElasticEnvironment())
 
-    trainer = Trainer(
-        plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[ModelSummary(max_depth=3), CustomProgressBar()]
-    )
+    callbacks = [ModelSummary(max_depth=3)]
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
+    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
     exp_manager(trainer, cfg.exp_manager)
 
     # update resume from checkpoint found by exp_manager
@@ -181,7 +181,11 @@ def main(cfg) -> None:
     if cfg.get('cluster_type', None) == 'BCP':
         plugins.append(TorchElasticEnvironment())
 
-    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()])
+    callbacks = []
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
+    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
 
     exp_manager(trainer, cfg.exp_manager)
6 changes: 5 additions & 1 deletion examples/nlp/language_modeling/tuning/megatron_gpt_sft.py
@@ -206,7 +206,11 @@ def main(cfg) -> None:
     if cfg.get('cluster_type', None) == 'BCP':
         plugins.append(TorchElasticEnvironment())
 
-    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=[CustomProgressBar()])
+    callbacks = []
+    # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+    if 'enable_progress_bar' not in cfg.trainer or cfg.trainer.enable_progress_bar:
+        callbacks.append(CustomProgressBar())
+    trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer, callbacks=callbacks)
 
     exp_manager(trainer, cfg.exp_manager)
14 changes: 7 additions & 7 deletions nemo/collections/nlp/parts/megatron_trainer_builder.py
@@ -124,7 +124,8 @@ def _plugins(self) -> list:
     def create_trainer(self, callbacks=None) -> Trainer:
         strategy = self._training_strategy()
         plugins = self._plugins()
-        if callbacks is None:
+        # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+        if 'enable_progress_bar' not in self.cfg.trainer or self.cfg.trainer.enable_progress_bar:
             callbacks = [CustomProgressBar()]
         return Trainer(plugins=plugins, strategy=strategy, **self.cfg.trainer, callbacks=callbacks)
 
@@ -145,12 +146,11 @@ class MegatronT5TrainerBuilder(MegatronTrainerBuilder):
     def create_trainer(self) -> Trainer:
         strategy = self._training_strategy()
         plugins = self._plugins()
-        return Trainer(
-            plugins=plugins,
-            strategy=strategy,
-            **self.cfg.trainer,
-            callbacks=[ModelSummary(max_depth=3), CustomProgressBar()]
-        )
+        callbacks = [ModelSummary(max_depth=3)]
+        # enable_progress_bar is True by default. If cfg.trainer.enable_progress_bar=False, CustomProgressBar is not appended to callbacks
+        if 'enable_progress_bar' not in self.cfg.trainer or self.cfg.trainer.enable_progress_bar:
+            callbacks.append(CustomProgressBar())
+        return Trainer(plugins=plugins, strategy=strategy, **self.cfg.trainer, callbacks=callbacks)
 
 
 class MegatronLMPPTrainerBuilder(MegatronTrainerBuilder):
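For the builder path, the net effect is that the progress-bar callback stays consistent with the enable_progress_bar flag that is forwarded through **self.cfg.trainer. Below is a Lightning-only sketch of the same pattern, assuming pytorch_lightning and omegaconf are installed; it uses the stock TQDMProgressBar as a stand-in for CustomProgressBar, and toy_cfg is an illustrative config, not an actual NeMo one.

from omegaconf import OmegaConf
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import TQDMProgressBar

toy_cfg = OmegaConf.create(
    {"trainer": {"accelerator": "cpu", "devices": 1, "max_steps": 1, "enable_progress_bar": False}}
)

callbacks = []
# Only attach a progress-bar callback when the flag is absent or truthy,
# so it does not clash with enable_progress_bar=False forwarded via **cfg.trainer.
if 'enable_progress_bar' not in toy_cfg.trainer or toy_cfg.trainer.enable_progress_bar:
    callbacks.append(TQDMProgressBar())

trainer = Trainer(**toy_cfg.trainer, callbacks=callbacks)
print(trainer.progress_bar_callback)  # None when the bar is disabled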