From d7b3003db9dcaf3287389c12a78a33a762f8e483 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Fri, 16 Jul 2021 15:10:30 +0100 Subject: [PATCH 1/8] Remove error, add mixed to check --- pytorch_lightning/plugins/training_type/deepspeed.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index e704b662fd6ca..d47d4caa0321b 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -541,7 +541,7 @@ def _format_precision_config(self): amp_type = self.lightning_module.trainer.accelerator_connector.amp_type amp_level = self.lightning_module.trainer.accelerator_connector.amp_level precision = self.lightning_module.trainer.accelerator_connector.precision - if precision == 16: + if precision in (16, 'mixed'): if "fp16" not in self.config and amp_type == AMPType.NATIVE: # FP16 is a DeepSpeed standalone AMP implementation rank_zero_info("Enabling DeepSpeed FP16.") @@ -559,8 +559,6 @@ def _format_precision_config(self): "enabled": True, "opt_level": amp_level, } - if "zero_optimization" in self.config and not ("amp" in self.config or "fp16" in self.config): - raise MisconfigurationException("To use DeepSpeed ZeRO Optimization, you must set precision=16.") def _create_default_config( self, From 66e0ad0ffe6b069f187c7d8d4f38316a9ba957fb Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 19 Jul 2021 09:59:30 +0100 Subject: [PATCH 2/8] Add test --- tests/plugins/test_deepspeed_plugin.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index f3d89b54ae236..bb555b5526b0b 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -448,6 +448,13 @@ def test_deepspeed_multigpu(tmpdir, deepspeed_config): _assert_save_model_is_equal(model, tmpdir, trainer) +@RunIf(min_gpus=1, deepspeed=True) +def test_deepspeed_fp32_works(tmpdir): + model = BoringModel() + trainer = Trainer(default_root_dir=tmpdir, gpus=1, plugins='deepspeed_stage_3', fast_dev_run=True) + trainer.fit(model) + + class ModelParallelClassificationModel(LightningModule): def __init__(self, lr: float = 0.01, num_blocks: int = 5): From 7312417abae63ef206c2d37eaae244ba8bf5cfef Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 19 Jul 2021 10:00:54 +0100 Subject: [PATCH 3/8] Remove test --- tests/plugins/test_deepspeed_plugin.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index bb555b5526b0b..ce802a481e79d 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -224,21 +224,6 @@ def test_deepspeed_defaults(tmpdir): assert isinstance(plugin.config["zero_optimization"], dict) -@RunIf(min_gpus=1, deepspeed=True) -def test_invalid_deepspeed_defaults_no_precision(tmpdir): - """Test to ensure that using defaults, if precision is not set to 16, we throw an exception.""" - model = BoringModel() - trainer = Trainer( - default_root_dir=tmpdir, - fast_dev_run=True, - plugins='deepspeed', - ) - with pytest.raises( - MisconfigurationException, match='To use DeepSpeed ZeRO Optimization, you must set precision=16.' - ): - trainer.fit(model) - - @RunIf(min_gpus=1, deepspeed=True, special=True) def test_warn_deepspeed_override_backward(tmpdir): """Test to ensure that if the backward hook in the LightningModule is overridden, we throw a warning.""" From 85b01caf92fb3438ed803ddf34e601ee957b8c49 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 19 Jul 2021 10:03:35 +0100 Subject: [PATCH 4/8] Add changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49703527a56f3..412eaf32aacce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -405,6 +405,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed deprecated `optimizer` argument in `LightningModule.manual_backward()`; Toggling optimizers in manual optimization should be done using `LightningModule.{un}toggle_optimizer()` ([#8287](https://github.com/PyTorchLightning/pytorch-lightning/pull/8287)) +- Removed DeepSpeed FP16 Exception as FP32 is now supported ([#8462](https://github.com/PyTorchLightning/pytorch-lightning/pull/8462)) + + ### Fixed - Fixed `lr_scheduler` checkpointed state by calling `update_lr_schedulers` before saving checkpoints ([#7877](https://github.com/PyTorchLightning/pytorch-lightning/pull/7877)) From 503105da9be4a71555ea64fb44ced57aebbca4f8 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 19 Jul 2021 10:19:43 +0100 Subject: [PATCH 5/8] Add test for mixed --- tests/plugins/test_deepspeed_plugin.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index ce802a481e79d..bb02fae8f4c57 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -164,13 +164,17 @@ def test_deepspeed_plugin_env(tmpdir, monkeypatch, deepspeed_config): @RunIf(amp_native=True, deepspeed=True) +@pytest.mark.parametrize("precision", [ + 16, + 'mixed', +]) @pytest.mark.parametrize( "amp_backend", [ pytest.param("native", marks=RunIf(amp_native=True)), pytest.param("apex", marks=RunIf(amp_apex=True)), ] ) -def test_deepspeed_precision_choice(amp_backend, tmpdir): +def test_deepspeed_precision_choice(amp_backend, precision, tmpdir): """ Test to ensure precision plugin is also correctly chosen. DeepSpeed handles precision via Custom DeepSpeedPrecisionPlugin @@ -181,12 +185,12 @@ def test_deepspeed_precision_choice(amp_backend, tmpdir): default_root_dir=tmpdir, plugins='deepspeed', amp_backend=amp_backend, - precision=16, + precision=precision, ) assert isinstance(trainer.accelerator.training_type_plugin, DeepSpeedPlugin) assert isinstance(trainer.accelerator.precision_plugin, DeepSpeedPrecisionPlugin) - assert trainer.accelerator.precision_plugin.precision == 16 + assert trainer.accelerator.precision_plugin.precision == precision @RunIf(deepspeed=True) From 73af9f36cbeac2857cba8a8367647f0ae25b4b67 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 19 Jul 2021 11:38:40 +0200 Subject: [PATCH 6/8] Update tests/plugins/test_deepspeed_plugin.py --- tests/plugins/test_deepspeed_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index bb02fae8f4c57..3cc09abd0e4e3 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -166,7 +166,7 @@ def test_deepspeed_plugin_env(tmpdir, monkeypatch, deepspeed_config): @RunIf(amp_native=True, deepspeed=True) @pytest.mark.parametrize("precision", [ 16, - 'mixed', + 'mixed' ]) @pytest.mark.parametrize( "amp_backend", [ From e06cbb5a1a462473ed6fd31d1c46f57054e2b718 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Jul 2021 09:39:58 +0000 Subject: [PATCH 7/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/plugins/test_deepspeed_plugin.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index 3cc09abd0e4e3..e1e4a2bd8d699 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -164,10 +164,7 @@ def test_deepspeed_plugin_env(tmpdir, monkeypatch, deepspeed_config): @RunIf(amp_native=True, deepspeed=True) -@pytest.mark.parametrize("precision", [ - 16, - 'mixed' -]) +@pytest.mark.parametrize("precision", [16, 'mixed']) @pytest.mark.parametrize( "amp_backend", [ pytest.param("native", marks=RunIf(amp_native=True)), From 92def4613bd78493791bf316556ab33d5b5ce2ac Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 19 Jul 2021 11:32:36 +0100 Subject: [PATCH 8/8] Add special --- tests/plugins/test_deepspeed_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/plugins/test_deepspeed_plugin.py b/tests/plugins/test_deepspeed_plugin.py index e1e4a2bd8d699..dea6bd141dfa1 100644 --- a/tests/plugins/test_deepspeed_plugin.py +++ b/tests/plugins/test_deepspeed_plugin.py @@ -434,7 +434,7 @@ def test_deepspeed_multigpu(tmpdir, deepspeed_config): _assert_save_model_is_equal(model, tmpdir, trainer) -@RunIf(min_gpus=1, deepspeed=True) +@RunIf(min_gpus=1, deepspeed=True, special=True) def test_deepspeed_fp32_works(tmpdir): model = BoringModel() trainer = Trainer(default_root_dir=tmpdir, gpus=1, plugins='deepspeed_stage_3', fast_dev_run=True)