From c16de51d39bc0262f6f44c2155c720db9f245097 Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Mon, 16 Dec 2024 16:51:03 -0800
Subject: [PATCH 1/9] set gelu correctly

---
 tests/unit/ops/transformer/inference/test_gelu.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/unit/ops/transformer/inference/test_gelu.py b/tests/unit/ops/transformer/inference/test_gelu.py
index 5f820ef3b579..782e03565cbf 100644
--- a/tests/unit/ops/transformer/inference/test_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_gelu.py
@@ -9,6 +9,7 @@
 from deepspeed.ops.op_builder import InferenceBuilder
 from deepspeed.ops.transformer import DeepSpeedInferenceConfig
 from deepspeed.ops.transformer.inference.op_binding.bias_gelu import BiasGeluOp
+from deepspeed.utils.torch import required_torch_version

 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)
@@ -23,15 +24,11 @@ def allclose(x, y):


 def version_appropriate_gelu(activations):
-    global torch_minor_version
-    if torch_minor_version is None:
-        torch_minor_version = int(torch.__version__.split('.')[1])
-    # If torch version = 1.12
-    if torch_minor_version < 12:
-        return torch.nn.functional.gelu(activations)
-    else:
+    # gelu behavior changes (correctly) in torch 1.12
+    if required_torch_version(min_version=1.12):
         return torch.nn.functional.gelu(activations, approximate='tanh')
-
+    else:
+        return torch.nn.functional.gelu(activations)

 def run_gelu_reference(activations):
     # Expected behavior is that of casting to float32 internally and using the tanh approximation
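Note on patch 1: the old helper derived the torch minor version with int(torch.__version__.split('.')[1]), which misreads torch 2.x releases (e.g. 2.1 has minor 1, which compares as < 12) and would force the exact gelu even where the tanh form is available. The patch swaps in required_torch_version from deepspeed.utils.torch. As an illustration only — not the DeepSpeed implementation, and the helper name torch_at_least is hypothetical — a version gate of the same shape can be built on packaging:

    # Sketch of a packaging-based version gate similar in spirit to
    # deepspeed.utils.torch.required_torch_version. torch_at_least is a
    # hypothetical name used only for this illustration.
    import torch
    from packaging import version as pkg_version

    def torch_at_least(min_version: str) -> bool:
        # Handles multi-digit minors (1.13) and major bumps (2.x) that the old
        # split('.')[1] parsing mishandled.
        return pkg_version.parse(torch.__version__) >= pkg_version.parse(min_version)

    def reference_gelu(activations):
        # torch >= 1.12 exposes the tanh approximation used by the test's reference.
        if torch_at_least("1.12"):
            return torch.nn.functional.gelu(activations, approximate='tanh')
        return torch.nn.functional.gelu(activations)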
From db6851fc9c34b2dde3c05b38cef198964ff12778 Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Mon, 16 Dec 2024 16:59:47 -0800
Subject: [PATCH 2/9] Remove all instances of torch_minor

---
 tests/unit/ops/transformer/inference/test_bias_geglu.py | 2 --
 tests/unit/ops/transformer/inference/test_bias_gelu.py | 2 --
 tests/unit/ops/transformer/inference/test_bias_relu.py | 2 --
 tests/unit/ops/transformer/inference/test_gelu.py | 2 --
 tests/unit/ops/transformer/inference/test_matmul.py | 1 -
 tests/unit/ops/transformer/inference/test_softmax.py | 2 --
 6 files changed, 11 deletions(-)

diff --git a/tests/unit/ops/transformer/inference/test_bias_geglu.py b/tests/unit/ops/transformer/inference/test_bias_geglu.py
index 05de4fbb4cf8..c995d2a8c46d 100644
--- a/tests/unit/ops/transformer/inference/test_bias_geglu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_geglu.py
@@ -15,8 +15,6 @@
 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def run_bias_geglu_reference(activations, bias):
     # Expected behavior is that of casting to float32 internally
diff --git a/tests/unit/ops/transformer/inference/test_bias_gelu.py b/tests/unit/ops/transformer/inference/test_bias_gelu.py
index b69030e87ace..e3a3bad63961 100644
--- a/tests/unit/ops/transformer/inference/test_bias_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_gelu.py
@@ -16,8 +16,6 @@
 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def run_bias_gelu_reference(activations, bias):
     # Expected behavior is that of casting to float32 internally and using the tanh approximation
diff --git a/tests/unit/ops/transformer/inference/test_bias_relu.py b/tests/unit/ops/transformer/inference/test_bias_relu.py
index 57134665b241..69078f9f7646 100644
--- a/tests/unit/ops/transformer/inference/test_bias_relu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_relu.py
@@ -15,8 +15,6 @@
 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def run_bias_relu_reference(activations, bias):
     # Expected behavior is that of casting to float32 internally
diff --git a/tests/unit/ops/transformer/inference/test_gelu.py b/tests/unit/ops/transformer/inference/test_gelu.py
index 782e03565cbf..54f762c6b232 100644
--- a/tests/unit/ops/transformer/inference/test_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_gelu.py
@@ -14,8 +14,6 @@
 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def allclose(x, y):
     assert x.dtype == y.dtype
diff --git a/tests/unit/ops/transformer/inference/test_matmul.py b/tests/unit/ops/transformer/inference/test_matmul.py
index 559aa2c60afe..2ab195ee0115 100644
--- a/tests/unit/ops/transformer/inference/test_matmul.py
+++ b/tests/unit/ops/transformer/inference/test_matmul.py
@@ -12,7 +12,6 @@
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

 inference_module = None
-torch_minor_version = None


 def allclose(x, y):
diff --git a/tests/unit/ops/transformer/inference/test_softmax.py b/tests/unit/ops/transformer/inference/test_softmax.py
index e582be1b926a..83785ac38ebb 100644
--- a/tests/unit/ops/transformer/inference/test_softmax.py
+++ b/tests/unit/ops/transformer/inference/test_softmax.py
@@ -11,8 +11,6 @@
 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def allclose(x, y):
     assert x.dtype == y.dtype

From 0bd98e462c94c4883feb9246a97659b3c6bf11eb Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Mon, 16 Dec 2024 17:06:26 -0800
Subject: [PATCH 3/9] Test with the same gelu always

---
 tests/unit/ops/transformer/inference/test_gelu.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/unit/ops/transformer/inference/test_gelu.py b/tests/unit/ops/transformer/inference/test_gelu.py
index 54f762c6b232..62338ff384f1 100644
--- a/tests/unit/ops/transformer/inference/test_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_gelu.py
@@ -23,10 +23,7 @@ def allclose(x, y):

 def version_appropriate_gelu(activations):
     # gelu behavior changes (correctly) in torch 1.12
-    if required_torch_version(min_version=1.12):
-        return torch.nn.functional.gelu(activations, approximate='tanh')
-    else:
-        return torch.nn.functional.gelu(activations)
+    return torch.nn.functional.gelu(activations)

 def run_gelu_reference(activations):
     # Expected behavior is that of casting to float32 internally and using the tanh approximation
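Note on patch 3: collapsing version_appropriate_gelu to the plain (exact, erf-based) gelu changes what the reference computes, because the exact form and the tanh approximation are close but not identical, and the comment in run_gelu_reference says the expected behavior follows the tanh approximation. This is why the change is undone by the revert in patch 8. A quick, illustrative way to see the gap (exact values depend on the random input):

    # Illustrative: compare exact gelu against the tanh approximation.
    import torch

    x = torch.randn(4, 256, dtype=torch.float32)
    exact = torch.nn.functional.gelu(x)
    approx = torch.nn.functional.gelu(x, approximate='tanh')
    # Small but nonzero difference; enough to trip tight float32 tolerances.
    print((exact - approx).abs().max())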
From ed5fb1f23d3ea704366482527311946609c2c2a7 Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Tue, 17 Dec 2024 09:54:54 -0800
Subject: [PATCH 4/9] Add skip with correct logic

---
 tests/unit/ops/transformer/inference/test_bias_gelu.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/ops/transformer/inference/test_bias_gelu.py b/tests/unit/ops/transformer/inference/test_bias_gelu.py
index e3a3bad63961..5a82b95dd87b 100644
--- a/tests/unit/ops/transformer/inference/test_bias_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_gelu.py
@@ -10,8 +10,8 @@
 from deepspeed.ops.op_builder import InferenceBuilder
 from deepspeed.ops.transformer import DeepSpeedInferenceConfig
 from deepspeed.ops.transformer.inference.op_binding.bias_gelu import BiasGeluOp
+from deepspeed.utils.torch import required_torch_version
 from .inference_test_utils import allclose, get_dtypes
-from packaging import version as pkg_version

 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)
@@ -34,7 +34,7 @@ def run_bias_gelu_ds(activations, bias):
 @pytest.mark.parametrize("channels", [512, 1232, 4096])
 @pytest.mark.parametrize("dtype", get_dtypes())
 def test_bias_gelu(batch, sequence, channels, dtype):
-    if pkg_version.parse(torch.__version__) < pkg_version.parse("1.12"):
+    if required_torch_version(min_version=1.12):
         pytest.skip("gelu implementation matches only after torch 1.12")

     activations_ds = torch.randn((batch, sequence, channels), dtype=dtype, device=get_accelerator().device_name())

From 63d434c8a1fafeb9887efd145f1b1ebc6642da1a Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Tue, 17 Dec 2024 10:02:28 -0800
Subject: [PATCH 5/9] Switch to not check for skip

---
 tests/unit/ops/transformer/inference/test_bias_gelu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/ops/transformer/inference/test_bias_gelu.py b/tests/unit/ops/transformer/inference/test_bias_gelu.py
index 5a82b95dd87b..f0a09245e890 100644
--- a/tests/unit/ops/transformer/inference/test_bias_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_gelu.py
@@ -34,7 +34,7 @@ def run_bias_gelu_ds(activations, bias):
 @pytest.mark.parametrize("channels", [512, 1232, 4096])
 @pytest.mark.parametrize("dtype", get_dtypes())
 def test_bias_gelu(batch, sequence, channels, dtype):
-    if required_torch_version(min_version=1.12):
+    if not required_torch_version(min_version=1.12):
         pytest.skip("gelu implementation matches only after torch 1.12")

     activations_ds = torch.randn((batch, sequence, channels), dtype=dtype, device=get_accelerator().device_name())
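Note on patches 4-5: the polarity of the skip guard is easy to invert. required_torch_version(min_version=1.12) is true on torch 1.12 and newer, so patch 4's guard skipped exactly the versions the skip message says should run; patch 5 negates it so only older torch is skipped. A minimal sketch of the intended guard, reusing the helper and message from the diffs (the function name test_bias_gelu_guard_sketch is hypothetical and the real test body is elided):

    # Sketch of the skip guard as patch 5 leaves it.
    import pytest
    from deepspeed.utils.torch import required_torch_version

    def test_bias_gelu_guard_sketch():
        # Skip only when torch is older than 1.12, where torch's gelu cannot
        # match the tanh-approximate reference used by the test.
        if not required_torch_version(min_version=1.12):
            pytest.skip("gelu implementation matches only after torch 1.12")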
From 1ac687c57933c4f4e876f472f654ffc4e4f31ac0 Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Tue, 17 Dec 2024 10:17:50 -0800
Subject: [PATCH 6/9] Revert "Switch to not check for skip"

This reverts commit 63d434c8a1fafeb9887efd145f1b1ebc6642da1a.
---
 tests/unit/ops/transformer/inference/test_bias_gelu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/ops/transformer/inference/test_bias_gelu.py b/tests/unit/ops/transformer/inference/test_bias_gelu.py
index f0a09245e890..5a82b95dd87b 100644
--- a/tests/unit/ops/transformer/inference/test_bias_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_gelu.py
@@ -34,7 +34,7 @@ def run_bias_gelu_ds(activations, bias):
 @pytest.mark.parametrize("channels", [512, 1232, 4096])
 @pytest.mark.parametrize("dtype", get_dtypes())
 def test_bias_gelu(batch, sequence, channels, dtype):
-    if not required_torch_version(min_version=1.12):
+    if required_torch_version(min_version=1.12):
         pytest.skip("gelu implementation matches only after torch 1.12")

     activations_ds = torch.randn((batch, sequence, channels), dtype=dtype, device=get_accelerator().device_name())

From f0bd65e7f6eaa96decb76f544ba85536879e5466 Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Tue, 17 Dec 2024 10:17:54 -0800
Subject: [PATCH 7/9] Revert "Add skip with correct logic"

This reverts commit ed5fb1f23d3ea704366482527311946609c2c2a7.
---
 tests/unit/ops/transformer/inference/test_bias_gelu.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/ops/transformer/inference/test_bias_gelu.py b/tests/unit/ops/transformer/inference/test_bias_gelu.py
index 5a82b95dd87b..e3a3bad63961 100644
--- a/tests/unit/ops/transformer/inference/test_bias_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_bias_gelu.py
@@ -10,8 +10,8 @@
 from deepspeed.ops.op_builder import InferenceBuilder
 from deepspeed.ops.transformer import DeepSpeedInferenceConfig
 from deepspeed.ops.transformer.inference.op_binding.bias_gelu import BiasGeluOp
-from deepspeed.utils.torch import required_torch_version
 from .inference_test_utils import allclose, get_dtypes
+from packaging import version as pkg_version

 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)
@@ -34,7 +34,7 @@ def run_bias_gelu_ds(activations, bias):
 @pytest.mark.parametrize("channels", [512, 1232, 4096])
 @pytest.mark.parametrize("dtype", get_dtypes())
 def test_bias_gelu(batch, sequence, channels, dtype):
-    if required_torch_version(min_version=1.12):
+    if pkg_version.parse(torch.__version__) < pkg_version.parse("1.12"):
         pytest.skip("gelu implementation matches only after torch 1.12")

     activations_ds = torch.randn((batch, sequence, channels), dtype=dtype, device=get_accelerator().device_name())
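Note on patches 6-7: the two reverts drop the required_torch_version changes from test_bias_gelu.py and restore its original packaging-based guard. Both guards are meant to express the same condition, torch older than 1.12, though they are implemented differently (pre-release version strings are one place they could conceivably disagree). An illustrative side-by-side, assuming both imports are available in the environment:

    # Compare the two "is torch older than 1.12?" spellings used in this series.
    import torch
    from packaging import version as pkg_version
    from deepspeed.utils.torch import required_torch_version

    older_via_packaging = pkg_version.parse(torch.__version__) < pkg_version.parse("1.12")
    older_via_helper = not required_torch_version(min_version=1.12)
    print(older_via_packaging, older_via_helper)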
From 3d58fafbcbedb7b64d4bf76184436e2ecfe541a7 Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Tue, 17 Dec 2024 10:17:56 -0800
Subject: [PATCH 8/9] Revert "Test with the same gelu always"

This reverts commit 0bd98e462c94c4883feb9246a97659b3c6bf11eb.
---
 tests/unit/ops/transformer/inference/test_gelu.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/unit/ops/transformer/inference/test_gelu.py b/tests/unit/ops/transformer/inference/test_gelu.py
index 62338ff384f1..54f762c6b232 100644
--- a/tests/unit/ops/transformer/inference/test_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_gelu.py
@@ -23,7 +23,10 @@ def allclose(x, y):

 def version_appropriate_gelu(activations):
     # gelu behavior changes (correctly) in torch 1.12
-    return torch.nn.functional.gelu(activations)
+    if required_torch_version(min_version=1.12):
+        return torch.nn.functional.gelu(activations, approximate='tanh')
+    else:
+        return torch.nn.functional.gelu(activations)

 def run_gelu_reference(activations):
     # Expected behavior is that of casting to float32 internally and using the tanh approximation

From bb1c3c4dc1d2c54f7d3a0ba22765b6cd070d66f4 Mon Sep 17 00:00:00 2001
From: Logan Adams
Date: Tue, 17 Dec 2024 10:22:08 -0800
Subject: [PATCH 9/9] Formatting

---
 tests/unit/ops/transformer/inference/test_gelu.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/ops/transformer/inference/test_gelu.py b/tests/unit/ops/transformer/inference/test_gelu.py
index 54f762c6b232..a58abfdb100c 100644
--- a/tests/unit/ops/transformer/inference/test_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_gelu.py
@@ -28,6 +28,7 @@ def version_appropriate_gelu(activations):
     else:
         return torch.nn.functional.gelu(activations)

+
 def run_gelu_reference(activations):
     # Expected behavior is that of casting to float32 internally and using the tanh approximation
     return version_appropriate_gelu(activations.to(torch.float32)).to(activations.dtype)
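Net effect of the series on tests/unit/ops/transformer/inference/test_gelu.py, assembled from the hunks above (a reconstruction of the touched region only, not a copy of the full file): the module-level torch_minor_version global is gone, required_torch_version is imported from deepspeed.utils.torch, and the reference helper reads:

    # Reconstructed from patches 1, 2, 8 and 9; only the changed region is shown.
    import torch
    from deepspeed.utils.torch import required_torch_version

    def version_appropriate_gelu(activations):
        # gelu behavior changes (correctly) in torch 1.12
        if required_torch_version(min_version=1.12):
            return torch.nn.functional.gelu(activations, approximate='tanh')
        else:
            return torch.nn.functional.gelu(activations)

    def run_gelu_reference(activations):
        # Expected behavior is that of casting to float32 internally and using the tanh approximation
        return version_appropriate_gelu(activations.to(torch.float32)).to(activations.dtype)

test_bias_gelu.py ends the series with only patch 2's removal of the unused torch_minor_version global, since patches 6 and 7 undo patches 5 and 4.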