From f7753173d0dfae01877a8346f1d79a5e06ed0548 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Sun, 5 Nov 2023 18:25:38 +0100 Subject: [PATCH 1/5] fix --- src/lightning/fabric/utilities/throughput.py | 2 ++ .../tests_fabric/utilities/test_throughput.py | 22 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/lightning/fabric/utilities/throughput.py b/src/lightning/fabric/utilities/throughput.py index 1afe5217e27c7..44841c00393a0 100644 --- a/src/lightning/fabric/utilities/throughput.py +++ b/src/lightning/fabric/utilities/throughput.py @@ -382,6 +382,8 @@ def get_available_flops(device: torch.device, dtype: Union[torch.dtype, str]) -> chip = "v100-sxm" elif "v100-pcie" in chip: chip = "v100-pcie" + elif "v100s-pcie" in chip: + chip = "v100s-pcie" elif "t4" in chip: chip = "t4" elif "quadro rtx 5000" in chip: diff --git a/tests/tests_fabric/utilities/test_throughput.py b/tests/tests_fabric/utilities/test_throughput.py index 6bf7dbb5f7aa7..0a380a749b7d3 100644 --- a/tests/tests_fabric/utilities/test_throughput.py +++ b/tests/tests_fabric/utilities/test_throughput.py @@ -33,7 +33,7 @@ def test_measure_flops(): assert fwd_flops < fwd_and_bwd_flops -def test_available_flops(xla_available): +def test_get_available_flops(xla_available): with mock.patch("torch.cuda.get_device_name", return_value="NVIDIA H100 PCIe"): flops = get_available_flops(torch.device("cuda"), torch.bfloat16) assert flops == 1.513e15 / 2 @@ -61,6 +61,26 @@ def test_available_flops(xla_available): tpu.reset_mock() +@pytest.mark.parametrize("device_name", [ + # TODO: We need to represent the real names here + "h100-hbm3", + "h100-pcie", + "h100-hbm2e", + "a100", + "a10g", + "V100-sxm", + "v100-pcie", + "v100s-pcie", + "t4", + "quadro rtx 5000", +]) +@mock.patch("lightning.fabric.accelerators.cuda._is_ampere_or_later", return_value=False) +def test_get_available_flops_cuda_mapping_exists(_, device_name): + """Tests `get_available_flops` against known device names.""" + 
with mock.patch("lightning.fabric.utilities.throughput.torch.cuda.get_device_name", return_value=device_name): + assert get_available_flops(device=torch.device("cuda"), dtype=torch.float32) is not None + + def test_throughput(): # required args only throughput = Throughput() From 48c4a7e2cbdb14a14d9ae9aaeceaec6f6b08e956 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Sun, 5 Nov 2023 18:27:24 +0100 Subject: [PATCH 2/5] chlog --- src/lightning/fabric/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md index 30d70c613b7a1..7c3bbf96fc4b6 100644 --- a/src/lightning/fabric/CHANGELOG.md +++ b/src/lightning/fabric/CHANGELOG.md @@ -41,6 +41,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed layer conversion under `Fabric.init_module()` context manager when using the `BitsandbytesPrecision` plugin ([#18914](https://github.com/Lightning-AI/lightning/pull/18914)) +- Fixed parsing of v100s GPUs in `get_available_flops` ([#18952](https://github.com/Lightning-AI/lightning/pull/18952)) + + ## [2.1.0] - 2023-10-11 ### Added From 0361ad12222d4ec9fe7491348d44f9c1040bca21 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 5 Nov 2023 17:27:56 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tests_fabric/utilities/test_throughput.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/tests_fabric/utilities/test_throughput.py b/tests/tests_fabric/utilities/test_throughput.py index 0a380a749b7d3..c62fbe2e13ec4 100644 --- a/tests/tests_fabric/utilities/test_throughput.py +++ b/tests/tests_fabric/utilities/test_throughput.py @@ -61,19 +61,22 @@ def test_get_available_flops(xla_available): tpu.reset_mock() -@pytest.mark.parametrize("device_name", [ - # TODO: We need to 
represent the real names here - "h100-hbm3", - "h100-pcie", - "h100-hbm2e", - "a100", - "a10g", - "V100-sxm", - "v100-pcie", - "v100s-pcie", - "t4", - "quadro rtx 5000", -]) +@pytest.mark.parametrize( + "device_name", + [ + # TODO: We need to represent the real names here + "h100-hbm3", + "h100-pcie", + "h100-hbm2e", + "a100", + "a10g", + "V100-sxm", + "v100-pcie", + "v100s-pcie", + "t4", + "quadro rtx 5000", + ], +) @mock.patch("lightning.fabric.accelerators.cuda._is_ampere_or_later", return_value=False) def test_get_available_flops_cuda_mapping_exists(_, device_name): """Tests `get_available_flops` against known device names.""" From 5af3c1c74fb9b9c236a9560279fdb301828ba0e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Mon, 6 Nov 2023 10:05:13 -0500 Subject: [PATCH 4/5] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- tests/tests_fabric/utilities/test_throughput.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/tests_fabric/utilities/test_throughput.py b/tests/tests_fabric/utilities/test_throughput.py index c62fbe2e13ec4..22f4d848552fe 100644 --- a/tests/tests_fabric/utilities/test_throughput.py +++ b/tests/tests_fabric/utilities/test_throughput.py @@ -66,15 +66,16 @@ def test_get_available_flops(xla_available): [ # TODO: We need to represent the real names here "h100-hbm3", - "h100-pcie", + "NVIDIA H100 PCIe", "h100-hbm2e", - "a100", - "a10g", + "NVIDIA A100 80GB PCIe", + "NVIDIA A100-SXM4-40GB", + "NVIDIA A10G", "V100-sxm", - "v100-pcie", - "v100s-pcie", - "t4", - "quadro rtx 5000", + "Tesla V100-PCIE-32GB", + "Tesla V100S-PCIE-32GB", + "Tesla T4", + "Quadro RTX 5000 with Max-Q Design", ], ) @mock.patch("lightning.fabric.accelerators.cuda._is_ampere_or_later", return_value=False) From d8f144907bc29390daddb33673f43da0445f3642 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 6 Nov 2023 17:09:40 +0100 Subject: [PATCH 5/5] V100 sxm2 --- tests/tests_fabric/utilities/test_throughput.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_fabric/utilities/test_throughput.py b/tests/tests_fabric/utilities/test_throughput.py index 22f4d848552fe..06e63706d5213 100644 --- a/tests/tests_fabric/utilities/test_throughput.py +++ b/tests/tests_fabric/utilities/test_throughput.py @@ -71,7 +71,7 @@ def test_get_available_flops(xla_available): "NVIDIA A100 80GB PCIe", "NVIDIA A100-SXM4-40GB", "NVIDIA A10G", - "V100-sxm", + "Tesla V100-SXm2-32GB", "Tesla V100-PCIE-32GB", "Tesla V100S-PCIE-32GB", "Tesla T4",