From 2ed4a28504f3e26bda5c4fb260bd894d450556f8 Mon Sep 17 00:00:00 2001 From: harish Date: Thu, 14 Dec 2023 16:21:47 +0530 Subject: [PATCH 1/6] sort column headers for csv logger --- src/lightning/fabric/loggers/csv_logs.py | 1 + tests/tests_pytorch/loggers/test_csv.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/lightning/fabric/loggers/csv_logs.py b/src/lightning/fabric/loggers/csv_logs.py index 009edf97cd853..bf1d689c7329b 100644 --- a/src/lightning/fabric/loggers/csv_logs.py +++ b/src/lightning/fabric/loggers/csv_logs.py @@ -235,6 +235,7 @@ def save(self) -> None: new_keys = self._record_new_keys() file_exists = self._fs.isfile(self.metrics_file_path) + self.metrics_keys = sorted(self.metrics_keys) if new_keys and file_exists: # we need to re-write the file if the keys (header) change self._rewrite_with_new_header(self.metrics_keys) diff --git a/tests/tests_pytorch/loggers/test_csv.py b/tests/tests_pytorch/loggers/test_csv.py index 30f7f94c1516a..42b74df38aaa9 100644 --- a/tests/tests_pytorch/loggers/test_csv.py +++ b/tests/tests_pytorch/loggers/test_csv.py @@ -88,6 +88,22 @@ def test_log_metrics(tmp_path, step_idx): assert all(n in lines[0] for n in metrics) +@pytest.mark.parametrize("step_idx", [10, None]) +def test_log_metrics_column_order_sorted(tmp_path, step_idx): + logger = CSVLogger(tmp_path) + metrics = {"float": 0.3, "int": 1, "FloatTensor": torch.tensor(0.1), "IntTensor": torch.tensor(1)} + logger.log_metrics(metrics, step_idx) + logger.save() + + path_csv = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE) + with open(path_csv) as fp: + lines = fp.readlines() + + columns = list(metrics.keys()) + ['step'] + header = ','.join(sorted(columns)) + assert lines[0].strip() == header + + def test_log_hyperparams(tmp_path): logger = CSVLogger(tmp_path) hparams = { From 1e1fe4311b244aa2a2e2f7c9ea0700f1b7e37226 Mon Sep 17 00:00:00 2001 From: harish Date: Sun, 17 Dec 2023 23:56:01 +0530 Subject: [PATCH 2/6] refactor column sorting placement --- src/lightning/fabric/loggers/csv_logs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning/fabric/loggers/csv_logs.py b/src/lightning/fabric/loggers/csv_logs.py index bf1d689c7329b..d7bbf1ad720e8 100644 --- a/src/lightning/fabric/loggers/csv_logs.py +++ b/src/lightning/fabric/loggers/csv_logs.py @@ -235,7 +235,6 @@ def save(self) -> None: new_keys = self._record_new_keys() file_exists = self._fs.isfile(self.metrics_file_path) - self.metrics_keys = sorted(self.metrics_keys) if new_keys and file_exists: # we need to re-write the file if the keys (header) change self._rewrite_with_new_header(self.metrics_keys) @@ -254,6 +253,7 @@ def _record_new_keys(self) -> Set[str]: current_keys = set().union(*self.metrics) new_keys = current_keys - set(self.metrics_keys) self.metrics_keys.extend(new_keys) + self.metrics_keys.sort() return new_keys def _rewrite_with_new_header(self, fieldnames: List[str]) -> None: From 38aea2cf9cdc730d01a4ec08ea9071ed90d5329f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 Dec 2023 18:33:07 +0000 Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tests_pytorch/loggers/test_csv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tests_pytorch/loggers/test_csv.py b/tests/tests_pytorch/loggers/test_csv.py index 42b74df38aaa9..477eda52c5c6d 100644 --- a/tests/tests_pytorch/loggers/test_csv.py +++ b/tests/tests_pytorch/loggers/test_csv.py @@ -98,9 +98,9 @@ def test_log_metrics_column_order_sorted(tmp_path, step_idx): path_csv = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE) with open(path_csv) as fp: lines = fp.readlines() - - columns = list(metrics.keys()) + ['step'] - header = ','.join(sorted(columns)) + + columns = list(metrics.keys()) + ["step"] + header = ",".join(sorted(columns)) assert lines[0].strip() == header From 3d2f9642c1bc53a64f7d9c6f049b6ec65a3a565d Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 18 Dec 2023 07:49:00 +0100 Subject: [PATCH 4/6] move test --- tests/tests_fabric/loggers/test_csv.py | 16 ++++++++++++++++ tests/tests_pytorch/loggers/test_csv.py | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/tests_fabric/loggers/test_csv.py b/tests/tests_fabric/loggers/test_csv.py index 8590822edd906..49feed9d65e5f 100644 --- a/tests/tests_fabric/loggers/test_csv.py +++ b/tests/tests_fabric/loggers/test_csv.py @@ -183,3 +183,19 @@ def test_rewrite_with_new_header(tmp_path): assert header == new_columns logs = file.readline().strip().split(",") assert logs == ["0", "1", "22", ""] + + +@pytest.mark.parametrize("step_idx", [10, None]) +def test_log_metrics_column_order_sorted(tmp_path, step_idx): + logger = CSVLogger(tmp_path) + metrics = {"float": 0.3, "int": 1, "FloatTensor": torch.tensor(0.1), "IntTensor": torch.tensor(1)} + logger.log_metrics(metrics, step_idx) + logger.save() + + path_csv = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE) + with open(path_csv) as fp: + lines = fp.readlines() + + columns = list(metrics.keys()) + ["step"] + header = ",".join(sorted(columns)) + assert lines[0].strip() == header diff --git a/tests/tests_pytorch/loggers/test_csv.py b/tests/tests_pytorch/loggers/test_csv.py index 477eda52c5c6d..30f7f94c1516a 100644 --- a/tests/tests_pytorch/loggers/test_csv.py +++ b/tests/tests_pytorch/loggers/test_csv.py @@ -88,22 +88,6 @@ def test_log_metrics(tmp_path, step_idx): assert all(n in lines[0] for n in metrics) -@pytest.mark.parametrize("step_idx", [10, None]) -def test_log_metrics_column_order_sorted(tmp_path, step_idx): - logger = CSVLogger(tmp_path) - metrics = {"float": 0.3, "int": 1, "FloatTensor": torch.tensor(0.1), "IntTensor": torch.tensor(1)} - logger.log_metrics(metrics, step_idx) - logger.save() - - path_csv = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE) - with open(path_csv) as fp: - lines = fp.readlines() - - columns = list(metrics.keys()) + ["step"] - header = ",".join(sorted(columns)) - assert lines[0].strip() == header - - def test_log_hyperparams(tmp_path): logger = CSVLogger(tmp_path) hparams = { From 47cf42448977242bf6fe514b63a8fab8bad48068 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 18 Dec 2023 07:53:36 +0100 Subject: [PATCH 5/6] improve test --- tests/tests_fabric/loggers/test_csv.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/tests_fabric/loggers/test_csv.py b/tests/tests_fabric/loggers/test_csv.py index 49feed9d65e5f..8bb4476785a0c 100644 --- a/tests/tests_fabric/loggers/test_csv.py +++ b/tests/tests_fabric/loggers/test_csv.py @@ -185,17 +185,19 @@ def test_rewrite_with_new_header(tmp_path): assert logs == ["0", "1", "22", ""] -@pytest.mark.parametrize("step_idx", [10, None]) -def test_log_metrics_column_order_sorted(tmp_path, step_idx): +def test_log_metrics_column_order_sorted(tmp_path): + """Test that the columns in the output metrics file are sorted by name.""" logger = CSVLogger(tmp_path) - metrics = {"float": 0.3, "int": 1, "FloatTensor": torch.tensor(0.1), "IntTensor": torch.tensor(1)} - logger.log_metrics(metrics, step_idx) + logger.log_metrics({"c": 0.1}) + logger.log_metrics({"c": 0.2}) + logger.log_metrics({"b": 0.3}) + logger.log_metrics({"a": 0.4}) + logger.save() + logger.log_metrics({"d": 0.5}) logger.save() - path_csv = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE) + path_csv = os.path.join(logger.log_dir, _ExperimentWriter.NAME_METRICS_FILE) with open(path_csv) as fp: lines = fp.readlines() - columns = list(metrics.keys()) + ["step"] - header = ",".join(sorted(columns)) - assert lines[0].strip() == header + assert lines[0].strip() == "a,b,c,d,step" From d6e0447e55dae0c91072aaac945de31654ce6b98 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 18 Dec 2023 07:55:23 +0100 Subject: [PATCH 6/6] add changelog --- src/lightning/fabric/CHANGELOG.md | 3 +++ src/lightning/pytorch/CHANGELOG.md | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md index cf424eee95bd4..81b01cbe7774e 100644 --- a/src/lightning/fabric/CHANGELOG.md +++ b/src/lightning/fabric/CHANGELOG.md @@ -26,6 +26,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed the `TransformerEnginePrecision(dtype=)` argument to `weights_dtype` and made it required ([#19082](https://github.com/Lightning-AI/lightning/pull/19082)) +- The columns in the `metrics.csv` file produced by `CSVLogger` are now sorted alphabetically ([#19159](https://github.com/Lightning-AI/lightning/pull/19159)) + + ### Deprecated - diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index 5d0b03a742812..b8aaaff72b77f 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -41,6 +41,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed the `TransformerEnginePrecision(dtype=)` argument to `weights_dtype` and made it required ([#19082](https://github.com/Lightning-AI/lightning/pull/19082)) +- The columns in the `metrics.csv` file produced by `CSVLogger` are now sorted alphabetically ([#19159](https://github.com/Lightning-AI/lightning/pull/19159)) + + ### Deprecated - Deprecated all precision plugin classes under `lightning.pytorch.plugins` with the suffix `Plugin` in the name ([#18840](https://github.com/Lightning-AI/lightning/pull/18840))